xref: /openbmc/linux/kernel/bpf/verifier.c (revision b0bc615df488abd0e95107e4a9ecefb9bf8c250a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 
29 #include "disasm.h"
30 
31 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
32 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
33 	[_id] = & _name ## _verifier_ops,
34 #define BPF_MAP_TYPE(_id, _ops)
35 #define BPF_LINK_TYPE(_id, _name)
36 #include <linux/bpf_types.h>
37 #undef BPF_PROG_TYPE
38 #undef BPF_MAP_TYPE
39 #undef BPF_LINK_TYPE
40 };
41 
42 /* bpf_check() is a static code analyzer that walks eBPF program
43  * instruction by instruction and updates register/stack state.
44  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
45  *
46  * The first pass is depth-first-search to check that the program is a DAG.
47  * It rejects the following programs:
48  * - larger than BPF_MAXINSNS insns
49  * - if loop is present (detected via back-edge)
50  * - unreachable insns exist (shouldn't be a forest. program = one function)
51  * - out of bounds or malformed jumps
52  * The second pass is all possible path descent from the 1st insn.
53  * Since it's analyzing all paths through the program, the length of the
54  * analysis is limited to 64k insn, which may be hit even if total number of
55  * insn is less then 4K, but there are too many branches that change stack/regs.
56  * Number of 'branches to be analyzed' is limited to 1k
57  *
58  * On entry to each instruction, each register has a type, and the instruction
59  * changes the types of the registers depending on instruction semantics.
60  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
61  * copied to R1.
62  *
63  * All registers are 64-bit.
64  * R0 - return register
65  * R1-R5 argument passing registers
66  * R6-R9 callee saved registers
67  * R10 - frame pointer read-only
68  *
69  * At the start of BPF program the register R1 contains a pointer to bpf_context
70  * and has type PTR_TO_CTX.
71  *
72  * Verifier tracks arithmetic operations on pointers in case:
73  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
74  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
75  * 1st insn copies R10 (which has FRAME_PTR) type into R1
76  * and 2nd arithmetic instruction is pattern matched to recognize
77  * that it wants to construct a pointer to some element within stack.
78  * So after 2nd insn, the register R1 has type PTR_TO_STACK
79  * (and -20 constant is saved for further stack bounds checking).
80  * Meaning that this reg is a pointer to stack plus known immediate constant.
81  *
82  * Most of the time the registers have SCALAR_VALUE type, which
83  * means the register has some value, but it's not a valid pointer.
84  * (like pointer plus pointer becomes SCALAR_VALUE type)
85  *
86  * When verifier sees load or store instructions the type of base register
87  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
88  * four pointer types recognized by check_mem_access() function.
89  *
90  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
91  * and the range of [ptr, ptr + map's value_size) is accessible.
92  *
93  * registers used to pass values to function calls are checked against
94  * function argument constraints.
95  *
96  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
97  * It means that the register type passed to this function must be
98  * PTR_TO_STACK and it will be used inside the function as
99  * 'pointer to map element key'
100  *
101  * For example the argument constraints for bpf_map_lookup_elem():
102  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
103  *   .arg1_type = ARG_CONST_MAP_PTR,
104  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
105  *
106  * ret_type says that this function returns 'pointer to map elem value or null'
107  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
108  * 2nd argument should be a pointer to stack, which will be used inside
109  * the helper function as a pointer to map element key.
110  *
111  * On the kernel side the helper function looks like:
112  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
113  * {
114  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
115  *    void *key = (void *) (unsigned long) r2;
116  *    void *value;
117  *
118  *    here kernel can access 'key' and 'map' pointers safely, knowing that
119  *    [key, key + map->key_size) bytes are valid and were initialized on
120  *    the stack of eBPF program.
121  * }
122  *
123  * Corresponding eBPF program may look like:
124  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
125  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
126  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
127  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
128  * here verifier looks at prototype of map_lookup_elem() and sees:
129  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
130  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
131  *
132  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
133  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
134  * and were initialized prior to this call.
135  * If it's ok, then verifier allows this BPF_CALL insn and looks at
136  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
137  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
138  * returns either pointer to map value or NULL.
139  *
140  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
141  * insn, the register holding that pointer in the true branch changes state to
142  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
143  * branch. See check_cond_jmp_op().
144  *
145  * After the call R0 is set to return type of the function and registers R1-R5
146  * are set to NOT_INIT to indicate that they are no longer readable.
147  *
148  * The following reference types represent a potential reference to a kernel
149  * resource which, after first being allocated, must be checked and freed by
150  * the BPF program:
151  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
152  *
153  * When the verifier sees a helper call return a reference type, it allocates a
154  * pointer id for the reference and stores it in the current function state.
155  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
156  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
157  * passes through a NULL-check conditional. For the branch wherein the state is
158  * changed to CONST_IMM, the verifier releases the reference.
159  *
160  * For each helper function that allocates a reference, such as
161  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
162  * bpf_sk_release(). When a reference type passes into the release function,
163  * the verifier also releases the reference. If any unchecked or unreleased
164  * reference remains at the end of the program, the verifier rejects it.
165  */
166 
167 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
168 struct bpf_verifier_stack_elem {
169 	/* verifer state is 'st'
170 	 * before processing instruction 'insn_idx'
171 	 * and after processing instruction 'prev_insn_idx'
172 	 */
173 	struct bpf_verifier_state st;
174 	int insn_idx;
175 	int prev_insn_idx;
176 	struct bpf_verifier_stack_elem *next;
177 	/* length of verifier log at the time this state was pushed on stack */
178 	u32 log_pos;
179 };
180 
181 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
182 #define BPF_COMPLEXITY_LIMIT_STATES	64
183 
184 #define BPF_MAP_KEY_POISON	(1ULL << 63)
185 #define BPF_MAP_KEY_SEEN	(1ULL << 62)
186 
187 #define BPF_MAP_PTR_UNPRIV	1UL
188 #define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
189 					  POISON_POINTER_DELTA))
190 #define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
191 
192 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
193 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
194 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
195 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
196 static int ref_set_non_owning(struct bpf_verifier_env *env,
197 			      struct bpf_reg_state *reg);
198 
199 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
200 {
201 	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
202 }
203 
204 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
205 {
206 	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
207 }
208 
209 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
210 			      const struct bpf_map *map, bool unpriv)
211 {
212 	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
213 	unpriv |= bpf_map_ptr_unpriv(aux);
214 	aux->map_ptr_state = (unsigned long)map |
215 			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
216 }
217 
218 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
219 {
220 	return aux->map_key_state & BPF_MAP_KEY_POISON;
221 }
222 
223 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
224 {
225 	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
226 }
227 
228 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
229 {
230 	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
231 }
232 
233 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
234 {
235 	bool poisoned = bpf_map_key_poisoned(aux);
236 
237 	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
238 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
239 }
240 
241 static bool bpf_pseudo_call(const struct bpf_insn *insn)
242 {
243 	return insn->code == (BPF_JMP | BPF_CALL) &&
244 	       insn->src_reg == BPF_PSEUDO_CALL;
245 }
246 
247 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
248 {
249 	return insn->code == (BPF_JMP | BPF_CALL) &&
250 	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
251 }
252 
253 struct bpf_call_arg_meta {
254 	struct bpf_map *map_ptr;
255 	bool raw_mode;
256 	bool pkt_access;
257 	u8 release_regno;
258 	int regno;
259 	int access_size;
260 	int mem_size;
261 	u64 msize_max_value;
262 	int ref_obj_id;
263 	int dynptr_id;
264 	int map_uid;
265 	int func_id;
266 	struct btf *btf;
267 	u32 btf_id;
268 	struct btf *ret_btf;
269 	u32 ret_btf_id;
270 	u32 subprogno;
271 	struct btf_field *kptr_field;
272 };
273 
274 struct bpf_kfunc_call_arg_meta {
275 	/* In parameters */
276 	struct btf *btf;
277 	u32 func_id;
278 	u32 kfunc_flags;
279 	const struct btf_type *func_proto;
280 	const char *func_name;
281 	/* Out parameters */
282 	u32 ref_obj_id;
283 	u8 release_regno;
284 	bool r0_rdonly;
285 	u32 ret_btf_id;
286 	u64 r0_size;
287 	u32 subprogno;
288 	struct {
289 		u64 value;
290 		bool found;
291 	} arg_constant;
292 	struct {
293 		struct btf *btf;
294 		u32 btf_id;
295 	} arg_obj_drop;
296 	struct {
297 		struct btf_field *field;
298 	} arg_list_head;
299 	struct {
300 		struct btf_field *field;
301 	} arg_rbtree_root;
302 	struct {
303 		enum bpf_dynptr_type type;
304 		u32 id;
305 	} initialized_dynptr;
306 	struct {
307 		u8 spi;
308 		u8 frameno;
309 	} iter;
310 	u64 mem_size;
311 };
312 
313 struct btf *btf_vmlinux;
314 
315 static DEFINE_MUTEX(bpf_verifier_lock);
316 
317 static const struct bpf_line_info *
318 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
319 {
320 	const struct bpf_line_info *linfo;
321 	const struct bpf_prog *prog;
322 	u32 i, nr_linfo;
323 
324 	prog = env->prog;
325 	nr_linfo = prog->aux->nr_linfo;
326 
327 	if (!nr_linfo || insn_off >= prog->len)
328 		return NULL;
329 
330 	linfo = prog->aux->linfo;
331 	for (i = 1; i < nr_linfo; i++)
332 		if (insn_off < linfo[i].insn_off)
333 			break;
334 
335 	return &linfo[i - 1];
336 }
337 
338 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
339 {
340 	struct bpf_verifier_env *env = private_data;
341 	va_list args;
342 
343 	if (!bpf_verifier_log_needed(&env->log))
344 		return;
345 
346 	va_start(args, fmt);
347 	bpf_verifier_vlog(&env->log, fmt, args);
348 	va_end(args);
349 }
350 
351 static const char *ltrim(const char *s)
352 {
353 	while (isspace(*s))
354 		s++;
355 
356 	return s;
357 }
358 
359 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
360 					 u32 insn_off,
361 					 const char *prefix_fmt, ...)
362 {
363 	const struct bpf_line_info *linfo;
364 
365 	if (!bpf_verifier_log_needed(&env->log))
366 		return;
367 
368 	linfo = find_linfo(env, insn_off);
369 	if (!linfo || linfo == env->prev_linfo)
370 		return;
371 
372 	if (prefix_fmt) {
373 		va_list args;
374 
375 		va_start(args, prefix_fmt);
376 		bpf_verifier_vlog(&env->log, prefix_fmt, args);
377 		va_end(args);
378 	}
379 
380 	verbose(env, "%s\n",
381 		ltrim(btf_name_by_offset(env->prog->aux->btf,
382 					 linfo->line_off)));
383 
384 	env->prev_linfo = linfo;
385 }
386 
387 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
388 				   struct bpf_reg_state *reg,
389 				   struct tnum *range, const char *ctx,
390 				   const char *reg_name)
391 {
392 	char tn_buf[48];
393 
394 	verbose(env, "At %s the register %s ", ctx, reg_name);
395 	if (!tnum_is_unknown(reg->var_off)) {
396 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
397 		verbose(env, "has value %s", tn_buf);
398 	} else {
399 		verbose(env, "has unknown scalar value");
400 	}
401 	tnum_strn(tn_buf, sizeof(tn_buf), *range);
402 	verbose(env, " should have been in %s\n", tn_buf);
403 }
404 
405 static bool type_is_pkt_pointer(enum bpf_reg_type type)
406 {
407 	type = base_type(type);
408 	return type == PTR_TO_PACKET ||
409 	       type == PTR_TO_PACKET_META;
410 }
411 
412 static bool type_is_sk_pointer(enum bpf_reg_type type)
413 {
414 	return type == PTR_TO_SOCKET ||
415 		type == PTR_TO_SOCK_COMMON ||
416 		type == PTR_TO_TCP_SOCK ||
417 		type == PTR_TO_XDP_SOCK;
418 }
419 
420 static bool type_may_be_null(u32 type)
421 {
422 	return type & PTR_MAYBE_NULL;
423 }
424 
425 static bool reg_type_not_null(enum bpf_reg_type type)
426 {
427 	if (type_may_be_null(type))
428 		return false;
429 
430 	type = base_type(type);
431 	return type == PTR_TO_SOCKET ||
432 		type == PTR_TO_TCP_SOCK ||
433 		type == PTR_TO_MAP_VALUE ||
434 		type == PTR_TO_MAP_KEY ||
435 		type == PTR_TO_SOCK_COMMON ||
436 		type == PTR_TO_MEM;
437 }
438 
439 static bool type_is_ptr_alloc_obj(u32 type)
440 {
441 	return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
442 }
443 
444 static bool type_is_non_owning_ref(u32 type)
445 {
446 	return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
447 }
448 
449 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
450 {
451 	struct btf_record *rec = NULL;
452 	struct btf_struct_meta *meta;
453 
454 	if (reg->type == PTR_TO_MAP_VALUE) {
455 		rec = reg->map_ptr->record;
456 	} else if (type_is_ptr_alloc_obj(reg->type)) {
457 		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
458 		if (meta)
459 			rec = meta->record;
460 	}
461 	return rec;
462 }
463 
464 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
465 {
466 	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
467 }
468 
469 static bool type_is_rdonly_mem(u32 type)
470 {
471 	return type & MEM_RDONLY;
472 }
473 
474 static bool is_acquire_function(enum bpf_func_id func_id,
475 				const struct bpf_map *map)
476 {
477 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
478 
479 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
480 	    func_id == BPF_FUNC_sk_lookup_udp ||
481 	    func_id == BPF_FUNC_skc_lookup_tcp ||
482 	    func_id == BPF_FUNC_ringbuf_reserve ||
483 	    func_id == BPF_FUNC_kptr_xchg)
484 		return true;
485 
486 	if (func_id == BPF_FUNC_map_lookup_elem &&
487 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
488 	     map_type == BPF_MAP_TYPE_SOCKHASH))
489 		return true;
490 
491 	return false;
492 }
493 
494 static bool is_ptr_cast_function(enum bpf_func_id func_id)
495 {
496 	return func_id == BPF_FUNC_tcp_sock ||
497 		func_id == BPF_FUNC_sk_fullsock ||
498 		func_id == BPF_FUNC_skc_to_tcp_sock ||
499 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
500 		func_id == BPF_FUNC_skc_to_udp6_sock ||
501 		func_id == BPF_FUNC_skc_to_mptcp_sock ||
502 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
503 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
504 }
505 
506 static bool is_dynptr_ref_function(enum bpf_func_id func_id)
507 {
508 	return func_id == BPF_FUNC_dynptr_data;
509 }
510 
511 static bool is_callback_calling_function(enum bpf_func_id func_id)
512 {
513 	return func_id == BPF_FUNC_for_each_map_elem ||
514 	       func_id == BPF_FUNC_timer_set_callback ||
515 	       func_id == BPF_FUNC_find_vma ||
516 	       func_id == BPF_FUNC_loop ||
517 	       func_id == BPF_FUNC_user_ringbuf_drain;
518 }
519 
520 static bool is_storage_get_function(enum bpf_func_id func_id)
521 {
522 	return func_id == BPF_FUNC_sk_storage_get ||
523 	       func_id == BPF_FUNC_inode_storage_get ||
524 	       func_id == BPF_FUNC_task_storage_get ||
525 	       func_id == BPF_FUNC_cgrp_storage_get;
526 }
527 
528 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
529 					const struct bpf_map *map)
530 {
531 	int ref_obj_uses = 0;
532 
533 	if (is_ptr_cast_function(func_id))
534 		ref_obj_uses++;
535 	if (is_acquire_function(func_id, map))
536 		ref_obj_uses++;
537 	if (is_dynptr_ref_function(func_id))
538 		ref_obj_uses++;
539 
540 	return ref_obj_uses > 1;
541 }
542 
543 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
544 {
545 	return BPF_CLASS(insn->code) == BPF_STX &&
546 	       BPF_MODE(insn->code) == BPF_ATOMIC &&
547 	       insn->imm == BPF_CMPXCHG;
548 }
549 
550 /* string representation of 'enum bpf_reg_type'
551  *
552  * Note that reg_type_str() can not appear more than once in a single verbose()
553  * statement.
554  */
555 static const char *reg_type_str(struct bpf_verifier_env *env,
556 				enum bpf_reg_type type)
557 {
558 	char postfix[16] = {0}, prefix[64] = {0};
559 	static const char * const str[] = {
560 		[NOT_INIT]		= "?",
561 		[SCALAR_VALUE]		= "scalar",
562 		[PTR_TO_CTX]		= "ctx",
563 		[CONST_PTR_TO_MAP]	= "map_ptr",
564 		[PTR_TO_MAP_VALUE]	= "map_value",
565 		[PTR_TO_STACK]		= "fp",
566 		[PTR_TO_PACKET]		= "pkt",
567 		[PTR_TO_PACKET_META]	= "pkt_meta",
568 		[PTR_TO_PACKET_END]	= "pkt_end",
569 		[PTR_TO_FLOW_KEYS]	= "flow_keys",
570 		[PTR_TO_SOCKET]		= "sock",
571 		[PTR_TO_SOCK_COMMON]	= "sock_common",
572 		[PTR_TO_TCP_SOCK]	= "tcp_sock",
573 		[PTR_TO_TP_BUFFER]	= "tp_buffer",
574 		[PTR_TO_XDP_SOCK]	= "xdp_sock",
575 		[PTR_TO_BTF_ID]		= "ptr_",
576 		[PTR_TO_MEM]		= "mem",
577 		[PTR_TO_BUF]		= "buf",
578 		[PTR_TO_FUNC]		= "func",
579 		[PTR_TO_MAP_KEY]	= "map_key",
580 		[CONST_PTR_TO_DYNPTR]	= "dynptr_ptr",
581 	};
582 
583 	if (type & PTR_MAYBE_NULL) {
584 		if (base_type(type) == PTR_TO_BTF_ID)
585 			strncpy(postfix, "or_null_", 16);
586 		else
587 			strncpy(postfix, "_or_null", 16);
588 	}
589 
590 	snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
591 		 type & MEM_RDONLY ? "rdonly_" : "",
592 		 type & MEM_RINGBUF ? "ringbuf_" : "",
593 		 type & MEM_USER ? "user_" : "",
594 		 type & MEM_PERCPU ? "percpu_" : "",
595 		 type & MEM_RCU ? "rcu_" : "",
596 		 type & PTR_UNTRUSTED ? "untrusted_" : "",
597 		 type & PTR_TRUSTED ? "trusted_" : ""
598 	);
599 
600 	snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
601 		 prefix, str[base_type(type)], postfix);
602 	return env->type_str_buf;
603 }
604 
605 static char slot_type_char[] = {
606 	[STACK_INVALID]	= '?',
607 	[STACK_SPILL]	= 'r',
608 	[STACK_MISC]	= 'm',
609 	[STACK_ZERO]	= '0',
610 	[STACK_DYNPTR]	= 'd',
611 	[STACK_ITER]	= 'i',
612 };
613 
614 static void print_liveness(struct bpf_verifier_env *env,
615 			   enum bpf_reg_liveness live)
616 {
617 	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
618 	    verbose(env, "_");
619 	if (live & REG_LIVE_READ)
620 		verbose(env, "r");
621 	if (live & REG_LIVE_WRITTEN)
622 		verbose(env, "w");
623 	if (live & REG_LIVE_DONE)
624 		verbose(env, "D");
625 }
626 
627 static int __get_spi(s32 off)
628 {
629 	return (-off - 1) / BPF_REG_SIZE;
630 }
631 
632 static struct bpf_func_state *func(struct bpf_verifier_env *env,
633 				   const struct bpf_reg_state *reg)
634 {
635 	struct bpf_verifier_state *cur = env->cur_state;
636 
637 	return cur->frame[reg->frameno];
638 }
639 
640 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
641 {
642        int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
643 
644        /* We need to check that slots between [spi - nr_slots + 1, spi] are
645 	* within [0, allocated_stack).
646 	*
647 	* Please note that the spi grows downwards. For example, a dynptr
648 	* takes the size of two stack slots; the first slot will be at
649 	* spi and the second slot will be at spi - 1.
650 	*/
651        return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
652 }
653 
654 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
655 			          const char *obj_kind, int nr_slots)
656 {
657 	int off, spi;
658 
659 	if (!tnum_is_const(reg->var_off)) {
660 		verbose(env, "%s has to be at a constant offset\n", obj_kind);
661 		return -EINVAL;
662 	}
663 
664 	off = reg->off + reg->var_off.value;
665 	if (off % BPF_REG_SIZE) {
666 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
667 		return -EINVAL;
668 	}
669 
670 	spi = __get_spi(off);
671 	if (spi + 1 < nr_slots) {
672 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
673 		return -EINVAL;
674 	}
675 
676 	if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
677 		return -ERANGE;
678 	return spi;
679 }
680 
681 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
682 {
683 	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
684 }
685 
686 static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
687 {
688 	return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
689 }
690 
691 static const char *btf_type_name(const struct btf *btf, u32 id)
692 {
693 	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
694 }
695 
696 static const char *dynptr_type_str(enum bpf_dynptr_type type)
697 {
698 	switch (type) {
699 	case BPF_DYNPTR_TYPE_LOCAL:
700 		return "local";
701 	case BPF_DYNPTR_TYPE_RINGBUF:
702 		return "ringbuf";
703 	case BPF_DYNPTR_TYPE_SKB:
704 		return "skb";
705 	case BPF_DYNPTR_TYPE_XDP:
706 		return "xdp";
707 	case BPF_DYNPTR_TYPE_INVALID:
708 		return "<invalid>";
709 	default:
710 		WARN_ONCE(1, "unknown dynptr type %d\n", type);
711 		return "<unknown>";
712 	}
713 }
714 
715 static const char *iter_type_str(const struct btf *btf, u32 btf_id)
716 {
717 	if (!btf || btf_id == 0)
718 		return "<invalid>";
719 
720 	/* we already validated that type is valid and has conforming name */
721 	return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1;
722 }
723 
724 static const char *iter_state_str(enum bpf_iter_state state)
725 {
726 	switch (state) {
727 	case BPF_ITER_STATE_ACTIVE:
728 		return "active";
729 	case BPF_ITER_STATE_DRAINED:
730 		return "drained";
731 	case BPF_ITER_STATE_INVALID:
732 		return "<invalid>";
733 	default:
734 		WARN_ONCE(1, "unknown iter state %d\n", state);
735 		return "<unknown>";
736 	}
737 }
738 
739 static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
740 {
741 	env->scratched_regs |= 1U << regno;
742 }
743 
744 static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
745 {
746 	env->scratched_stack_slots |= 1ULL << spi;
747 }
748 
749 static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
750 {
751 	return (env->scratched_regs >> regno) & 1;
752 }
753 
754 static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
755 {
756 	return (env->scratched_stack_slots >> regno) & 1;
757 }
758 
759 static bool verifier_state_scratched(const struct bpf_verifier_env *env)
760 {
761 	return env->scratched_regs || env->scratched_stack_slots;
762 }
763 
764 static void mark_verifier_state_clean(struct bpf_verifier_env *env)
765 {
766 	env->scratched_regs = 0U;
767 	env->scratched_stack_slots = 0ULL;
768 }
769 
770 /* Used for printing the entire verifier state. */
771 static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
772 {
773 	env->scratched_regs = ~0U;
774 	env->scratched_stack_slots = ~0ULL;
775 }
776 
777 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
778 {
779 	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
780 	case DYNPTR_TYPE_LOCAL:
781 		return BPF_DYNPTR_TYPE_LOCAL;
782 	case DYNPTR_TYPE_RINGBUF:
783 		return BPF_DYNPTR_TYPE_RINGBUF;
784 	case DYNPTR_TYPE_SKB:
785 		return BPF_DYNPTR_TYPE_SKB;
786 	case DYNPTR_TYPE_XDP:
787 		return BPF_DYNPTR_TYPE_XDP;
788 	default:
789 		return BPF_DYNPTR_TYPE_INVALID;
790 	}
791 }
792 
793 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
794 {
795 	switch (type) {
796 	case BPF_DYNPTR_TYPE_LOCAL:
797 		return DYNPTR_TYPE_LOCAL;
798 	case BPF_DYNPTR_TYPE_RINGBUF:
799 		return DYNPTR_TYPE_RINGBUF;
800 	case BPF_DYNPTR_TYPE_SKB:
801 		return DYNPTR_TYPE_SKB;
802 	case BPF_DYNPTR_TYPE_XDP:
803 		return DYNPTR_TYPE_XDP;
804 	default:
805 		return 0;
806 	}
807 }
808 
809 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
810 {
811 	return type == BPF_DYNPTR_TYPE_RINGBUF;
812 }
813 
814 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
815 			      enum bpf_dynptr_type type,
816 			      bool first_slot, int dynptr_id);
817 
818 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
819 				struct bpf_reg_state *reg);
820 
821 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
822 				   struct bpf_reg_state *sreg1,
823 				   struct bpf_reg_state *sreg2,
824 				   enum bpf_dynptr_type type)
825 {
826 	int id = ++env->id_gen;
827 
828 	__mark_dynptr_reg(sreg1, type, true, id);
829 	__mark_dynptr_reg(sreg2, type, false, id);
830 }
831 
832 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
833 			       struct bpf_reg_state *reg,
834 			       enum bpf_dynptr_type type)
835 {
836 	__mark_dynptr_reg(reg, type, true, ++env->id_gen);
837 }
838 
839 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
840 				        struct bpf_func_state *state, int spi);
841 
842 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
843 				   enum bpf_arg_type arg_type, int insn_idx)
844 {
845 	struct bpf_func_state *state = func(env, reg);
846 	enum bpf_dynptr_type type;
847 	int spi, i, id, err;
848 
849 	spi = dynptr_get_spi(env, reg);
850 	if (spi < 0)
851 		return spi;
852 
853 	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
854 	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
855 	 * to ensure that for the following example:
856 	 *	[d1][d1][d2][d2]
857 	 * spi    3   2   1   0
858 	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
859 	 * case they do belong to same dynptr, second call won't see slot_type
860 	 * as STACK_DYNPTR and will simply skip destruction.
861 	 */
862 	err = destroy_if_dynptr_stack_slot(env, state, spi);
863 	if (err)
864 		return err;
865 	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
866 	if (err)
867 		return err;
868 
869 	for (i = 0; i < BPF_REG_SIZE; i++) {
870 		state->stack[spi].slot_type[i] = STACK_DYNPTR;
871 		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
872 	}
873 
874 	type = arg_to_dynptr_type(arg_type);
875 	if (type == BPF_DYNPTR_TYPE_INVALID)
876 		return -EINVAL;
877 
878 	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
879 			       &state->stack[spi - 1].spilled_ptr, type);
880 
881 	if (dynptr_type_refcounted(type)) {
882 		/* The id is used to track proper releasing */
883 		id = acquire_reference_state(env, insn_idx);
884 		if (id < 0)
885 			return id;
886 
887 		state->stack[spi].spilled_ptr.ref_obj_id = id;
888 		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
889 	}
890 
891 	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
892 	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
893 
894 	return 0;
895 }
896 
897 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
898 {
899 	struct bpf_func_state *state = func(env, reg);
900 	int spi, i;
901 
902 	spi = dynptr_get_spi(env, reg);
903 	if (spi < 0)
904 		return spi;
905 
906 	for (i = 0; i < BPF_REG_SIZE; i++) {
907 		state->stack[spi].slot_type[i] = STACK_INVALID;
908 		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
909 	}
910 
911 	/* Invalidate any slices associated with this dynptr */
912 	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type))
913 		WARN_ON_ONCE(release_reference(env, state->stack[spi].spilled_ptr.ref_obj_id));
914 
915 	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
916 	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
917 
918 	/* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
919 	 *
920 	 * While we don't allow reading STACK_INVALID, it is still possible to
921 	 * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
922 	 * helpers or insns can do partial read of that part without failing,
923 	 * but check_stack_range_initialized, check_stack_read_var_off, and
924 	 * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
925 	 * the slot conservatively. Hence we need to prevent those liveness
926 	 * marking walks.
927 	 *
928 	 * This was not a problem before because STACK_INVALID is only set by
929 	 * default (where the default reg state has its reg->parent as NULL), or
930 	 * in clean_live_states after REG_LIVE_DONE (at which point
931 	 * mark_reg_read won't walk reg->parent chain), but not randomly during
932 	 * verifier state exploration (like we did above). Hence, for our case
933 	 * parentage chain will still be live (i.e. reg->parent may be
934 	 * non-NULL), while earlier reg->parent was NULL, so we need
935 	 * REG_LIVE_WRITTEN to screen off read marker propagation when it is
936 	 * done later on reads or by mark_dynptr_read as well to unnecessary
937 	 * mark registers in verifier state.
938 	 */
939 	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
940 	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
941 
942 	return 0;
943 }
944 
945 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
946 			       struct bpf_reg_state *reg);
947 
948 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
949 {
950 	if (!env->allow_ptr_leaks)
951 		__mark_reg_not_init(env, reg);
952 	else
953 		__mark_reg_unknown(env, reg);
954 }
955 
956 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
957 				        struct bpf_func_state *state, int spi)
958 {
959 	struct bpf_func_state *fstate;
960 	struct bpf_reg_state *dreg;
961 	int i, dynptr_id;
962 
963 	/* We always ensure that STACK_DYNPTR is never set partially,
964 	 * hence just checking for slot_type[0] is enough. This is
965 	 * different for STACK_SPILL, where it may be only set for
966 	 * 1 byte, so code has to use is_spilled_reg.
967 	 */
968 	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
969 		return 0;
970 
971 	/* Reposition spi to first slot */
972 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
973 		spi = spi + 1;
974 
975 	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
976 		verbose(env, "cannot overwrite referenced dynptr\n");
977 		return -EINVAL;
978 	}
979 
980 	mark_stack_slot_scratched(env, spi);
981 	mark_stack_slot_scratched(env, spi - 1);
982 
983 	/* Writing partially to one dynptr stack slot destroys both. */
984 	for (i = 0; i < BPF_REG_SIZE; i++) {
985 		state->stack[spi].slot_type[i] = STACK_INVALID;
986 		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
987 	}
988 
989 	dynptr_id = state->stack[spi].spilled_ptr.id;
990 	/* Invalidate any slices associated with this dynptr */
991 	bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
992 		/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
993 		if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
994 			continue;
995 		if (dreg->dynptr_id == dynptr_id)
996 			mark_reg_invalid(env, dreg);
997 	}));
998 
999 	/* Do not release reference state, we are destroying dynptr on stack,
1000 	 * not using some helper to release it. Just reset register.
1001 	 */
1002 	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
1003 	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
1004 
1005 	/* Same reason as unmark_stack_slots_dynptr above */
1006 	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1007 	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
1008 
1009 	return 0;
1010 }
1011 
1012 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1013 {
1014 	int spi;
1015 
1016 	if (reg->type == CONST_PTR_TO_DYNPTR)
1017 		return false;
1018 
1019 	spi = dynptr_get_spi(env, reg);
1020 
1021 	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
1022 	 * error because this just means the stack state hasn't been updated yet.
1023 	 * We will do check_mem_access to check and update stack bounds later.
1024 	 */
1025 	if (spi < 0 && spi != -ERANGE)
1026 		return false;
1027 
1028 	/* We don't need to check if the stack slots are marked by previous
1029 	 * dynptr initializations because we allow overwriting existing unreferenced
1030 	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
1031 	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
1032 	 * touching are completely destructed before we reinitialize them for a new
1033 	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
1034 	 * instead of delaying it until the end where the user will get "Unreleased
1035 	 * reference" error.
1036 	 */
1037 	return true;
1038 }
1039 
1040 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1041 {
1042 	struct bpf_func_state *state = func(env, reg);
1043 	int i, spi;
1044 
1045 	/* This already represents first slot of initialized bpf_dynptr.
1046 	 *
1047 	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
1048 	 * check_func_arg_reg_off's logic, so we don't need to check its
1049 	 * offset and alignment.
1050 	 */
1051 	if (reg->type == CONST_PTR_TO_DYNPTR)
1052 		return true;
1053 
1054 	spi = dynptr_get_spi(env, reg);
1055 	if (spi < 0)
1056 		return false;
1057 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
1058 		return false;
1059 
1060 	for (i = 0; i < BPF_REG_SIZE; i++) {
1061 		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
1062 		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
1063 			return false;
1064 	}
1065 
1066 	return true;
1067 }
1068 
1069 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1070 				    enum bpf_arg_type arg_type)
1071 {
1072 	struct bpf_func_state *state = func(env, reg);
1073 	enum bpf_dynptr_type dynptr_type;
1074 	int spi;
1075 
1076 	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
1077 	if (arg_type == ARG_PTR_TO_DYNPTR)
1078 		return true;
1079 
1080 	dynptr_type = arg_to_dynptr_type(arg_type);
1081 	if (reg->type == CONST_PTR_TO_DYNPTR) {
1082 		return reg->dynptr.type == dynptr_type;
1083 	} else {
1084 		spi = dynptr_get_spi(env, reg);
1085 		if (spi < 0)
1086 			return false;
1087 		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
1088 	}
1089 }
1090 
1091 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
1092 
1093 static int mark_stack_slots_iter(struct bpf_verifier_env *env,
1094 				 struct bpf_reg_state *reg, int insn_idx,
1095 				 struct btf *btf, u32 btf_id, int nr_slots)
1096 {
1097 	struct bpf_func_state *state = func(env, reg);
1098 	int spi, i, j, id;
1099 
1100 	spi = iter_get_spi(env, reg, nr_slots);
1101 	if (spi < 0)
1102 		return spi;
1103 
1104 	id = acquire_reference_state(env, insn_idx);
1105 	if (id < 0)
1106 		return id;
1107 
1108 	for (i = 0; i < nr_slots; i++) {
1109 		struct bpf_stack_state *slot = &state->stack[spi - i];
1110 		struct bpf_reg_state *st = &slot->spilled_ptr;
1111 
1112 		__mark_reg_known_zero(st);
1113 		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1114 		st->live |= REG_LIVE_WRITTEN;
1115 		st->ref_obj_id = i == 0 ? id : 0;
1116 		st->iter.btf = btf;
1117 		st->iter.btf_id = btf_id;
1118 		st->iter.state = BPF_ITER_STATE_ACTIVE;
1119 		st->iter.depth = 0;
1120 
1121 		for (j = 0; j < BPF_REG_SIZE; j++)
1122 			slot->slot_type[j] = STACK_ITER;
1123 
1124 		mark_stack_slot_scratched(env, spi - i);
1125 	}
1126 
1127 	return 0;
1128 }
1129 
1130 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
1131 				   struct bpf_reg_state *reg, int nr_slots)
1132 {
1133 	struct bpf_func_state *state = func(env, reg);
1134 	int spi, i, j;
1135 
1136 	spi = iter_get_spi(env, reg, nr_slots);
1137 	if (spi < 0)
1138 		return spi;
1139 
1140 	for (i = 0; i < nr_slots; i++) {
1141 		struct bpf_stack_state *slot = &state->stack[spi - i];
1142 		struct bpf_reg_state *st = &slot->spilled_ptr;
1143 
1144 		if (i == 0)
1145 			WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
1146 
1147 		__mark_reg_not_init(env, st);
1148 
1149 		/* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
1150 		st->live |= REG_LIVE_WRITTEN;
1151 
1152 		for (j = 0; j < BPF_REG_SIZE; j++)
1153 			slot->slot_type[j] = STACK_INVALID;
1154 
1155 		mark_stack_slot_scratched(env, spi - i);
1156 	}
1157 
1158 	return 0;
1159 }
1160 
1161 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1162 				     struct bpf_reg_state *reg, int nr_slots)
1163 {
1164 	struct bpf_func_state *state = func(env, reg);
1165 	int spi, i, j;
1166 
1167 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1168 	 * will do check_mem_access to check and update stack bounds later, so
1169 	 * return true for that case.
1170 	 */
1171 	spi = iter_get_spi(env, reg, nr_slots);
1172 	if (spi == -ERANGE)
1173 		return true;
1174 	if (spi < 0)
1175 		return false;
1176 
1177 	for (i = 0; i < nr_slots; i++) {
1178 		struct bpf_stack_state *slot = &state->stack[spi - i];
1179 
1180 		for (j = 0; j < BPF_REG_SIZE; j++)
1181 			if (slot->slot_type[j] == STACK_ITER)
1182 				return false;
1183 	}
1184 
1185 	return true;
1186 }
1187 
1188 static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1189 				   struct btf *btf, u32 btf_id, int nr_slots)
1190 {
1191 	struct bpf_func_state *state = func(env, reg);
1192 	int spi, i, j;
1193 
1194 	spi = iter_get_spi(env, reg, nr_slots);
1195 	if (spi < 0)
1196 		return false;
1197 
1198 	for (i = 0; i < nr_slots; i++) {
1199 		struct bpf_stack_state *slot = &state->stack[spi - i];
1200 		struct bpf_reg_state *st = &slot->spilled_ptr;
1201 
1202 		/* only main (first) slot has ref_obj_id set */
1203 		if (i == 0 && !st->ref_obj_id)
1204 			return false;
1205 		if (i != 0 && st->ref_obj_id)
1206 			return false;
1207 		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1208 			return false;
1209 
1210 		for (j = 0; j < BPF_REG_SIZE; j++)
1211 			if (slot->slot_type[j] != STACK_ITER)
1212 				return false;
1213 	}
1214 
1215 	return true;
1216 }
1217 
1218 /* Check if given stack slot is "special":
1219  *   - spilled register state (STACK_SPILL);
1220  *   - dynptr state (STACK_DYNPTR);
1221  *   - iter state (STACK_ITER).
1222  */
1223 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1224 {
1225 	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1226 
1227 	switch (type) {
1228 	case STACK_SPILL:
1229 	case STACK_DYNPTR:
1230 	case STACK_ITER:
1231 		return true;
1232 	case STACK_INVALID:
1233 	case STACK_MISC:
1234 	case STACK_ZERO:
1235 		return false;
1236 	default:
1237 		WARN_ONCE(1, "unknown stack slot type %d\n", type);
1238 		return true;
1239 	}
1240 }
1241 
1242 /* The reg state of a pointer or a bounded scalar was saved when
1243  * it was spilled to the stack.
1244  */
1245 static bool is_spilled_reg(const struct bpf_stack_state *stack)
1246 {
1247 	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
1248 }
1249 
1250 static void scrub_spilled_slot(u8 *stype)
1251 {
1252 	if (*stype != STACK_INVALID)
1253 		*stype = STACK_MISC;
1254 }
1255 
1256 static void print_verifier_state(struct bpf_verifier_env *env,
1257 				 const struct bpf_func_state *state,
1258 				 bool print_all)
1259 {
1260 	const struct bpf_reg_state *reg;
1261 	enum bpf_reg_type t;
1262 	int i;
1263 
1264 	if (state->frameno)
1265 		verbose(env, " frame%d:", state->frameno);
1266 	for (i = 0; i < MAX_BPF_REG; i++) {
1267 		reg = &state->regs[i];
1268 		t = reg->type;
1269 		if (t == NOT_INIT)
1270 			continue;
1271 		if (!print_all && !reg_scratched(env, i))
1272 			continue;
1273 		verbose(env, " R%d", i);
1274 		print_liveness(env, reg->live);
1275 		verbose(env, "=");
1276 		if (t == SCALAR_VALUE && reg->precise)
1277 			verbose(env, "P");
1278 		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
1279 		    tnum_is_const(reg->var_off)) {
1280 			/* reg->off should be 0 for SCALAR_VALUE */
1281 			verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
1282 			verbose(env, "%lld", reg->var_off.value + reg->off);
1283 		} else {
1284 			const char *sep = "";
1285 
1286 			verbose(env, "%s", reg_type_str(env, t));
1287 			if (base_type(t) == PTR_TO_BTF_ID)
1288 				verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id));
1289 			verbose(env, "(");
1290 /*
1291  * _a stands for append, was shortened to avoid multiline statements below.
1292  * This macro is used to output a comma separated list of attributes.
1293  */
1294 #define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
1295 
1296 			if (reg->id)
1297 				verbose_a("id=%d", reg->id);
1298 			if (reg->ref_obj_id)
1299 				verbose_a("ref_obj_id=%d", reg->ref_obj_id);
1300 			if (type_is_non_owning_ref(reg->type))
1301 				verbose_a("%s", "non_own_ref");
1302 			if (t != SCALAR_VALUE)
1303 				verbose_a("off=%d", reg->off);
1304 			if (type_is_pkt_pointer(t))
1305 				verbose_a("r=%d", reg->range);
1306 			else if (base_type(t) == CONST_PTR_TO_MAP ||
1307 				 base_type(t) == PTR_TO_MAP_KEY ||
1308 				 base_type(t) == PTR_TO_MAP_VALUE)
1309 				verbose_a("ks=%d,vs=%d",
1310 					  reg->map_ptr->key_size,
1311 					  reg->map_ptr->value_size);
1312 			if (tnum_is_const(reg->var_off)) {
1313 				/* Typically an immediate SCALAR_VALUE, but
1314 				 * could be a pointer whose offset is too big
1315 				 * for reg->off
1316 				 */
1317 				verbose_a("imm=%llx", reg->var_off.value);
1318 			} else {
1319 				if (reg->smin_value != reg->umin_value &&
1320 				    reg->smin_value != S64_MIN)
1321 					verbose_a("smin=%lld", (long long)reg->smin_value);
1322 				if (reg->smax_value != reg->umax_value &&
1323 				    reg->smax_value != S64_MAX)
1324 					verbose_a("smax=%lld", (long long)reg->smax_value);
1325 				if (reg->umin_value != 0)
1326 					verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
1327 				if (reg->umax_value != U64_MAX)
1328 					verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
1329 				if (!tnum_is_unknown(reg->var_off)) {
1330 					char tn_buf[48];
1331 
1332 					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1333 					verbose_a("var_off=%s", tn_buf);
1334 				}
1335 				if (reg->s32_min_value != reg->smin_value &&
1336 				    reg->s32_min_value != S32_MIN)
1337 					verbose_a("s32_min=%d", (int)(reg->s32_min_value));
1338 				if (reg->s32_max_value != reg->smax_value &&
1339 				    reg->s32_max_value != S32_MAX)
1340 					verbose_a("s32_max=%d", (int)(reg->s32_max_value));
1341 				if (reg->u32_min_value != reg->umin_value &&
1342 				    reg->u32_min_value != U32_MIN)
1343 					verbose_a("u32_min=%d", (int)(reg->u32_min_value));
1344 				if (reg->u32_max_value != reg->umax_value &&
1345 				    reg->u32_max_value != U32_MAX)
1346 					verbose_a("u32_max=%d", (int)(reg->u32_max_value));
1347 			}
1348 #undef verbose_a
1349 
1350 			verbose(env, ")");
1351 		}
1352 	}
1353 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1354 		char types_buf[BPF_REG_SIZE + 1];
1355 		bool valid = false;
1356 		int j;
1357 
1358 		for (j = 0; j < BPF_REG_SIZE; j++) {
1359 			if (state->stack[i].slot_type[j] != STACK_INVALID)
1360 				valid = true;
1361 			types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1362 		}
1363 		types_buf[BPF_REG_SIZE] = 0;
1364 		if (!valid)
1365 			continue;
1366 		if (!print_all && !stack_slot_scratched(env, i))
1367 			continue;
1368 		switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
1369 		case STACK_SPILL:
1370 			reg = &state->stack[i].spilled_ptr;
1371 			t = reg->type;
1372 
1373 			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1374 			print_liveness(env, reg->live);
1375 			verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
1376 			if (t == SCALAR_VALUE && reg->precise)
1377 				verbose(env, "P");
1378 			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
1379 				verbose(env, "%lld", reg->var_off.value + reg->off);
1380 			break;
1381 		case STACK_DYNPTR:
1382 			i += BPF_DYNPTR_NR_SLOTS - 1;
1383 			reg = &state->stack[i].spilled_ptr;
1384 
1385 			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1386 			print_liveness(env, reg->live);
1387 			verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
1388 			if (reg->ref_obj_id)
1389 				verbose(env, "(ref_id=%d)", reg->ref_obj_id);
1390 			break;
1391 		case STACK_ITER:
1392 			/* only main slot has ref_obj_id set; skip others */
1393 			reg = &state->stack[i].spilled_ptr;
1394 			if (!reg->ref_obj_id)
1395 				continue;
1396 
1397 			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1398 			print_liveness(env, reg->live);
1399 			verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)",
1400 				iter_type_str(reg->iter.btf, reg->iter.btf_id),
1401 				reg->ref_obj_id, iter_state_str(reg->iter.state),
1402 				reg->iter.depth);
1403 			break;
1404 		case STACK_MISC:
1405 		case STACK_ZERO:
1406 		default:
1407 			reg = &state->stack[i].spilled_ptr;
1408 
1409 			for (j = 0; j < BPF_REG_SIZE; j++)
1410 				types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1411 			types_buf[BPF_REG_SIZE] = 0;
1412 
1413 			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1414 			print_liveness(env, reg->live);
1415 			verbose(env, "=%s", types_buf);
1416 			break;
1417 		}
1418 	}
1419 	if (state->acquired_refs && state->refs[0].id) {
1420 		verbose(env, " refs=%d", state->refs[0].id);
1421 		for (i = 1; i < state->acquired_refs; i++)
1422 			if (state->refs[i].id)
1423 				verbose(env, ",%d", state->refs[i].id);
1424 	}
1425 	if (state->in_callback_fn)
1426 		verbose(env, " cb");
1427 	if (state->in_async_callback_fn)
1428 		verbose(env, " async_cb");
1429 	verbose(env, "\n");
1430 	mark_verifier_state_clean(env);
1431 }
1432 
1433 static inline u32 vlog_alignment(u32 pos)
1434 {
1435 	return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
1436 			BPF_LOG_MIN_ALIGNMENT) - pos - 1;
1437 }
1438 
1439 static void print_insn_state(struct bpf_verifier_env *env,
1440 			     const struct bpf_func_state *state)
1441 {
1442 	if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) {
1443 		/* remove new line character */
1444 		bpf_vlog_reset(&env->log, env->prev_log_pos - 1);
1445 		verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' ');
1446 	} else {
1447 		verbose(env, "%d:", env->insn_idx);
1448 	}
1449 	print_verifier_state(env, state, false);
1450 }
1451 
1452 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1453  * small to hold src. This is different from krealloc since we don't want to preserve
1454  * the contents of dst.
1455  *
1456  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1457  * not be allocated.
1458  */
1459 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1460 {
1461 	size_t alloc_bytes;
1462 	void *orig = dst;
1463 	size_t bytes;
1464 
1465 	if (ZERO_OR_NULL_PTR(src))
1466 		goto out;
1467 
1468 	if (unlikely(check_mul_overflow(n, size, &bytes)))
1469 		return NULL;
1470 
1471 	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1472 	dst = krealloc(orig, alloc_bytes, flags);
1473 	if (!dst) {
1474 		kfree(orig);
1475 		return NULL;
1476 	}
1477 
1478 	memcpy(dst, src, bytes);
1479 out:
1480 	return dst ? dst : ZERO_SIZE_PTR;
1481 }
1482 
1483 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1484  * small to hold new_n items. new items are zeroed out if the array grows.
1485  *
1486  * Contrary to krealloc_array, does not free arr if new_n is zero.
1487  */
1488 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1489 {
1490 	size_t alloc_size;
1491 	void *new_arr;
1492 
1493 	if (!new_n || old_n == new_n)
1494 		goto out;
1495 
1496 	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1497 	new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
1498 	if (!new_arr) {
1499 		kfree(arr);
1500 		return NULL;
1501 	}
1502 	arr = new_arr;
1503 
1504 	if (new_n > old_n)
1505 		memset(arr + old_n * size, 0, (new_n - old_n) * size);
1506 
1507 out:
1508 	return arr ? arr : ZERO_SIZE_PTR;
1509 }
1510 
1511 static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1512 {
1513 	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1514 			       sizeof(struct bpf_reference_state), GFP_KERNEL);
1515 	if (!dst->refs)
1516 		return -ENOMEM;
1517 
1518 	dst->acquired_refs = src->acquired_refs;
1519 	return 0;
1520 }
1521 
1522 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1523 {
1524 	size_t n = src->allocated_stack / BPF_REG_SIZE;
1525 
1526 	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1527 				GFP_KERNEL);
1528 	if (!dst->stack)
1529 		return -ENOMEM;
1530 
1531 	dst->allocated_stack = src->allocated_stack;
1532 	return 0;
1533 }
1534 
1535 static int resize_reference_state(struct bpf_func_state *state, size_t n)
1536 {
1537 	state->refs = realloc_array(state->refs, state->acquired_refs, n,
1538 				    sizeof(struct bpf_reference_state));
1539 	if (!state->refs)
1540 		return -ENOMEM;
1541 
1542 	state->acquired_refs = n;
1543 	return 0;
1544 }
1545 
1546 static int grow_stack_state(struct bpf_func_state *state, int size)
1547 {
1548 	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
1549 
1550 	if (old_n >= n)
1551 		return 0;
1552 
1553 	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1554 	if (!state->stack)
1555 		return -ENOMEM;
1556 
1557 	state->allocated_stack = size;
1558 	return 0;
1559 }
1560 
1561 /* Acquire a pointer id from the env and update the state->refs to include
1562  * this new pointer reference.
1563  * On success, returns a valid pointer id to associate with the register
1564  * On failure, returns a negative errno.
1565  */
1566 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1567 {
1568 	struct bpf_func_state *state = cur_func(env);
1569 	int new_ofs = state->acquired_refs;
1570 	int id, err;
1571 
1572 	err = resize_reference_state(state, state->acquired_refs + 1);
1573 	if (err)
1574 		return err;
1575 	id = ++env->id_gen;
1576 	state->refs[new_ofs].id = id;
1577 	state->refs[new_ofs].insn_idx = insn_idx;
1578 	state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
1579 
1580 	return id;
1581 }
1582 
1583 /* release function corresponding to acquire_reference_state(). Idempotent. */
1584 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
1585 {
1586 	int i, last_idx;
1587 
1588 	last_idx = state->acquired_refs - 1;
1589 	for (i = 0; i < state->acquired_refs; i++) {
1590 		if (state->refs[i].id == ptr_id) {
1591 			/* Cannot release caller references in callbacks */
1592 			if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
1593 				return -EINVAL;
1594 			if (last_idx && i != last_idx)
1595 				memcpy(&state->refs[i], &state->refs[last_idx],
1596 				       sizeof(*state->refs));
1597 			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1598 			state->acquired_refs--;
1599 			return 0;
1600 		}
1601 	}
1602 	return -EINVAL;
1603 }
1604 
1605 static void free_func_state(struct bpf_func_state *state)
1606 {
1607 	if (!state)
1608 		return;
1609 	kfree(state->refs);
1610 	kfree(state->stack);
1611 	kfree(state);
1612 }
1613 
1614 static void clear_jmp_history(struct bpf_verifier_state *state)
1615 {
1616 	kfree(state->jmp_history);
1617 	state->jmp_history = NULL;
1618 	state->jmp_history_cnt = 0;
1619 }
1620 
1621 static void free_verifier_state(struct bpf_verifier_state *state,
1622 				bool free_self)
1623 {
1624 	int i;
1625 
1626 	for (i = 0; i <= state->curframe; i++) {
1627 		free_func_state(state->frame[i]);
1628 		state->frame[i] = NULL;
1629 	}
1630 	clear_jmp_history(state);
1631 	if (free_self)
1632 		kfree(state);
1633 }
1634 
1635 /* copy verifier state from src to dst growing dst stack space
1636  * when necessary to accommodate larger src stack
1637  */
1638 static int copy_func_state(struct bpf_func_state *dst,
1639 			   const struct bpf_func_state *src)
1640 {
1641 	int err;
1642 
1643 	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1644 	err = copy_reference_state(dst, src);
1645 	if (err)
1646 		return err;
1647 	return copy_stack_state(dst, src);
1648 }
1649 
1650 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1651 			       const struct bpf_verifier_state *src)
1652 {
1653 	struct bpf_func_state *dst;
1654 	int i, err;
1655 
1656 	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1657 					    src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
1658 					    GFP_USER);
1659 	if (!dst_state->jmp_history)
1660 		return -ENOMEM;
1661 	dst_state->jmp_history_cnt = src->jmp_history_cnt;
1662 
1663 	/* if dst has more stack frames then src frame, free them */
1664 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1665 		free_func_state(dst_state->frame[i]);
1666 		dst_state->frame[i] = NULL;
1667 	}
1668 	dst_state->speculative = src->speculative;
1669 	dst_state->active_rcu_lock = src->active_rcu_lock;
1670 	dst_state->curframe = src->curframe;
1671 	dst_state->active_lock.ptr = src->active_lock.ptr;
1672 	dst_state->active_lock.id = src->active_lock.id;
1673 	dst_state->branches = src->branches;
1674 	dst_state->parent = src->parent;
1675 	dst_state->first_insn_idx = src->first_insn_idx;
1676 	dst_state->last_insn_idx = src->last_insn_idx;
1677 	for (i = 0; i <= src->curframe; i++) {
1678 		dst = dst_state->frame[i];
1679 		if (!dst) {
1680 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1681 			if (!dst)
1682 				return -ENOMEM;
1683 			dst_state->frame[i] = dst;
1684 		}
1685 		err = copy_func_state(dst, src->frame[i]);
1686 		if (err)
1687 			return err;
1688 	}
1689 	return 0;
1690 }
1691 
1692 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1693 {
1694 	while (st) {
1695 		u32 br = --st->branches;
1696 
1697 		/* WARN_ON(br > 1) technically makes sense here,
1698 		 * but see comment in push_stack(), hence:
1699 		 */
1700 		WARN_ONCE((int)br < 0,
1701 			  "BUG update_branch_counts:branches_to_explore=%d\n",
1702 			  br);
1703 		if (br)
1704 			break;
1705 		st = st->parent;
1706 	}
1707 }
1708 
1709 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1710 		     int *insn_idx, bool pop_log)
1711 {
1712 	struct bpf_verifier_state *cur = env->cur_state;
1713 	struct bpf_verifier_stack_elem *elem, *head = env->head;
1714 	int err;
1715 
1716 	if (env->head == NULL)
1717 		return -ENOENT;
1718 
1719 	if (cur) {
1720 		err = copy_verifier_state(cur, &head->st);
1721 		if (err)
1722 			return err;
1723 	}
1724 	if (pop_log)
1725 		bpf_vlog_reset(&env->log, head->log_pos);
1726 	if (insn_idx)
1727 		*insn_idx = head->insn_idx;
1728 	if (prev_insn_idx)
1729 		*prev_insn_idx = head->prev_insn_idx;
1730 	elem = head->next;
1731 	free_verifier_state(&head->st, false);
1732 	kfree(head);
1733 	env->head = elem;
1734 	env->stack_size--;
1735 	return 0;
1736 }
1737 
1738 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1739 					     int insn_idx, int prev_insn_idx,
1740 					     bool speculative)
1741 {
1742 	struct bpf_verifier_state *cur = env->cur_state;
1743 	struct bpf_verifier_stack_elem *elem;
1744 	int err;
1745 
1746 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1747 	if (!elem)
1748 		goto err;
1749 
1750 	elem->insn_idx = insn_idx;
1751 	elem->prev_insn_idx = prev_insn_idx;
1752 	elem->next = env->head;
1753 	elem->log_pos = env->log.end_pos;
1754 	env->head = elem;
1755 	env->stack_size++;
1756 	err = copy_verifier_state(&elem->st, cur);
1757 	if (err)
1758 		goto err;
1759 	elem->st.speculative |= speculative;
1760 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1761 		verbose(env, "The sequence of %d jumps is too complex.\n",
1762 			env->stack_size);
1763 		goto err;
1764 	}
1765 	if (elem->st.parent) {
1766 		++elem->st.parent->branches;
1767 		/* WARN_ON(branches > 2) technically makes sense here,
1768 		 * but
1769 		 * 1. speculative states will bump 'branches' for non-branch
1770 		 * instructions
1771 		 * 2. is_state_visited() heuristics may decide not to create
1772 		 * a new state for a sequence of branches and all such current
1773 		 * and cloned states will be pointing to a single parent state
1774 		 * which might have large 'branches' count.
1775 		 */
1776 	}
1777 	return &elem->st;
1778 err:
1779 	free_verifier_state(env->cur_state, true);
1780 	env->cur_state = NULL;
1781 	/* pop all elements and return */
1782 	while (!pop_stack(env, NULL, NULL, false));
1783 	return NULL;
1784 }
1785 
1786 #define CALLER_SAVED_REGS 6
1787 static const int caller_saved[CALLER_SAVED_REGS] = {
1788 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1789 };
1790 
1791 /* This helper doesn't clear reg->id */
1792 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1793 {
1794 	reg->var_off = tnum_const(imm);
1795 	reg->smin_value = (s64)imm;
1796 	reg->smax_value = (s64)imm;
1797 	reg->umin_value = imm;
1798 	reg->umax_value = imm;
1799 
1800 	reg->s32_min_value = (s32)imm;
1801 	reg->s32_max_value = (s32)imm;
1802 	reg->u32_min_value = (u32)imm;
1803 	reg->u32_max_value = (u32)imm;
1804 }
1805 
1806 /* Mark the unknown part of a register (variable offset or scalar value) as
1807  * known to have the value @imm.
1808  */
1809 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1810 {
1811 	/* Clear off and union(map_ptr, range) */
1812 	memset(((u8 *)reg) + sizeof(reg->type), 0,
1813 	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1814 	reg->id = 0;
1815 	reg->ref_obj_id = 0;
1816 	___mark_reg_known(reg, imm);
1817 }
1818 
1819 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1820 {
1821 	reg->var_off = tnum_const_subreg(reg->var_off, imm);
1822 	reg->s32_min_value = (s32)imm;
1823 	reg->s32_max_value = (s32)imm;
1824 	reg->u32_min_value = (u32)imm;
1825 	reg->u32_max_value = (u32)imm;
1826 }
1827 
1828 /* Mark the 'variable offset' part of a register as zero.  This should be
1829  * used only on registers holding a pointer type.
1830  */
1831 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1832 {
1833 	__mark_reg_known(reg, 0);
1834 }
1835 
1836 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1837 {
1838 	__mark_reg_known(reg, 0);
1839 	reg->type = SCALAR_VALUE;
1840 }
1841 
1842 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1843 				struct bpf_reg_state *regs, u32 regno)
1844 {
1845 	if (WARN_ON(regno >= MAX_BPF_REG)) {
1846 		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1847 		/* Something bad happened, let's kill all regs */
1848 		for (regno = 0; regno < MAX_BPF_REG; regno++)
1849 			__mark_reg_not_init(env, regs + regno);
1850 		return;
1851 	}
1852 	__mark_reg_known_zero(regs + regno);
1853 }
1854 
1855 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
1856 			      bool first_slot, int dynptr_id)
1857 {
1858 	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
1859 	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
1860 	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
1861 	 */
1862 	__mark_reg_known_zero(reg);
1863 	reg->type = CONST_PTR_TO_DYNPTR;
1864 	/* Give each dynptr a unique id to uniquely associate slices to it. */
1865 	reg->id = dynptr_id;
1866 	reg->dynptr.type = type;
1867 	reg->dynptr.first_slot = first_slot;
1868 }
1869 
1870 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1871 {
1872 	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1873 		const struct bpf_map *map = reg->map_ptr;
1874 
1875 		if (map->inner_map_meta) {
1876 			reg->type = CONST_PTR_TO_MAP;
1877 			reg->map_ptr = map->inner_map_meta;
1878 			/* transfer reg's id which is unique for every map_lookup_elem
1879 			 * as UID of the inner map.
1880 			 */
1881 			if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
1882 				reg->map_uid = reg->id;
1883 		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1884 			reg->type = PTR_TO_XDP_SOCK;
1885 		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1886 			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1887 			reg->type = PTR_TO_SOCKET;
1888 		} else {
1889 			reg->type = PTR_TO_MAP_VALUE;
1890 		}
1891 		return;
1892 	}
1893 
1894 	reg->type &= ~PTR_MAYBE_NULL;
1895 }
1896 
1897 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
1898 				struct btf_field_graph_root *ds_head)
1899 {
1900 	__mark_reg_known_zero(&regs[regno]);
1901 	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
1902 	regs[regno].btf = ds_head->btf;
1903 	regs[regno].btf_id = ds_head->value_btf_id;
1904 	regs[regno].off = ds_head->node_offset;
1905 }
1906 
1907 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1908 {
1909 	return type_is_pkt_pointer(reg->type);
1910 }
1911 
1912 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1913 {
1914 	return reg_is_pkt_pointer(reg) ||
1915 	       reg->type == PTR_TO_PACKET_END;
1916 }
1917 
1918 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
1919 {
1920 	return base_type(reg->type) == PTR_TO_MEM &&
1921 		(reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
1922 }
1923 
1924 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1925 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1926 				    enum bpf_reg_type which)
1927 {
1928 	/* The register can already have a range from prior markings.
1929 	 * This is fine as long as it hasn't been advanced from its
1930 	 * origin.
1931 	 */
1932 	return reg->type == which &&
1933 	       reg->id == 0 &&
1934 	       reg->off == 0 &&
1935 	       tnum_equals_const(reg->var_off, 0);
1936 }
1937 
1938 /* Reset the min/max bounds of a register */
1939 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1940 {
1941 	reg->smin_value = S64_MIN;
1942 	reg->smax_value = S64_MAX;
1943 	reg->umin_value = 0;
1944 	reg->umax_value = U64_MAX;
1945 
1946 	reg->s32_min_value = S32_MIN;
1947 	reg->s32_max_value = S32_MAX;
1948 	reg->u32_min_value = 0;
1949 	reg->u32_max_value = U32_MAX;
1950 }
1951 
1952 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1953 {
1954 	reg->smin_value = S64_MIN;
1955 	reg->smax_value = S64_MAX;
1956 	reg->umin_value = 0;
1957 	reg->umax_value = U64_MAX;
1958 }
1959 
1960 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1961 {
1962 	reg->s32_min_value = S32_MIN;
1963 	reg->s32_max_value = S32_MAX;
1964 	reg->u32_min_value = 0;
1965 	reg->u32_max_value = U32_MAX;
1966 }
1967 
1968 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1969 {
1970 	struct tnum var32_off = tnum_subreg(reg->var_off);
1971 
1972 	/* min signed is max(sign bit) | min(other bits) */
1973 	reg->s32_min_value = max_t(s32, reg->s32_min_value,
1974 			var32_off.value | (var32_off.mask & S32_MIN));
1975 	/* max signed is min(sign bit) | max(other bits) */
1976 	reg->s32_max_value = min_t(s32, reg->s32_max_value,
1977 			var32_off.value | (var32_off.mask & S32_MAX));
1978 	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1979 	reg->u32_max_value = min(reg->u32_max_value,
1980 				 (u32)(var32_off.value | var32_off.mask));
1981 }
1982 
1983 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1984 {
1985 	/* min signed is max(sign bit) | min(other bits) */
1986 	reg->smin_value = max_t(s64, reg->smin_value,
1987 				reg->var_off.value | (reg->var_off.mask & S64_MIN));
1988 	/* max signed is min(sign bit) | max(other bits) */
1989 	reg->smax_value = min_t(s64, reg->smax_value,
1990 				reg->var_off.value | (reg->var_off.mask & S64_MAX));
1991 	reg->umin_value = max(reg->umin_value, reg->var_off.value);
1992 	reg->umax_value = min(reg->umax_value,
1993 			      reg->var_off.value | reg->var_off.mask);
1994 }
1995 
1996 static void __update_reg_bounds(struct bpf_reg_state *reg)
1997 {
1998 	__update_reg32_bounds(reg);
1999 	__update_reg64_bounds(reg);
2000 }
2001 
2002 /* Uses signed min/max values to inform unsigned, and vice-versa */
2003 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
2004 {
2005 	/* Learn sign from signed bounds.
2006 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
2007 	 * are the same, so combine.  This works even in the negative case, e.g.
2008 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2009 	 */
2010 	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
2011 		reg->s32_min_value = reg->u32_min_value =
2012 			max_t(u32, reg->s32_min_value, reg->u32_min_value);
2013 		reg->s32_max_value = reg->u32_max_value =
2014 			min_t(u32, reg->s32_max_value, reg->u32_max_value);
2015 		return;
2016 	}
2017 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
2018 	 * boundary, so we must be careful.
2019 	 */
2020 	if ((s32)reg->u32_max_value >= 0) {
2021 		/* Positive.  We can't learn anything from the smin, but smax
2022 		 * is positive, hence safe.
2023 		 */
2024 		reg->s32_min_value = reg->u32_min_value;
2025 		reg->s32_max_value = reg->u32_max_value =
2026 			min_t(u32, reg->s32_max_value, reg->u32_max_value);
2027 	} else if ((s32)reg->u32_min_value < 0) {
2028 		/* Negative.  We can't learn anything from the smax, but smin
2029 		 * is negative, hence safe.
2030 		 */
2031 		reg->s32_min_value = reg->u32_min_value =
2032 			max_t(u32, reg->s32_min_value, reg->u32_min_value);
2033 		reg->s32_max_value = reg->u32_max_value;
2034 	}
2035 }
2036 
2037 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
2038 {
2039 	/* Learn sign from signed bounds.
2040 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
2041 	 * are the same, so combine.  This works even in the negative case, e.g.
2042 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2043 	 */
2044 	if (reg->smin_value >= 0 || reg->smax_value < 0) {
2045 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
2046 							  reg->umin_value);
2047 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
2048 							  reg->umax_value);
2049 		return;
2050 	}
2051 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
2052 	 * boundary, so we must be careful.
2053 	 */
2054 	if ((s64)reg->umax_value >= 0) {
2055 		/* Positive.  We can't learn anything from the smin, but smax
2056 		 * is positive, hence safe.
2057 		 */
2058 		reg->smin_value = reg->umin_value;
2059 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
2060 							  reg->umax_value);
2061 	} else if ((s64)reg->umin_value < 0) {
2062 		/* Negative.  We can't learn anything from the smax, but smin
2063 		 * is negative, hence safe.
2064 		 */
2065 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
2066 							  reg->umin_value);
2067 		reg->smax_value = reg->umax_value;
2068 	}
2069 }
2070 
2071 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
2072 {
2073 	__reg32_deduce_bounds(reg);
2074 	__reg64_deduce_bounds(reg);
2075 }
2076 
2077 /* Attempts to improve var_off based on unsigned min/max information */
2078 static void __reg_bound_offset(struct bpf_reg_state *reg)
2079 {
2080 	struct tnum var64_off = tnum_intersect(reg->var_off,
2081 					       tnum_range(reg->umin_value,
2082 							  reg->umax_value));
2083 	struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2084 					       tnum_range(reg->u32_min_value,
2085 							  reg->u32_max_value));
2086 
2087 	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2088 }
2089 
2090 static void reg_bounds_sync(struct bpf_reg_state *reg)
2091 {
2092 	/* We might have learned new bounds from the var_off. */
2093 	__update_reg_bounds(reg);
2094 	/* We might have learned something about the sign bit. */
2095 	__reg_deduce_bounds(reg);
2096 	/* We might have learned some bits from the bounds. */
2097 	__reg_bound_offset(reg);
2098 	/* Intersecting with the old var_off might have improved our bounds
2099 	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
2100 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
2101 	 */
2102 	__update_reg_bounds(reg);
2103 }
2104 
2105 static bool __reg32_bound_s64(s32 a)
2106 {
2107 	return a >= 0 && a <= S32_MAX;
2108 }
2109 
2110 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2111 {
2112 	reg->umin_value = reg->u32_min_value;
2113 	reg->umax_value = reg->u32_max_value;
2114 
2115 	/* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
2116 	 * be positive otherwise set to worse case bounds and refine later
2117 	 * from tnum.
2118 	 */
2119 	if (__reg32_bound_s64(reg->s32_min_value) &&
2120 	    __reg32_bound_s64(reg->s32_max_value)) {
2121 		reg->smin_value = reg->s32_min_value;
2122 		reg->smax_value = reg->s32_max_value;
2123 	} else {
2124 		reg->smin_value = 0;
2125 		reg->smax_value = U32_MAX;
2126 	}
2127 }
2128 
2129 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
2130 {
2131 	/* special case when 64-bit register has upper 32-bit register
2132 	 * zeroed. Typically happens after zext or <<32, >>32 sequence
2133 	 * allowing us to use 32-bit bounds directly,
2134 	 */
2135 	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
2136 		__reg_assign_32_into_64(reg);
2137 	} else {
2138 		/* Otherwise the best we can do is push lower 32bit known and
2139 		 * unknown bits into register (var_off set from jmp logic)
2140 		 * then learn as much as possible from the 64-bit tnum
2141 		 * known and unknown bits. The previous smin/smax bounds are
2142 		 * invalid here because of jmp32 compare so mark them unknown
2143 		 * so they do not impact tnum bounds calculation.
2144 		 */
2145 		__mark_reg64_unbounded(reg);
2146 	}
2147 	reg_bounds_sync(reg);
2148 }
2149 
2150 static bool __reg64_bound_s32(s64 a)
2151 {
2152 	return a >= S32_MIN && a <= S32_MAX;
2153 }
2154 
2155 static bool __reg64_bound_u32(u64 a)
2156 {
2157 	return a >= U32_MIN && a <= U32_MAX;
2158 }
2159 
2160 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
2161 {
2162 	__mark_reg32_unbounded(reg);
2163 	if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
2164 		reg->s32_min_value = (s32)reg->smin_value;
2165 		reg->s32_max_value = (s32)reg->smax_value;
2166 	}
2167 	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
2168 		reg->u32_min_value = (u32)reg->umin_value;
2169 		reg->u32_max_value = (u32)reg->umax_value;
2170 	}
2171 	reg_bounds_sync(reg);
2172 }
2173 
2174 /* Mark a register as having a completely unknown (scalar) value. */
2175 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2176 			       struct bpf_reg_state *reg)
2177 {
2178 	/*
2179 	 * Clear type, off, and union(map_ptr, range) and
2180 	 * padding between 'type' and union
2181 	 */
2182 	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
2183 	reg->type = SCALAR_VALUE;
2184 	reg->id = 0;
2185 	reg->ref_obj_id = 0;
2186 	reg->var_off = tnum_unknown;
2187 	reg->frameno = 0;
2188 	reg->precise = !env->bpf_capable;
2189 	__mark_reg_unbounded(reg);
2190 }
2191 
2192 static void mark_reg_unknown(struct bpf_verifier_env *env,
2193 			     struct bpf_reg_state *regs, u32 regno)
2194 {
2195 	if (WARN_ON(regno >= MAX_BPF_REG)) {
2196 		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
2197 		/* Something bad happened, let's kill all regs except FP */
2198 		for (regno = 0; regno < BPF_REG_FP; regno++)
2199 			__mark_reg_not_init(env, regs + regno);
2200 		return;
2201 	}
2202 	__mark_reg_unknown(env, regs + regno);
2203 }
2204 
2205 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
2206 				struct bpf_reg_state *reg)
2207 {
2208 	__mark_reg_unknown(env, reg);
2209 	reg->type = NOT_INIT;
2210 }
2211 
2212 static void mark_reg_not_init(struct bpf_verifier_env *env,
2213 			      struct bpf_reg_state *regs, u32 regno)
2214 {
2215 	if (WARN_ON(regno >= MAX_BPF_REG)) {
2216 		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
2217 		/* Something bad happened, let's kill all regs except FP */
2218 		for (regno = 0; regno < BPF_REG_FP; regno++)
2219 			__mark_reg_not_init(env, regs + regno);
2220 		return;
2221 	}
2222 	__mark_reg_not_init(env, regs + regno);
2223 }
2224 
2225 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
2226 			    struct bpf_reg_state *regs, u32 regno,
2227 			    enum bpf_reg_type reg_type,
2228 			    struct btf *btf, u32 btf_id,
2229 			    enum bpf_type_flag flag)
2230 {
2231 	if (reg_type == SCALAR_VALUE) {
2232 		mark_reg_unknown(env, regs, regno);
2233 		return;
2234 	}
2235 	mark_reg_known_zero(env, regs, regno);
2236 	regs[regno].type = PTR_TO_BTF_ID | flag;
2237 	regs[regno].btf = btf;
2238 	regs[regno].btf_id = btf_id;
2239 }
2240 
2241 #define DEF_NOT_SUBREG	(0)
2242 static void init_reg_state(struct bpf_verifier_env *env,
2243 			   struct bpf_func_state *state)
2244 {
2245 	struct bpf_reg_state *regs = state->regs;
2246 	int i;
2247 
2248 	for (i = 0; i < MAX_BPF_REG; i++) {
2249 		mark_reg_not_init(env, regs, i);
2250 		regs[i].live = REG_LIVE_NONE;
2251 		regs[i].parent = NULL;
2252 		regs[i].subreg_def = DEF_NOT_SUBREG;
2253 	}
2254 
2255 	/* frame pointer */
2256 	regs[BPF_REG_FP].type = PTR_TO_STACK;
2257 	mark_reg_known_zero(env, regs, BPF_REG_FP);
2258 	regs[BPF_REG_FP].frameno = state->frameno;
2259 }
2260 
2261 #define BPF_MAIN_FUNC (-1)
2262 static void init_func_state(struct bpf_verifier_env *env,
2263 			    struct bpf_func_state *state,
2264 			    int callsite, int frameno, int subprogno)
2265 {
2266 	state->callsite = callsite;
2267 	state->frameno = frameno;
2268 	state->subprogno = subprogno;
2269 	state->callback_ret_range = tnum_range(0, 0);
2270 	init_reg_state(env, state);
2271 	mark_verifier_state_scratched(env);
2272 }
2273 
2274 /* Similar to push_stack(), but for async callbacks */
2275 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2276 						int insn_idx, int prev_insn_idx,
2277 						int subprog)
2278 {
2279 	struct bpf_verifier_stack_elem *elem;
2280 	struct bpf_func_state *frame;
2281 
2282 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2283 	if (!elem)
2284 		goto err;
2285 
2286 	elem->insn_idx = insn_idx;
2287 	elem->prev_insn_idx = prev_insn_idx;
2288 	elem->next = env->head;
2289 	elem->log_pos = env->log.end_pos;
2290 	env->head = elem;
2291 	env->stack_size++;
2292 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2293 		verbose(env,
2294 			"The sequence of %d jumps is too complex for async cb.\n",
2295 			env->stack_size);
2296 		goto err;
2297 	}
2298 	/* Unlike push_stack() do not copy_verifier_state().
2299 	 * The caller state doesn't matter.
2300 	 * This is async callback. It starts in a fresh stack.
2301 	 * Initialize it similar to do_check_common().
2302 	 */
2303 	elem->st.branches = 1;
2304 	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
2305 	if (!frame)
2306 		goto err;
2307 	init_func_state(env, frame,
2308 			BPF_MAIN_FUNC /* callsite */,
2309 			0 /* frameno within this callchain */,
2310 			subprog /* subprog number within this prog */);
2311 	elem->st.frame[0] = frame;
2312 	return &elem->st;
2313 err:
2314 	free_verifier_state(env->cur_state, true);
2315 	env->cur_state = NULL;
2316 	/* pop all elements and return */
2317 	while (!pop_stack(env, NULL, NULL, false));
2318 	return NULL;
2319 }
2320 
2321 
2322 enum reg_arg_type {
2323 	SRC_OP,		/* register is used as source operand */
2324 	DST_OP,		/* register is used as destination operand */
2325 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
2326 };
2327 
2328 static int cmp_subprogs(const void *a, const void *b)
2329 {
2330 	return ((struct bpf_subprog_info *)a)->start -
2331 	       ((struct bpf_subprog_info *)b)->start;
2332 }
2333 
2334 static int find_subprog(struct bpf_verifier_env *env, int off)
2335 {
2336 	struct bpf_subprog_info *p;
2337 
2338 	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
2339 		    sizeof(env->subprog_info[0]), cmp_subprogs);
2340 	if (!p)
2341 		return -ENOENT;
2342 	return p - env->subprog_info;
2343 
2344 }
2345 
2346 static int add_subprog(struct bpf_verifier_env *env, int off)
2347 {
2348 	int insn_cnt = env->prog->len;
2349 	int ret;
2350 
2351 	if (off >= insn_cnt || off < 0) {
2352 		verbose(env, "call to invalid destination\n");
2353 		return -EINVAL;
2354 	}
2355 	ret = find_subprog(env, off);
2356 	if (ret >= 0)
2357 		return ret;
2358 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2359 		verbose(env, "too many subprograms\n");
2360 		return -E2BIG;
2361 	}
2362 	/* determine subprog starts. The end is one before the next starts */
2363 	env->subprog_info[env->subprog_cnt++].start = off;
2364 	sort(env->subprog_info, env->subprog_cnt,
2365 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2366 	return env->subprog_cnt - 1;
2367 }
2368 
2369 #define MAX_KFUNC_DESCS 256
2370 #define MAX_KFUNC_BTFS	256
2371 
2372 struct bpf_kfunc_desc {
2373 	struct btf_func_model func_model;
2374 	u32 func_id;
2375 	s32 imm;
2376 	u16 offset;
2377 };
2378 
2379 struct bpf_kfunc_btf {
2380 	struct btf *btf;
2381 	struct module *module;
2382 	u16 offset;
2383 };
2384 
2385 struct bpf_kfunc_desc_tab {
2386 	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
2387 	u32 nr_descs;
2388 };
2389 
2390 struct bpf_kfunc_btf_tab {
2391 	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2392 	u32 nr_descs;
2393 };
2394 
2395 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2396 {
2397 	const struct bpf_kfunc_desc *d0 = a;
2398 	const struct bpf_kfunc_desc *d1 = b;
2399 
2400 	/* func_id is not greater than BTF_MAX_TYPE */
2401 	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2402 }
2403 
2404 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2405 {
2406 	const struct bpf_kfunc_btf *d0 = a;
2407 	const struct bpf_kfunc_btf *d1 = b;
2408 
2409 	return d0->offset - d1->offset;
2410 }
2411 
2412 static const struct bpf_kfunc_desc *
2413 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2414 {
2415 	struct bpf_kfunc_desc desc = {
2416 		.func_id = func_id,
2417 		.offset = offset,
2418 	};
2419 	struct bpf_kfunc_desc_tab *tab;
2420 
2421 	tab = prog->aux->kfunc_tab;
2422 	return bsearch(&desc, tab->descs, tab->nr_descs,
2423 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2424 }
2425 
2426 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2427 					 s16 offset)
2428 {
2429 	struct bpf_kfunc_btf kf_btf = { .offset = offset };
2430 	struct bpf_kfunc_btf_tab *tab;
2431 	struct bpf_kfunc_btf *b;
2432 	struct module *mod;
2433 	struct btf *btf;
2434 	int btf_fd;
2435 
2436 	tab = env->prog->aux->kfunc_btf_tab;
2437 	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2438 		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2439 	if (!b) {
2440 		if (tab->nr_descs == MAX_KFUNC_BTFS) {
2441 			verbose(env, "too many different module BTFs\n");
2442 			return ERR_PTR(-E2BIG);
2443 		}
2444 
2445 		if (bpfptr_is_null(env->fd_array)) {
2446 			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2447 			return ERR_PTR(-EPROTO);
2448 		}
2449 
2450 		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2451 					    offset * sizeof(btf_fd),
2452 					    sizeof(btf_fd)))
2453 			return ERR_PTR(-EFAULT);
2454 
2455 		btf = btf_get_by_fd(btf_fd);
2456 		if (IS_ERR(btf)) {
2457 			verbose(env, "invalid module BTF fd specified\n");
2458 			return btf;
2459 		}
2460 
2461 		if (!btf_is_module(btf)) {
2462 			verbose(env, "BTF fd for kfunc is not a module BTF\n");
2463 			btf_put(btf);
2464 			return ERR_PTR(-EINVAL);
2465 		}
2466 
2467 		mod = btf_try_get_module(btf);
2468 		if (!mod) {
2469 			btf_put(btf);
2470 			return ERR_PTR(-ENXIO);
2471 		}
2472 
2473 		b = &tab->descs[tab->nr_descs++];
2474 		b->btf = btf;
2475 		b->module = mod;
2476 		b->offset = offset;
2477 
2478 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2479 		     kfunc_btf_cmp_by_off, NULL);
2480 	}
2481 	return b->btf;
2482 }
2483 
2484 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2485 {
2486 	if (!tab)
2487 		return;
2488 
2489 	while (tab->nr_descs--) {
2490 		module_put(tab->descs[tab->nr_descs].module);
2491 		btf_put(tab->descs[tab->nr_descs].btf);
2492 	}
2493 	kfree(tab);
2494 }
2495 
2496 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2497 {
2498 	if (offset) {
2499 		if (offset < 0) {
2500 			/* In the future, this can be allowed to increase limit
2501 			 * of fd index into fd_array, interpreted as u16.
2502 			 */
2503 			verbose(env, "negative offset disallowed for kernel module function call\n");
2504 			return ERR_PTR(-EINVAL);
2505 		}
2506 
2507 		return __find_kfunc_desc_btf(env, offset);
2508 	}
2509 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
2510 }
2511 
2512 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
2513 {
2514 	const struct btf_type *func, *func_proto;
2515 	struct bpf_kfunc_btf_tab *btf_tab;
2516 	struct bpf_kfunc_desc_tab *tab;
2517 	struct bpf_prog_aux *prog_aux;
2518 	struct bpf_kfunc_desc *desc;
2519 	const char *func_name;
2520 	struct btf *desc_btf;
2521 	unsigned long call_imm;
2522 	unsigned long addr;
2523 	int err;
2524 
2525 	prog_aux = env->prog->aux;
2526 	tab = prog_aux->kfunc_tab;
2527 	btf_tab = prog_aux->kfunc_btf_tab;
2528 	if (!tab) {
2529 		if (!btf_vmlinux) {
2530 			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2531 			return -ENOTSUPP;
2532 		}
2533 
2534 		if (!env->prog->jit_requested) {
2535 			verbose(env, "JIT is required for calling kernel function\n");
2536 			return -ENOTSUPP;
2537 		}
2538 
2539 		if (!bpf_jit_supports_kfunc_call()) {
2540 			verbose(env, "JIT does not support calling kernel function\n");
2541 			return -ENOTSUPP;
2542 		}
2543 
2544 		if (!env->prog->gpl_compatible) {
2545 			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2546 			return -EINVAL;
2547 		}
2548 
2549 		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
2550 		if (!tab)
2551 			return -ENOMEM;
2552 		prog_aux->kfunc_tab = tab;
2553 	}
2554 
2555 	/* func_id == 0 is always invalid, but instead of returning an error, be
2556 	 * conservative and wait until the code elimination pass before returning
2557 	 * error, so that invalid calls that get pruned out can be in BPF programs
2558 	 * loaded from userspace.  It is also required that offset be untouched
2559 	 * for such calls.
2560 	 */
2561 	if (!func_id && !offset)
2562 		return 0;
2563 
2564 	if (!btf_tab && offset) {
2565 		btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
2566 		if (!btf_tab)
2567 			return -ENOMEM;
2568 		prog_aux->kfunc_btf_tab = btf_tab;
2569 	}
2570 
2571 	desc_btf = find_kfunc_desc_btf(env, offset);
2572 	if (IS_ERR(desc_btf)) {
2573 		verbose(env, "failed to find BTF for kernel function\n");
2574 		return PTR_ERR(desc_btf);
2575 	}
2576 
2577 	if (find_kfunc_desc(env->prog, func_id, offset))
2578 		return 0;
2579 
2580 	if (tab->nr_descs == MAX_KFUNC_DESCS) {
2581 		verbose(env, "too many different kernel function calls\n");
2582 		return -E2BIG;
2583 	}
2584 
2585 	func = btf_type_by_id(desc_btf, func_id);
2586 	if (!func || !btf_type_is_func(func)) {
2587 		verbose(env, "kernel btf_id %u is not a function\n",
2588 			func_id);
2589 		return -EINVAL;
2590 	}
2591 	func_proto = btf_type_by_id(desc_btf, func->type);
2592 	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2593 		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
2594 			func_id);
2595 		return -EINVAL;
2596 	}
2597 
2598 	func_name = btf_name_by_offset(desc_btf, func->name_off);
2599 	addr = kallsyms_lookup_name(func_name);
2600 	if (!addr) {
2601 		verbose(env, "cannot find address for kernel function %s\n",
2602 			func_name);
2603 		return -EINVAL;
2604 	}
2605 
2606 	call_imm = BPF_CALL_IMM(addr);
2607 	/* Check whether or not the relative offset overflows desc->imm */
2608 	if ((unsigned long)(s32)call_imm != call_imm) {
2609 		verbose(env, "address of kernel function %s is out of range\n",
2610 			func_name);
2611 		return -EINVAL;
2612 	}
2613 
2614 	if (bpf_dev_bound_kfunc_id(func_id)) {
2615 		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
2616 		if (err)
2617 			return err;
2618 	}
2619 
2620 	desc = &tab->descs[tab->nr_descs++];
2621 	desc->func_id = func_id;
2622 	desc->imm = call_imm;
2623 	desc->offset = offset;
2624 	err = btf_distill_func_proto(&env->log, desc_btf,
2625 				     func_proto, func_name,
2626 				     &desc->func_model);
2627 	if (!err)
2628 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2629 		     kfunc_desc_cmp_by_id_off, NULL);
2630 	return err;
2631 }
2632 
2633 static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
2634 {
2635 	const struct bpf_kfunc_desc *d0 = a;
2636 	const struct bpf_kfunc_desc *d1 = b;
2637 
2638 	if (d0->imm > d1->imm)
2639 		return 1;
2640 	else if (d0->imm < d1->imm)
2641 		return -1;
2642 	return 0;
2643 }
2644 
2645 static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
2646 {
2647 	struct bpf_kfunc_desc_tab *tab;
2648 
2649 	tab = prog->aux->kfunc_tab;
2650 	if (!tab)
2651 		return;
2652 
2653 	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2654 	     kfunc_desc_cmp_by_imm, NULL);
2655 }
2656 
2657 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2658 {
2659 	return !!prog->aux->kfunc_tab;
2660 }
2661 
2662 const struct btf_func_model *
2663 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
2664 			 const struct bpf_insn *insn)
2665 {
2666 	const struct bpf_kfunc_desc desc = {
2667 		.imm = insn->imm,
2668 	};
2669 	const struct bpf_kfunc_desc *res;
2670 	struct bpf_kfunc_desc_tab *tab;
2671 
2672 	tab = prog->aux->kfunc_tab;
2673 	res = bsearch(&desc, tab->descs, tab->nr_descs,
2674 		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
2675 
2676 	return res ? &res->func_model : NULL;
2677 }
2678 
2679 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2680 {
2681 	struct bpf_subprog_info *subprog = env->subprog_info;
2682 	struct bpf_insn *insn = env->prog->insnsi;
2683 	int i, ret, insn_cnt = env->prog->len;
2684 
2685 	/* Add entry function. */
2686 	ret = add_subprog(env, 0);
2687 	if (ret)
2688 		return ret;
2689 
2690 	for (i = 0; i < insn_cnt; i++, insn++) {
2691 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2692 		    !bpf_pseudo_kfunc_call(insn))
2693 			continue;
2694 
2695 		if (!env->bpf_capable) {
2696 			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2697 			return -EPERM;
2698 		}
2699 
2700 		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2701 			ret = add_subprog(env, i + insn->imm + 1);
2702 		else
2703 			ret = add_kfunc_call(env, insn->imm, insn->off);
2704 
2705 		if (ret < 0)
2706 			return ret;
2707 	}
2708 
2709 	/* Add a fake 'exit' subprog which could simplify subprog iteration
2710 	 * logic. 'subprog_cnt' should not be increased.
2711 	 */
2712 	subprog[env->subprog_cnt].start = insn_cnt;
2713 
2714 	if (env->log.level & BPF_LOG_LEVEL2)
2715 		for (i = 0; i < env->subprog_cnt; i++)
2716 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
2717 
2718 	return 0;
2719 }
2720 
2721 static int check_subprogs(struct bpf_verifier_env *env)
2722 {
2723 	int i, subprog_start, subprog_end, off, cur_subprog = 0;
2724 	struct bpf_subprog_info *subprog = env->subprog_info;
2725 	struct bpf_insn *insn = env->prog->insnsi;
2726 	int insn_cnt = env->prog->len;
2727 
2728 	/* now check that all jumps are within the same subprog */
2729 	subprog_start = subprog[cur_subprog].start;
2730 	subprog_end = subprog[cur_subprog + 1].start;
2731 	for (i = 0; i < insn_cnt; i++) {
2732 		u8 code = insn[i].code;
2733 
2734 		if (code == (BPF_JMP | BPF_CALL) &&
2735 		    insn[i].src_reg == 0 &&
2736 		    insn[i].imm == BPF_FUNC_tail_call)
2737 			subprog[cur_subprog].has_tail_call = true;
2738 		if (BPF_CLASS(code) == BPF_LD &&
2739 		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2740 			subprog[cur_subprog].has_ld_abs = true;
2741 		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2742 			goto next;
2743 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2744 			goto next;
2745 		off = i + insn[i].off + 1;
2746 		if (off < subprog_start || off >= subprog_end) {
2747 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
2748 			return -EINVAL;
2749 		}
2750 next:
2751 		if (i == subprog_end - 1) {
2752 			/* to avoid fall-through from one subprog into another
2753 			 * the last insn of the subprog should be either exit
2754 			 * or unconditional jump back
2755 			 */
2756 			if (code != (BPF_JMP | BPF_EXIT) &&
2757 			    code != (BPF_JMP | BPF_JA)) {
2758 				verbose(env, "last insn is not an exit or jmp\n");
2759 				return -EINVAL;
2760 			}
2761 			subprog_start = subprog_end;
2762 			cur_subprog++;
2763 			if (cur_subprog < env->subprog_cnt)
2764 				subprog_end = subprog[cur_subprog + 1].start;
2765 		}
2766 	}
2767 	return 0;
2768 }
2769 
2770 /* Parentage chain of this register (or stack slot) should take care of all
2771  * issues like callee-saved registers, stack slot allocation time, etc.
2772  */
2773 static int mark_reg_read(struct bpf_verifier_env *env,
2774 			 const struct bpf_reg_state *state,
2775 			 struct bpf_reg_state *parent, u8 flag)
2776 {
2777 	bool writes = parent == state->parent; /* Observe write marks */
2778 	int cnt = 0;
2779 
2780 	while (parent) {
2781 		/* if read wasn't screened by an earlier write ... */
2782 		if (writes && state->live & REG_LIVE_WRITTEN)
2783 			break;
2784 		if (parent->live & REG_LIVE_DONE) {
2785 			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2786 				reg_type_str(env, parent->type),
2787 				parent->var_off.value, parent->off);
2788 			return -EFAULT;
2789 		}
2790 		/* The first condition is more likely to be true than the
2791 		 * second, checked it first.
2792 		 */
2793 		if ((parent->live & REG_LIVE_READ) == flag ||
2794 		    parent->live & REG_LIVE_READ64)
2795 			/* The parentage chain never changes and
2796 			 * this parent was already marked as LIVE_READ.
2797 			 * There is no need to keep walking the chain again and
2798 			 * keep re-marking all parents as LIVE_READ.
2799 			 * This case happens when the same register is read
2800 			 * multiple times without writes into it in-between.
2801 			 * Also, if parent has the stronger REG_LIVE_READ64 set,
2802 			 * then no need to set the weak REG_LIVE_READ32.
2803 			 */
2804 			break;
2805 		/* ... then we depend on parent's value */
2806 		parent->live |= flag;
2807 		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2808 		if (flag == REG_LIVE_READ64)
2809 			parent->live &= ~REG_LIVE_READ32;
2810 		state = parent;
2811 		parent = state->parent;
2812 		writes = true;
2813 		cnt++;
2814 	}
2815 
2816 	if (env->longest_mark_read_walk < cnt)
2817 		env->longest_mark_read_walk = cnt;
2818 	return 0;
2819 }
2820 
2821 static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
2822 {
2823 	struct bpf_func_state *state = func(env, reg);
2824 	int spi, ret;
2825 
2826 	/* For CONST_PTR_TO_DYNPTR, it must have already been done by
2827 	 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
2828 	 * check_kfunc_call.
2829 	 */
2830 	if (reg->type == CONST_PTR_TO_DYNPTR)
2831 		return 0;
2832 	spi = dynptr_get_spi(env, reg);
2833 	if (spi < 0)
2834 		return spi;
2835 	/* Caller ensures dynptr is valid and initialized, which means spi is in
2836 	 * bounds and spi is the first dynptr slot. Simply mark stack slot as
2837 	 * read.
2838 	 */
2839 	ret = mark_reg_read(env, &state->stack[spi].spilled_ptr,
2840 			    state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
2841 	if (ret)
2842 		return ret;
2843 	return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr,
2844 			     state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
2845 }
2846 
2847 static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
2848 			  int spi, int nr_slots)
2849 {
2850 	struct bpf_func_state *state = func(env, reg);
2851 	int err, i;
2852 
2853 	for (i = 0; i < nr_slots; i++) {
2854 		struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
2855 
2856 		err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64);
2857 		if (err)
2858 			return err;
2859 
2860 		mark_stack_slot_scratched(env, spi - i);
2861 	}
2862 
2863 	return 0;
2864 }
2865 
2866 /* This function is supposed to be used by the following 32-bit optimization
2867  * code only. It returns TRUE if the source or destination register operates
2868  * on 64-bit, otherwise return FALSE.
2869  */
2870 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2871 		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2872 {
2873 	u8 code, class, op;
2874 
2875 	code = insn->code;
2876 	class = BPF_CLASS(code);
2877 	op = BPF_OP(code);
2878 	if (class == BPF_JMP) {
2879 		/* BPF_EXIT for "main" will reach here. Return TRUE
2880 		 * conservatively.
2881 		 */
2882 		if (op == BPF_EXIT)
2883 			return true;
2884 		if (op == BPF_CALL) {
2885 			/* BPF to BPF call will reach here because of marking
2886 			 * caller saved clobber with DST_OP_NO_MARK for which we
2887 			 * don't care the register def because they are anyway
2888 			 * marked as NOT_INIT already.
2889 			 */
2890 			if (insn->src_reg == BPF_PSEUDO_CALL)
2891 				return false;
2892 			/* Helper call will reach here because of arg type
2893 			 * check, conservatively return TRUE.
2894 			 */
2895 			if (t == SRC_OP)
2896 				return true;
2897 
2898 			return false;
2899 		}
2900 	}
2901 
2902 	if (class == BPF_ALU64 || class == BPF_JMP ||
2903 	    /* BPF_END always use BPF_ALU class. */
2904 	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2905 		return true;
2906 
2907 	if (class == BPF_ALU || class == BPF_JMP32)
2908 		return false;
2909 
2910 	if (class == BPF_LDX) {
2911 		if (t != SRC_OP)
2912 			return BPF_SIZE(code) == BPF_DW;
2913 		/* LDX source must be ptr. */
2914 		return true;
2915 	}
2916 
2917 	if (class == BPF_STX) {
2918 		/* BPF_STX (including atomic variants) has multiple source
2919 		 * operands, one of which is a ptr. Check whether the caller is
2920 		 * asking about it.
2921 		 */
2922 		if (t == SRC_OP && reg->type != SCALAR_VALUE)
2923 			return true;
2924 		return BPF_SIZE(code) == BPF_DW;
2925 	}
2926 
2927 	if (class == BPF_LD) {
2928 		u8 mode = BPF_MODE(code);
2929 
2930 		/* LD_IMM64 */
2931 		if (mode == BPF_IMM)
2932 			return true;
2933 
2934 		/* Both LD_IND and LD_ABS return 32-bit data. */
2935 		if (t != SRC_OP)
2936 			return  false;
2937 
2938 		/* Implicit ctx ptr. */
2939 		if (regno == BPF_REG_6)
2940 			return true;
2941 
2942 		/* Explicit source could be any width. */
2943 		return true;
2944 	}
2945 
2946 	if (class == BPF_ST)
2947 		/* The only source register for BPF_ST is a ptr. */
2948 		return true;
2949 
2950 	/* Conservatively return true at default. */
2951 	return true;
2952 }
2953 
2954 /* Return the regno defined by the insn, or -1. */
2955 static int insn_def_regno(const struct bpf_insn *insn)
2956 {
2957 	switch (BPF_CLASS(insn->code)) {
2958 	case BPF_JMP:
2959 	case BPF_JMP32:
2960 	case BPF_ST:
2961 		return -1;
2962 	case BPF_STX:
2963 		if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2964 		    (insn->imm & BPF_FETCH)) {
2965 			if (insn->imm == BPF_CMPXCHG)
2966 				return BPF_REG_0;
2967 			else
2968 				return insn->src_reg;
2969 		} else {
2970 			return -1;
2971 		}
2972 	default:
2973 		return insn->dst_reg;
2974 	}
2975 }
2976 
2977 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
2978 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2979 {
2980 	int dst_reg = insn_def_regno(insn);
2981 
2982 	if (dst_reg == -1)
2983 		return false;
2984 
2985 	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
2986 }
2987 
2988 static void mark_insn_zext(struct bpf_verifier_env *env,
2989 			   struct bpf_reg_state *reg)
2990 {
2991 	s32 def_idx = reg->subreg_def;
2992 
2993 	if (def_idx == DEF_NOT_SUBREG)
2994 		return;
2995 
2996 	env->insn_aux_data[def_idx - 1].zext_dst = true;
2997 	/* The dst will be zero extended, so won't be sub-register anymore. */
2998 	reg->subreg_def = DEF_NOT_SUBREG;
2999 }
3000 
3001 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3002 			 enum reg_arg_type t)
3003 {
3004 	struct bpf_verifier_state *vstate = env->cur_state;
3005 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3006 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3007 	struct bpf_reg_state *reg, *regs = state->regs;
3008 	bool rw64;
3009 
3010 	if (regno >= MAX_BPF_REG) {
3011 		verbose(env, "R%d is invalid\n", regno);
3012 		return -EINVAL;
3013 	}
3014 
3015 	mark_reg_scratched(env, regno);
3016 
3017 	reg = &regs[regno];
3018 	rw64 = is_reg64(env, insn, regno, reg, t);
3019 	if (t == SRC_OP) {
3020 		/* check whether register used as source operand can be read */
3021 		if (reg->type == NOT_INIT) {
3022 			verbose(env, "R%d !read_ok\n", regno);
3023 			return -EACCES;
3024 		}
3025 		/* We don't need to worry about FP liveness because it's read-only */
3026 		if (regno == BPF_REG_FP)
3027 			return 0;
3028 
3029 		if (rw64)
3030 			mark_insn_zext(env, reg);
3031 
3032 		return mark_reg_read(env, reg, reg->parent,
3033 				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
3034 	} else {
3035 		/* check whether register used as dest operand can be written to */
3036 		if (regno == BPF_REG_FP) {
3037 			verbose(env, "frame pointer is read only\n");
3038 			return -EACCES;
3039 		}
3040 		reg->live |= REG_LIVE_WRITTEN;
3041 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3042 		if (t == DST_OP)
3043 			mark_reg_unknown(env, regs, regno);
3044 	}
3045 	return 0;
3046 }
3047 
3048 static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
3049 {
3050 	env->insn_aux_data[idx].jmp_point = true;
3051 }
3052 
3053 static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
3054 {
3055 	return env->insn_aux_data[insn_idx].jmp_point;
3056 }
3057 
3058 /* for any branch, call, exit record the history of jmps in the given state */
3059 static int push_jmp_history(struct bpf_verifier_env *env,
3060 			    struct bpf_verifier_state *cur)
3061 {
3062 	u32 cnt = cur->jmp_history_cnt;
3063 	struct bpf_idx_pair *p;
3064 	size_t alloc_size;
3065 
3066 	if (!is_jmp_point(env, env->insn_idx))
3067 		return 0;
3068 
3069 	cnt++;
3070 	alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
3071 	p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
3072 	if (!p)
3073 		return -ENOMEM;
3074 	p[cnt - 1].idx = env->insn_idx;
3075 	p[cnt - 1].prev_idx = env->prev_insn_idx;
3076 	cur->jmp_history = p;
3077 	cur->jmp_history_cnt = cnt;
3078 	return 0;
3079 }
3080 
3081 /* Backtrack one insn at a time. If idx is not at the top of recorded
3082  * history then previous instruction came from straight line execution.
3083  */
3084 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
3085 			     u32 *history)
3086 {
3087 	u32 cnt = *history;
3088 
3089 	if (cnt && st->jmp_history[cnt - 1].idx == i) {
3090 		i = st->jmp_history[cnt - 1].prev_idx;
3091 		(*history)--;
3092 	} else {
3093 		i--;
3094 	}
3095 	return i;
3096 }
3097 
3098 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3099 {
3100 	const struct btf_type *func;
3101 	struct btf *desc_btf;
3102 
3103 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3104 		return NULL;
3105 
3106 	desc_btf = find_kfunc_desc_btf(data, insn->off);
3107 	if (IS_ERR(desc_btf))
3108 		return "<error>";
3109 
3110 	func = btf_type_by_id(desc_btf, insn->imm);
3111 	return btf_name_by_offset(desc_btf, func->name_off);
3112 }
3113 
3114 /* For given verifier state backtrack_insn() is called from the last insn to
3115  * the first insn. Its purpose is to compute a bitmask of registers and
3116  * stack slots that needs precision in the parent verifier state.
3117  */
3118 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
3119 			  u32 *reg_mask, u64 *stack_mask)
3120 {
3121 	const struct bpf_insn_cbs cbs = {
3122 		.cb_call	= disasm_kfunc_name,
3123 		.cb_print	= verbose,
3124 		.private_data	= env,
3125 	};
3126 	struct bpf_insn *insn = env->prog->insnsi + idx;
3127 	u8 class = BPF_CLASS(insn->code);
3128 	u8 opcode = BPF_OP(insn->code);
3129 	u8 mode = BPF_MODE(insn->code);
3130 	u32 dreg = 1u << insn->dst_reg;
3131 	u32 sreg = 1u << insn->src_reg;
3132 	u32 spi;
3133 
3134 	if (insn->code == 0)
3135 		return 0;
3136 	if (env->log.level & BPF_LOG_LEVEL2) {
3137 		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
3138 		verbose(env, "%d: ", idx);
3139 		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
3140 	}
3141 
3142 	if (class == BPF_ALU || class == BPF_ALU64) {
3143 		if (!(*reg_mask & dreg))
3144 			return 0;
3145 		if (opcode == BPF_MOV) {
3146 			if (BPF_SRC(insn->code) == BPF_X) {
3147 				/* dreg = sreg
3148 				 * dreg needs precision after this insn
3149 				 * sreg needs precision before this insn
3150 				 */
3151 				*reg_mask &= ~dreg;
3152 				*reg_mask |= sreg;
3153 			} else {
3154 				/* dreg = K
3155 				 * dreg needs precision after this insn.
3156 				 * Corresponding register is already marked
3157 				 * as precise=true in this verifier state.
3158 				 * No further markings in parent are necessary
3159 				 */
3160 				*reg_mask &= ~dreg;
3161 			}
3162 		} else {
3163 			if (BPF_SRC(insn->code) == BPF_X) {
3164 				/* dreg += sreg
3165 				 * both dreg and sreg need precision
3166 				 * before this insn
3167 				 */
3168 				*reg_mask |= sreg;
3169 			} /* else dreg += K
3170 			   * dreg still needs precision before this insn
3171 			   */
3172 		}
3173 	} else if (class == BPF_LDX) {
3174 		if (!(*reg_mask & dreg))
3175 			return 0;
3176 		*reg_mask &= ~dreg;
3177 
3178 		/* scalars can only be spilled into stack w/o losing precision.
3179 		 * Load from any other memory can be zero extended.
3180 		 * The desire to keep that precision is already indicated
3181 		 * by 'precise' mark in corresponding register of this state.
3182 		 * No further tracking necessary.
3183 		 */
3184 		if (insn->src_reg != BPF_REG_FP)
3185 			return 0;
3186 
3187 		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
3188 		 * that [fp - off] slot contains scalar that needs to be
3189 		 * tracked with precision
3190 		 */
3191 		spi = (-insn->off - 1) / BPF_REG_SIZE;
3192 		if (spi >= 64) {
3193 			verbose(env, "BUG spi %d\n", spi);
3194 			WARN_ONCE(1, "verifier backtracking bug");
3195 			return -EFAULT;
3196 		}
3197 		*stack_mask |= 1ull << spi;
3198 	} else if (class == BPF_STX || class == BPF_ST) {
3199 		if (*reg_mask & dreg)
3200 			/* stx & st shouldn't be using _scalar_ dst_reg
3201 			 * to access memory. It means backtracking
3202 			 * encountered a case of pointer subtraction.
3203 			 */
3204 			return -ENOTSUPP;
3205 		/* scalars can only be spilled into stack */
3206 		if (insn->dst_reg != BPF_REG_FP)
3207 			return 0;
3208 		spi = (-insn->off - 1) / BPF_REG_SIZE;
3209 		if (spi >= 64) {
3210 			verbose(env, "BUG spi %d\n", spi);
3211 			WARN_ONCE(1, "verifier backtracking bug");
3212 			return -EFAULT;
3213 		}
3214 		if (!(*stack_mask & (1ull << spi)))
3215 			return 0;
3216 		*stack_mask &= ~(1ull << spi);
3217 		if (class == BPF_STX)
3218 			*reg_mask |= sreg;
3219 	} else if (class == BPF_JMP || class == BPF_JMP32) {
3220 		if (opcode == BPF_CALL) {
3221 			if (insn->src_reg == BPF_PSEUDO_CALL)
3222 				return -ENOTSUPP;
3223 			/* BPF helpers that invoke callback subprogs are
3224 			 * equivalent to BPF_PSEUDO_CALL above
3225 			 */
3226 			if (insn->src_reg == 0 && is_callback_calling_function(insn->imm))
3227 				return -ENOTSUPP;
3228 			/* kfunc with imm==0 is invalid and fixup_kfunc_call will
3229 			 * catch this error later. Make backtracking conservative
3230 			 * with ENOTSUPP.
3231 			 */
3232 			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
3233 				return -ENOTSUPP;
3234 			/* regular helper call sets R0 */
3235 			*reg_mask &= ~1;
3236 			if (*reg_mask & 0x3f) {
3237 				/* if backtracing was looking for registers R1-R5
3238 				 * they should have been found already.
3239 				 */
3240 				verbose(env, "BUG regs %x\n", *reg_mask);
3241 				WARN_ONCE(1, "verifier backtracking bug");
3242 				return -EFAULT;
3243 			}
3244 		} else if (opcode == BPF_EXIT) {
3245 			return -ENOTSUPP;
3246 		}
3247 	} else if (class == BPF_LD) {
3248 		if (!(*reg_mask & dreg))
3249 			return 0;
3250 		*reg_mask &= ~dreg;
3251 		/* It's ld_imm64 or ld_abs or ld_ind.
3252 		 * For ld_imm64 no further tracking of precision
3253 		 * into parent is necessary
3254 		 */
3255 		if (mode == BPF_IND || mode == BPF_ABS)
3256 			/* to be analyzed */
3257 			return -ENOTSUPP;
3258 	}
3259 	return 0;
3260 }
3261 
3262 /* the scalar precision tracking algorithm:
3263  * . at the start all registers have precise=false.
3264  * . scalar ranges are tracked as normal through alu and jmp insns.
3265  * . once precise value of the scalar register is used in:
3266  *   .  ptr + scalar alu
3267  *   . if (scalar cond K|scalar)
3268  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
3269  *   backtrack through the verifier states and mark all registers and
3270  *   stack slots with spilled constants that these scalar regisers
3271  *   should be precise.
3272  * . during state pruning two registers (or spilled stack slots)
3273  *   are equivalent if both are not precise.
3274  *
3275  * Note the verifier cannot simply walk register parentage chain,
3276  * since many different registers and stack slots could have been
3277  * used to compute single precise scalar.
3278  *
3279  * The approach of starting with precise=true for all registers and then
3280  * backtrack to mark a register as not precise when the verifier detects
3281  * that program doesn't care about specific value (e.g., when helper
3282  * takes register as ARG_ANYTHING parameter) is not safe.
3283  *
3284  * It's ok to walk single parentage chain of the verifier states.
3285  * It's possible that this backtracking will go all the way till 1st insn.
3286  * All other branches will be explored for needing precision later.
3287  *
3288  * The backtracking needs to deal with cases like:
3289  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
3290  * r9 -= r8
3291  * r5 = r9
3292  * if r5 > 0x79f goto pc+7
3293  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
3294  * r5 += 1
3295  * ...
3296  * call bpf_perf_event_output#25
3297  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
3298  *
3299  * and this case:
3300  * r6 = 1
3301  * call foo // uses callee's r6 inside to compute r0
3302  * r0 += r6
3303  * if r0 == 0 goto
3304  *
3305  * to track above reg_mask/stack_mask needs to be independent for each frame.
3306  *
3307  * Also if parent's curframe > frame where backtracking started,
3308  * the verifier need to mark registers in both frames, otherwise callees
3309  * may incorrectly prune callers. This is similar to
3310  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
3311  *
3312  * For now backtracking falls back into conservative marking.
3313  */
3314 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
3315 				     struct bpf_verifier_state *st)
3316 {
3317 	struct bpf_func_state *func;
3318 	struct bpf_reg_state *reg;
3319 	int i, j;
3320 
3321 	/* big hammer: mark all scalars precise in this path.
3322 	 * pop_stack may still get !precise scalars.
3323 	 * We also skip current state and go straight to first parent state,
3324 	 * because precision markings in current non-checkpointed state are
3325 	 * not needed. See why in the comment in __mark_chain_precision below.
3326 	 */
3327 	for (st = st->parent; st; st = st->parent) {
3328 		for (i = 0; i <= st->curframe; i++) {
3329 			func = st->frame[i];
3330 			for (j = 0; j < BPF_REG_FP; j++) {
3331 				reg = &func->regs[j];
3332 				if (reg->type != SCALAR_VALUE)
3333 					continue;
3334 				reg->precise = true;
3335 			}
3336 			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3337 				if (!is_spilled_reg(&func->stack[j]))
3338 					continue;
3339 				reg = &func->stack[j].spilled_ptr;
3340 				if (reg->type != SCALAR_VALUE)
3341 					continue;
3342 				reg->precise = true;
3343 			}
3344 		}
3345 	}
3346 }
3347 
3348 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
3349 {
3350 	struct bpf_func_state *func;
3351 	struct bpf_reg_state *reg;
3352 	int i, j;
3353 
3354 	for (i = 0; i <= st->curframe; i++) {
3355 		func = st->frame[i];
3356 		for (j = 0; j < BPF_REG_FP; j++) {
3357 			reg = &func->regs[j];
3358 			if (reg->type != SCALAR_VALUE)
3359 				continue;
3360 			reg->precise = false;
3361 		}
3362 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3363 			if (!is_spilled_reg(&func->stack[j]))
3364 				continue;
3365 			reg = &func->stack[j].spilled_ptr;
3366 			if (reg->type != SCALAR_VALUE)
3367 				continue;
3368 			reg->precise = false;
3369 		}
3370 	}
3371 }
3372 
3373 /*
3374  * __mark_chain_precision() backtracks BPF program instruction sequence and
3375  * chain of verifier states making sure that register *regno* (if regno >= 0)
3376  * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
3377  * SCALARS, as well as any other registers and slots that contribute to
3378  * a tracked state of given registers/stack slots, depending on specific BPF
3379  * assembly instructions (see backtrack_insns() for exact instruction handling
3380  * logic). This backtracking relies on recorded jmp_history and is able to
3381  * traverse entire chain of parent states. This process ends only when all the
3382  * necessary registers/slots and their transitive dependencies are marked as
3383  * precise.
3384  *
3385  * One important and subtle aspect is that precise marks *do not matter* in
3386  * the currently verified state (current state). It is important to understand
3387  * why this is the case.
3388  *
3389  * First, note that current state is the state that is not yet "checkpointed",
3390  * i.e., it is not yet put into env->explored_states, and it has no children
3391  * states as well. It's ephemeral, and can end up either a) being discarded if
3392  * compatible explored state is found at some point or BPF_EXIT instruction is
3393  * reached or b) checkpointed and put into env->explored_states, branching out
3394  * into one or more children states.
3395  *
3396  * In the former case, precise markings in current state are completely
3397  * ignored by state comparison code (see regsafe() for details). Only
3398  * checkpointed ("old") state precise markings are important, and if old
3399  * state's register/slot is precise, regsafe() assumes current state's
3400  * register/slot as precise and checks value ranges exactly and precisely. If
3401  * states turn out to be compatible, current state's necessary precise
3402  * markings and any required parent states' precise markings are enforced
3403  * after the fact with propagate_precision() logic, after the fact. But it's
3404  * important to realize that in this case, even after marking current state
3405  * registers/slots as precise, we immediately discard current state. So what
3406  * actually matters is any of the precise markings propagated into current
3407  * state's parent states, which are always checkpointed (due to b) case above).
3408  * As such, for scenario a) it doesn't matter if current state has precise
3409  * markings set or not.
3410  *
3411  * Now, for the scenario b), checkpointing and forking into child(ren)
3412  * state(s). Note that before current state gets to checkpointing step, any
3413  * processed instruction always assumes precise SCALAR register/slot
3414  * knowledge: if precise value or range is useful to prune jump branch, BPF
3415  * verifier takes this opportunity enthusiastically. Similarly, when
3416  * register's value is used to calculate offset or memory address, exact
3417  * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
3418  * what we mentioned above about state comparison ignoring precise markings
3419  * during state comparison, BPF verifier ignores and also assumes precise
3420  * markings *at will* during instruction verification process. But as verifier
3421  * assumes precision, it also propagates any precision dependencies across
3422  * parent states, which are not yet finalized, so can be further restricted
3423  * based on new knowledge gained from restrictions enforced by their children
3424  * states. This is so that once those parent states are finalized, i.e., when
3425  * they have no more active children state, state comparison logic in
3426  * is_state_visited() would enforce strict and precise SCALAR ranges, if
3427  * required for correctness.
3428  *
3429  * To build a bit more intuition, note also that once a state is checkpointed,
3430  * the path we took to get to that state is not important. This is crucial
3431  * property for state pruning. When state is checkpointed and finalized at
3432  * some instruction index, it can be correctly and safely used to "short
3433  * circuit" any *compatible* state that reaches exactly the same instruction
3434  * index. I.e., if we jumped to that instruction from a completely different
3435  * code path than original finalized state was derived from, it doesn't
3436  * matter, current state can be discarded because from that instruction
3437  * forward having a compatible state will ensure we will safely reach the
3438  * exit. States describe preconditions for further exploration, but completely
3439  * forget the history of how we got here.
3440  *
3441  * This also means that even if we needed precise SCALAR range to get to
3442  * finalized state, but from that point forward *that same* SCALAR register is
3443  * never used in a precise context (i.e., it's precise value is not needed for
3444  * correctness), it's correct and safe to mark such register as "imprecise"
3445  * (i.e., precise marking set to false). This is what we rely on when we do
3446  * not set precise marking in current state. If no child state requires
3447  * precision for any given SCALAR register, it's safe to dictate that it can
3448  * be imprecise. If any child state does require this register to be precise,
3449  * we'll mark it precise later retroactively during precise markings
3450  * propagation from child state to parent states.
3451  *
3452  * Skipping precise marking setting in current state is a mild version of
3453  * relying on the above observation. But we can utilize this property even
3454  * more aggressively by proactively forgetting any precise marking in the
3455  * current state (which we inherited from the parent state), right before we
3456  * checkpoint it and branch off into new child state. This is done by
3457  * mark_all_scalars_imprecise() to hopefully get more permissive and generic
3458  * finalized states which help in short circuiting more future states.
3459  */
3460 static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno,
3461 				  int spi)
3462 {
3463 	struct bpf_verifier_state *st = env->cur_state;
3464 	int first_idx = st->first_insn_idx;
3465 	int last_idx = env->insn_idx;
3466 	struct bpf_func_state *func;
3467 	struct bpf_reg_state *reg;
3468 	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
3469 	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
3470 	bool skip_first = true;
3471 	bool new_marks = false;
3472 	int i, err;
3473 
3474 	if (!env->bpf_capable)
3475 		return 0;
3476 
3477 	/* Do sanity checks against current state of register and/or stack
3478 	 * slot, but don't set precise flag in current state, as precision
3479 	 * tracking in the current state is unnecessary.
3480 	 */
3481 	func = st->frame[frame];
3482 	if (regno >= 0) {
3483 		reg = &func->regs[regno];
3484 		if (reg->type != SCALAR_VALUE) {
3485 			WARN_ONCE(1, "backtracing misuse");
3486 			return -EFAULT;
3487 		}
3488 		new_marks = true;
3489 	}
3490 
3491 	while (spi >= 0) {
3492 		if (!is_spilled_reg(&func->stack[spi])) {
3493 			stack_mask = 0;
3494 			break;
3495 		}
3496 		reg = &func->stack[spi].spilled_ptr;
3497 		if (reg->type != SCALAR_VALUE) {
3498 			stack_mask = 0;
3499 			break;
3500 		}
3501 		new_marks = true;
3502 		break;
3503 	}
3504 
3505 	if (!new_marks)
3506 		return 0;
3507 	if (!reg_mask && !stack_mask)
3508 		return 0;
3509 
3510 	for (;;) {
3511 		DECLARE_BITMAP(mask, 64);
3512 		u32 history = st->jmp_history_cnt;
3513 
3514 		if (env->log.level & BPF_LOG_LEVEL2)
3515 			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
3516 
3517 		if (last_idx < 0) {
3518 			/* we are at the entry into subprog, which
3519 			 * is expected for global funcs, but only if
3520 			 * requested precise registers are R1-R5
3521 			 * (which are global func's input arguments)
3522 			 */
3523 			if (st->curframe == 0 &&
3524 			    st->frame[0]->subprogno > 0 &&
3525 			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
3526 			    stack_mask == 0 && (reg_mask & ~0x3e) == 0) {
3527 				bitmap_from_u64(mask, reg_mask);
3528 				for_each_set_bit(i, mask, 32) {
3529 					reg = &st->frame[0]->regs[i];
3530 					if (reg->type != SCALAR_VALUE) {
3531 						reg_mask &= ~(1u << i);
3532 						continue;
3533 					}
3534 					reg->precise = true;
3535 				}
3536 				return 0;
3537 			}
3538 
3539 			verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n",
3540 				st->frame[0]->subprogno, reg_mask, stack_mask);
3541 			WARN_ONCE(1, "verifier backtracking bug");
3542 			return -EFAULT;
3543 		}
3544 
3545 		for (i = last_idx;;) {
3546 			if (skip_first) {
3547 				err = 0;
3548 				skip_first = false;
3549 			} else {
3550 				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
3551 			}
3552 			if (err == -ENOTSUPP) {
3553 				mark_all_scalars_precise(env, st);
3554 				return 0;
3555 			} else if (err) {
3556 				return err;
3557 			}
3558 			if (!reg_mask && !stack_mask)
3559 				/* Found assignment(s) into tracked register in this state.
3560 				 * Since this state is already marked, just return.
3561 				 * Nothing to be tracked further in the parent state.
3562 				 */
3563 				return 0;
3564 			if (i == first_idx)
3565 				break;
3566 			i = get_prev_insn_idx(st, i, &history);
3567 			if (i >= env->prog->len) {
3568 				/* This can happen if backtracking reached insn 0
3569 				 * and there are still reg_mask or stack_mask
3570 				 * to backtrack.
3571 				 * It means the backtracking missed the spot where
3572 				 * particular register was initialized with a constant.
3573 				 */
3574 				verbose(env, "BUG backtracking idx %d\n", i);
3575 				WARN_ONCE(1, "verifier backtracking bug");
3576 				return -EFAULT;
3577 			}
3578 		}
3579 		st = st->parent;
3580 		if (!st)
3581 			break;
3582 
3583 		new_marks = false;
3584 		func = st->frame[frame];
3585 		bitmap_from_u64(mask, reg_mask);
3586 		for_each_set_bit(i, mask, 32) {
3587 			reg = &func->regs[i];
3588 			if (reg->type != SCALAR_VALUE) {
3589 				reg_mask &= ~(1u << i);
3590 				continue;
3591 			}
3592 			if (!reg->precise)
3593 				new_marks = true;
3594 			reg->precise = true;
3595 		}
3596 
3597 		bitmap_from_u64(mask, stack_mask);
3598 		for_each_set_bit(i, mask, 64) {
3599 			if (i >= func->allocated_stack / BPF_REG_SIZE) {
3600 				/* the sequence of instructions:
3601 				 * 2: (bf) r3 = r10
3602 				 * 3: (7b) *(u64 *)(r3 -8) = r0
3603 				 * 4: (79) r4 = *(u64 *)(r10 -8)
3604 				 * doesn't contain jmps. It's backtracked
3605 				 * as a single block.
3606 				 * During backtracking insn 3 is not recognized as
3607 				 * stack access, so at the end of backtracking
3608 				 * stack slot fp-8 is still marked in stack_mask.
3609 				 * However the parent state may not have accessed
3610 				 * fp-8 and it's "unallocated" stack space.
3611 				 * In such case fallback to conservative.
3612 				 */
3613 				mark_all_scalars_precise(env, st);
3614 				return 0;
3615 			}
3616 
3617 			if (!is_spilled_reg(&func->stack[i])) {
3618 				stack_mask &= ~(1ull << i);
3619 				continue;
3620 			}
3621 			reg = &func->stack[i].spilled_ptr;
3622 			if (reg->type != SCALAR_VALUE) {
3623 				stack_mask &= ~(1ull << i);
3624 				continue;
3625 			}
3626 			if (!reg->precise)
3627 				new_marks = true;
3628 			reg->precise = true;
3629 		}
3630 		if (env->log.level & BPF_LOG_LEVEL2) {
3631 			verbose(env, "parent %s regs=%x stack=%llx marks:",
3632 				new_marks ? "didn't have" : "already had",
3633 				reg_mask, stack_mask);
3634 			print_verifier_state(env, func, true);
3635 		}
3636 
3637 		if (!reg_mask && !stack_mask)
3638 			break;
3639 		if (!new_marks)
3640 			break;
3641 
3642 		last_idx = st->last_insn_idx;
3643 		first_idx = st->first_insn_idx;
3644 	}
3645 	return 0;
3646 }
3647 
3648 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
3649 {
3650 	return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
3651 }
3652 
3653 static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
3654 {
3655 	return __mark_chain_precision(env, frame, regno, -1);
3656 }
3657 
3658 static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
3659 {
3660 	return __mark_chain_precision(env, frame, -1, spi);
3661 }
3662 
3663 static bool is_spillable_regtype(enum bpf_reg_type type)
3664 {
3665 	switch (base_type(type)) {
3666 	case PTR_TO_MAP_VALUE:
3667 	case PTR_TO_STACK:
3668 	case PTR_TO_CTX:
3669 	case PTR_TO_PACKET:
3670 	case PTR_TO_PACKET_META:
3671 	case PTR_TO_PACKET_END:
3672 	case PTR_TO_FLOW_KEYS:
3673 	case CONST_PTR_TO_MAP:
3674 	case PTR_TO_SOCKET:
3675 	case PTR_TO_SOCK_COMMON:
3676 	case PTR_TO_TCP_SOCK:
3677 	case PTR_TO_XDP_SOCK:
3678 	case PTR_TO_BTF_ID:
3679 	case PTR_TO_BUF:
3680 	case PTR_TO_MEM:
3681 	case PTR_TO_FUNC:
3682 	case PTR_TO_MAP_KEY:
3683 		return true;
3684 	default:
3685 		return false;
3686 	}
3687 }
3688 
3689 /* Does this register contain a constant zero? */
3690 static bool register_is_null(struct bpf_reg_state *reg)
3691 {
3692 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
3693 }
3694 
3695 static bool register_is_const(struct bpf_reg_state *reg)
3696 {
3697 	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
3698 }
3699 
3700 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
3701 {
3702 	return tnum_is_unknown(reg->var_off) &&
3703 	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
3704 	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
3705 	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
3706 	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
3707 }
3708 
3709 static bool register_is_bounded(struct bpf_reg_state *reg)
3710 {
3711 	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
3712 }
3713 
3714 static bool __is_pointer_value(bool allow_ptr_leaks,
3715 			       const struct bpf_reg_state *reg)
3716 {
3717 	if (allow_ptr_leaks)
3718 		return false;
3719 
3720 	return reg->type != SCALAR_VALUE;
3721 }
3722 
3723 /* Copy src state preserving dst->parent and dst->live fields */
3724 static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
3725 {
3726 	struct bpf_reg_state *parent = dst->parent;
3727 	enum bpf_reg_liveness live = dst->live;
3728 
3729 	*dst = *src;
3730 	dst->parent = parent;
3731 	dst->live = live;
3732 }
3733 
3734 static void save_register_state(struct bpf_func_state *state,
3735 				int spi, struct bpf_reg_state *reg,
3736 				int size)
3737 {
3738 	int i;
3739 
3740 	copy_register_state(&state->stack[spi].spilled_ptr, reg);
3741 	if (size == BPF_REG_SIZE)
3742 		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
3743 
3744 	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
3745 		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
3746 
3747 	/* size < 8 bytes spill */
3748 	for (; i; i--)
3749 		scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
3750 }
3751 
3752 static bool is_bpf_st_mem(struct bpf_insn *insn)
3753 {
3754 	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
3755 }
3756 
3757 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
3758  * stack boundary and alignment are checked in check_mem_access()
3759  */
3760 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
3761 				       /* stack frame we're writing to */
3762 				       struct bpf_func_state *state,
3763 				       int off, int size, int value_regno,
3764 				       int insn_idx)
3765 {
3766 	struct bpf_func_state *cur; /* state of the current function */
3767 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
3768 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3769 	struct bpf_reg_state *reg = NULL;
3770 	u32 dst_reg = insn->dst_reg;
3771 
3772 	err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
3773 	if (err)
3774 		return err;
3775 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
3776 	 * so it's aligned access and [off, off + size) are within stack limits
3777 	 */
3778 	if (!env->allow_ptr_leaks &&
3779 	    state->stack[spi].slot_type[0] == STACK_SPILL &&
3780 	    size != BPF_REG_SIZE) {
3781 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
3782 		return -EACCES;
3783 	}
3784 
3785 	cur = env->cur_state->frame[env->cur_state->curframe];
3786 	if (value_regno >= 0)
3787 		reg = &cur->regs[value_regno];
3788 	if (!env->bypass_spec_v4) {
3789 		bool sanitize = reg && is_spillable_regtype(reg->type);
3790 
3791 		for (i = 0; i < size; i++) {
3792 			u8 type = state->stack[spi].slot_type[i];
3793 
3794 			if (type != STACK_MISC && type != STACK_ZERO) {
3795 				sanitize = true;
3796 				break;
3797 			}
3798 		}
3799 
3800 		if (sanitize)
3801 			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
3802 	}
3803 
3804 	err = destroy_if_dynptr_stack_slot(env, state, spi);
3805 	if (err)
3806 		return err;
3807 
3808 	mark_stack_slot_scratched(env, spi);
3809 	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
3810 	    !register_is_null(reg) && env->bpf_capable) {
3811 		if (dst_reg != BPF_REG_FP) {
3812 			/* The backtracking logic can only recognize explicit
3813 			 * stack slot address like [fp - 8]. Other spill of
3814 			 * scalar via different register has to be conservative.
3815 			 * Backtrack from here and mark all registers as precise
3816 			 * that contributed into 'reg' being a constant.
3817 			 */
3818 			err = mark_chain_precision(env, value_regno);
3819 			if (err)
3820 				return err;
3821 		}
3822 		save_register_state(state, spi, reg, size);
3823 	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
3824 		   insn->imm != 0 && env->bpf_capable) {
3825 		struct bpf_reg_state fake_reg = {};
3826 
3827 		__mark_reg_known(&fake_reg, (u32)insn->imm);
3828 		fake_reg.type = SCALAR_VALUE;
3829 		save_register_state(state, spi, &fake_reg, size);
3830 	} else if (reg && is_spillable_regtype(reg->type)) {
3831 		/* register containing pointer is being spilled into stack */
3832 		if (size != BPF_REG_SIZE) {
3833 			verbose_linfo(env, insn_idx, "; ");
3834 			verbose(env, "invalid size of register spill\n");
3835 			return -EACCES;
3836 		}
3837 		if (state != cur && reg->type == PTR_TO_STACK) {
3838 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
3839 			return -EINVAL;
3840 		}
3841 		save_register_state(state, spi, reg, size);
3842 	} else {
3843 		u8 type = STACK_MISC;
3844 
3845 		/* regular write of data into stack destroys any spilled ptr */
3846 		state->stack[spi].spilled_ptr.type = NOT_INIT;
3847 		/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
3848 		if (is_stack_slot_special(&state->stack[spi]))
3849 			for (i = 0; i < BPF_REG_SIZE; i++)
3850 				scrub_spilled_slot(&state->stack[spi].slot_type[i]);
3851 
3852 		/* only mark the slot as written if all 8 bytes were written
3853 		 * otherwise read propagation may incorrectly stop too soon
3854 		 * when stack slots are partially written.
3855 		 * This heuristic means that read propagation will be
3856 		 * conservative, since it will add reg_live_read marks
3857 		 * to stack slots all the way to first state when programs
3858 		 * writes+reads less than 8 bytes
3859 		 */
3860 		if (size == BPF_REG_SIZE)
3861 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
3862 
3863 		/* when we zero initialize stack slots mark them as such */
3864 		if ((reg && register_is_null(reg)) ||
3865 		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
3866 			/* backtracking doesn't work for STACK_ZERO yet. */
3867 			err = mark_chain_precision(env, value_regno);
3868 			if (err)
3869 				return err;
3870 			type = STACK_ZERO;
3871 		}
3872 
3873 		/* Mark slots affected by this stack write. */
3874 		for (i = 0; i < size; i++)
3875 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
3876 				type;
3877 	}
3878 	return 0;
3879 }
3880 
3881 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
3882  * known to contain a variable offset.
3883  * This function checks whether the write is permitted and conservatively
3884  * tracks the effects of the write, considering that each stack slot in the
3885  * dynamic range is potentially written to.
3886  *
3887  * 'off' includes 'regno->off'.
3888  * 'value_regno' can be -1, meaning that an unknown value is being written to
3889  * the stack.
3890  *
3891  * Spilled pointers in range are not marked as written because we don't know
3892  * what's going to be actually written. This means that read propagation for
3893  * future reads cannot be terminated by this write.
3894  *
3895  * For privileged programs, uninitialized stack slots are considered
3896  * initialized by this write (even though we don't know exactly what offsets
3897  * are going to be written to). The idea is that we don't want the verifier to
3898  * reject future reads that access slots written to through variable offsets.
3899  */
3900 static int check_stack_write_var_off(struct bpf_verifier_env *env,
3901 				     /* func where register points to */
3902 				     struct bpf_func_state *state,
3903 				     int ptr_regno, int off, int size,
3904 				     int value_regno, int insn_idx)
3905 {
3906 	struct bpf_func_state *cur; /* state of the current function */
3907 	int min_off, max_off;
3908 	int i, err;
3909 	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
3910 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3911 	bool writing_zero = false;
3912 	/* set if the fact that we're writing a zero is used to let any
3913 	 * stack slots remain STACK_ZERO
3914 	 */
3915 	bool zero_used = false;
3916 
3917 	cur = env->cur_state->frame[env->cur_state->curframe];
3918 	ptr_reg = &cur->regs[ptr_regno];
3919 	min_off = ptr_reg->smin_value + off;
3920 	max_off = ptr_reg->smax_value + off + size;
3921 	if (value_regno >= 0)
3922 		value_reg = &cur->regs[value_regno];
3923 	if ((value_reg && register_is_null(value_reg)) ||
3924 	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
3925 		writing_zero = true;
3926 
3927 	err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
3928 	if (err)
3929 		return err;
3930 
3931 	for (i = min_off; i < max_off; i++) {
3932 		int spi;
3933 
3934 		spi = __get_spi(i);
3935 		err = destroy_if_dynptr_stack_slot(env, state, spi);
3936 		if (err)
3937 			return err;
3938 	}
3939 
3940 	/* Variable offset writes destroy any spilled pointers in range. */
3941 	for (i = min_off; i < max_off; i++) {
3942 		u8 new_type, *stype;
3943 		int slot, spi;
3944 
3945 		slot = -i - 1;
3946 		spi = slot / BPF_REG_SIZE;
3947 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3948 		mark_stack_slot_scratched(env, spi);
3949 
3950 		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
3951 			/* Reject the write if range we may write to has not
3952 			 * been initialized beforehand. If we didn't reject
3953 			 * here, the ptr status would be erased below (even
3954 			 * though not all slots are actually overwritten),
3955 			 * possibly opening the door to leaks.
3956 			 *
3957 			 * We do however catch STACK_INVALID case below, and
3958 			 * only allow reading possibly uninitialized memory
3959 			 * later for CAP_PERFMON, as the write may not happen to
3960 			 * that slot.
3961 			 */
3962 			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3963 				insn_idx, i);
3964 			return -EINVAL;
3965 		}
3966 
3967 		/* Erase all spilled pointers. */
3968 		state->stack[spi].spilled_ptr.type = NOT_INIT;
3969 
3970 		/* Update the slot type. */
3971 		new_type = STACK_MISC;
3972 		if (writing_zero && *stype == STACK_ZERO) {
3973 			new_type = STACK_ZERO;
3974 			zero_used = true;
3975 		}
3976 		/* If the slot is STACK_INVALID, we check whether it's OK to
3977 		 * pretend that it will be initialized by this write. The slot
3978 		 * might not actually be written to, and so if we mark it as
3979 		 * initialized future reads might leak uninitialized memory.
3980 		 * For privileged programs, we will accept such reads to slots
3981 		 * that may or may not be written because, if we're reject
3982 		 * them, the error would be too confusing.
3983 		 */
3984 		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
3985 			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3986 					insn_idx, i);
3987 			return -EINVAL;
3988 		}
3989 		*stype = new_type;
3990 	}
3991 	if (zero_used) {
3992 		/* backtracking doesn't work for STACK_ZERO yet. */
3993 		err = mark_chain_precision(env, value_regno);
3994 		if (err)
3995 			return err;
3996 	}
3997 	return 0;
3998 }
3999 
4000 /* When register 'dst_regno' is assigned some values from stack[min_off,
4001  * max_off), we set the register's type according to the types of the
4002  * respective stack slots. If all the stack values are known to be zeros, then
4003  * so is the destination reg. Otherwise, the register is considered to be
4004  * SCALAR. This function does not deal with register filling; the caller must
4005  * ensure that all spilled registers in the stack range have been marked as
4006  * read.
4007  */
4008 static void mark_reg_stack_read(struct bpf_verifier_env *env,
4009 				/* func where src register points to */
4010 				struct bpf_func_state *ptr_state,
4011 				int min_off, int max_off, int dst_regno)
4012 {
4013 	struct bpf_verifier_state *vstate = env->cur_state;
4014 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4015 	int i, slot, spi;
4016 	u8 *stype;
4017 	int zeros = 0;
4018 
4019 	for (i = min_off; i < max_off; i++) {
4020 		slot = -i - 1;
4021 		spi = slot / BPF_REG_SIZE;
4022 		stype = ptr_state->stack[spi].slot_type;
4023 		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
4024 			break;
4025 		zeros++;
4026 	}
4027 	if (zeros == max_off - min_off) {
4028 		/* any access_size read into register is zero extended,
4029 		 * so the whole register == const_zero
4030 		 */
4031 		__mark_reg_const_zero(&state->regs[dst_regno]);
4032 		/* backtracking doesn't support STACK_ZERO yet,
4033 		 * so mark it precise here, so that later
4034 		 * backtracking can stop here.
4035 		 * Backtracking may not need this if this register
4036 		 * doesn't participate in pointer adjustment.
4037 		 * Forward propagation of precise flag is not
4038 		 * necessary either. This mark is only to stop
4039 		 * backtracking. Any register that contributed
4040 		 * to const 0 was marked precise before spill.
4041 		 */
4042 		state->regs[dst_regno].precise = true;
4043 	} else {
4044 		/* have read misc data from the stack */
4045 		mark_reg_unknown(env, state->regs, dst_regno);
4046 	}
4047 	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4048 }
4049 
4050 /* Read the stack at 'off' and put the results into the register indicated by
4051  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
4052  * spilled reg.
4053  *
4054  * 'dst_regno' can be -1, meaning that the read value is not going to a
4055  * register.
4056  *
4057  * The access is assumed to be within the current stack bounds.
4058  */
4059 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
4060 				      /* func where src register points to */
4061 				      struct bpf_func_state *reg_state,
4062 				      int off, int size, int dst_regno)
4063 {
4064 	struct bpf_verifier_state *vstate = env->cur_state;
4065 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4066 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
4067 	struct bpf_reg_state *reg;
4068 	u8 *stype, type;
4069 
4070 	stype = reg_state->stack[spi].slot_type;
4071 	reg = &reg_state->stack[spi].spilled_ptr;
4072 
4073 	if (is_spilled_reg(&reg_state->stack[spi])) {
4074 		u8 spill_size = 1;
4075 
4076 		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
4077 			spill_size++;
4078 
4079 		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
4080 			if (reg->type != SCALAR_VALUE) {
4081 				verbose_linfo(env, env->insn_idx, "; ");
4082 				verbose(env, "invalid size of register fill\n");
4083 				return -EACCES;
4084 			}
4085 
4086 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4087 			if (dst_regno < 0)
4088 				return 0;
4089 
4090 			if (!(off % BPF_REG_SIZE) && size == spill_size) {
4091 				/* The earlier check_reg_arg() has decided the
4092 				 * subreg_def for this insn.  Save it first.
4093 				 */
4094 				s32 subreg_def = state->regs[dst_regno].subreg_def;
4095 
4096 				copy_register_state(&state->regs[dst_regno], reg);
4097 				state->regs[dst_regno].subreg_def = subreg_def;
4098 			} else {
4099 				for (i = 0; i < size; i++) {
4100 					type = stype[(slot - i) % BPF_REG_SIZE];
4101 					if (type == STACK_SPILL)
4102 						continue;
4103 					if (type == STACK_MISC)
4104 						continue;
4105 					if (type == STACK_INVALID && env->allow_uninit_stack)
4106 						continue;
4107 					verbose(env, "invalid read from stack off %d+%d size %d\n",
4108 						off, i, size);
4109 					return -EACCES;
4110 				}
4111 				mark_reg_unknown(env, state->regs, dst_regno);
4112 			}
4113 			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4114 			return 0;
4115 		}
4116 
4117 		if (dst_regno >= 0) {
4118 			/* restore register state from stack */
4119 			copy_register_state(&state->regs[dst_regno], reg);
4120 			/* mark reg as written since spilled pointer state likely
4121 			 * has its liveness marks cleared by is_state_visited()
4122 			 * which resets stack/reg liveness for state transitions
4123 			 */
4124 			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4125 		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
4126 			/* If dst_regno==-1, the caller is asking us whether
4127 			 * it is acceptable to use this value as a SCALAR_VALUE
4128 			 * (e.g. for XADD).
4129 			 * We must not allow unprivileged callers to do that
4130 			 * with spilled pointers.
4131 			 */
4132 			verbose(env, "leaking pointer from stack off %d\n",
4133 				off);
4134 			return -EACCES;
4135 		}
4136 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4137 	} else {
4138 		for (i = 0; i < size; i++) {
4139 			type = stype[(slot - i) % BPF_REG_SIZE];
4140 			if (type == STACK_MISC)
4141 				continue;
4142 			if (type == STACK_ZERO)
4143 				continue;
4144 			if (type == STACK_INVALID && env->allow_uninit_stack)
4145 				continue;
4146 			verbose(env, "invalid read from stack off %d+%d size %d\n",
4147 				off, i, size);
4148 			return -EACCES;
4149 		}
4150 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4151 		if (dst_regno >= 0)
4152 			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
4153 	}
4154 	return 0;
4155 }
4156 
4157 enum bpf_access_src {
4158 	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
4159 	ACCESS_HELPER = 2,  /* the access is performed by a helper */
4160 };
4161 
4162 static int check_stack_range_initialized(struct bpf_verifier_env *env,
4163 					 int regno, int off, int access_size,
4164 					 bool zero_size_allowed,
4165 					 enum bpf_access_src type,
4166 					 struct bpf_call_arg_meta *meta);
4167 
4168 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
4169 {
4170 	return cur_regs(env) + regno;
4171 }
4172 
4173 /* Read the stack at 'ptr_regno + off' and put the result into the register
4174  * 'dst_regno'.
4175  * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
4176  * but not its variable offset.
4177  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
4178  *
4179  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
4180  * filling registers (i.e. reads of spilled register cannot be detected when
4181  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
4182  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
4183  * offset; for a fixed offset check_stack_read_fixed_off should be used
4184  * instead.
4185  */
4186 static int check_stack_read_var_off(struct bpf_verifier_env *env,
4187 				    int ptr_regno, int off, int size, int dst_regno)
4188 {
4189 	/* The state of the source register. */
4190 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4191 	struct bpf_func_state *ptr_state = func(env, reg);
4192 	int err;
4193 	int min_off, max_off;
4194 
4195 	/* Note that we pass a NULL meta, so raw access will not be permitted.
4196 	 */
4197 	err = check_stack_range_initialized(env, ptr_regno, off, size,
4198 					    false, ACCESS_DIRECT, NULL);
4199 	if (err)
4200 		return err;
4201 
4202 	min_off = reg->smin_value + off;
4203 	max_off = reg->smax_value + off;
4204 	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
4205 	return 0;
4206 }
4207 
4208 /* check_stack_read dispatches to check_stack_read_fixed_off or
4209  * check_stack_read_var_off.
4210  *
4211  * The caller must ensure that the offset falls within the allocated stack
4212  * bounds.
4213  *
4214  * 'dst_regno' is a register which will receive the value from the stack. It
4215  * can be -1, meaning that the read value is not going to a register.
4216  */
4217 static int check_stack_read(struct bpf_verifier_env *env,
4218 			    int ptr_regno, int off, int size,
4219 			    int dst_regno)
4220 {
4221 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4222 	struct bpf_func_state *state = func(env, reg);
4223 	int err;
4224 	/* Some accesses are only permitted with a static offset. */
4225 	bool var_off = !tnum_is_const(reg->var_off);
4226 
4227 	/* The offset is required to be static when reads don't go to a
4228 	 * register, in order to not leak pointers (see
4229 	 * check_stack_read_fixed_off).
4230 	 */
4231 	if (dst_regno < 0 && var_off) {
4232 		char tn_buf[48];
4233 
4234 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4235 		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
4236 			tn_buf, off, size);
4237 		return -EACCES;
4238 	}
4239 	/* Variable offset is prohibited for unprivileged mode for simplicity
4240 	 * since it requires corresponding support in Spectre masking for stack
4241 	 * ALU. See also retrieve_ptr_limit(). The check in
4242 	 * check_stack_access_for_ptr_arithmetic() called by
4243 	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
4244 	 * with variable offsets, therefore no check is required here. Further,
4245 	 * just checking it here would be insufficient as speculative stack
4246 	 * writes could still lead to unsafe speculative behaviour.
4247 	 */
4248 	if (!var_off) {
4249 		off += reg->var_off.value;
4250 		err = check_stack_read_fixed_off(env, state, off, size,
4251 						 dst_regno);
4252 	} else {
4253 		/* Variable offset stack reads need more conservative handling
4254 		 * than fixed offset ones. Note that dst_regno >= 0 on this
4255 		 * branch.
4256 		 */
4257 		err = check_stack_read_var_off(env, ptr_regno, off, size,
4258 					       dst_regno);
4259 	}
4260 	return err;
4261 }
4262 
4263 
4264 /* check_stack_write dispatches to check_stack_write_fixed_off or
4265  * check_stack_write_var_off.
4266  *
4267  * 'ptr_regno' is the register used as a pointer into the stack.
4268  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
4269  * 'value_regno' is the register whose value we're writing to the stack. It can
4270  * be -1, meaning that we're not writing from a register.
4271  *
4272  * The caller must ensure that the offset falls within the maximum stack size.
4273  */
4274 static int check_stack_write(struct bpf_verifier_env *env,
4275 			     int ptr_regno, int off, int size,
4276 			     int value_regno, int insn_idx)
4277 {
4278 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4279 	struct bpf_func_state *state = func(env, reg);
4280 	int err;
4281 
4282 	if (tnum_is_const(reg->var_off)) {
4283 		off += reg->var_off.value;
4284 		err = check_stack_write_fixed_off(env, state, off, size,
4285 						  value_regno, insn_idx);
4286 	} else {
4287 		/* Variable offset stack reads need more conservative handling
4288 		 * than fixed offset ones.
4289 		 */
4290 		err = check_stack_write_var_off(env, state,
4291 						ptr_regno, off, size,
4292 						value_regno, insn_idx);
4293 	}
4294 	return err;
4295 }
4296 
4297 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
4298 				 int off, int size, enum bpf_access_type type)
4299 {
4300 	struct bpf_reg_state *regs = cur_regs(env);
4301 	struct bpf_map *map = regs[regno].map_ptr;
4302 	u32 cap = bpf_map_flags_to_cap(map);
4303 
4304 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
4305 		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
4306 			map->value_size, off, size);
4307 		return -EACCES;
4308 	}
4309 
4310 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
4311 		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
4312 			map->value_size, off, size);
4313 		return -EACCES;
4314 	}
4315 
4316 	return 0;
4317 }
4318 
4319 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
4320 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
4321 			      int off, int size, u32 mem_size,
4322 			      bool zero_size_allowed)
4323 {
4324 	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
4325 	struct bpf_reg_state *reg;
4326 
4327 	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
4328 		return 0;
4329 
4330 	reg = &cur_regs(env)[regno];
4331 	switch (reg->type) {
4332 	case PTR_TO_MAP_KEY:
4333 		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
4334 			mem_size, off, size);
4335 		break;
4336 	case PTR_TO_MAP_VALUE:
4337 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
4338 			mem_size, off, size);
4339 		break;
4340 	case PTR_TO_PACKET:
4341 	case PTR_TO_PACKET_META:
4342 	case PTR_TO_PACKET_END:
4343 		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
4344 			off, size, regno, reg->id, off, mem_size);
4345 		break;
4346 	case PTR_TO_MEM:
4347 	default:
4348 		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
4349 			mem_size, off, size);
4350 	}
4351 
4352 	return -EACCES;
4353 }
4354 
4355 /* check read/write into a memory region with possible variable offset */
4356 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
4357 				   int off, int size, u32 mem_size,
4358 				   bool zero_size_allowed)
4359 {
4360 	struct bpf_verifier_state *vstate = env->cur_state;
4361 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4362 	struct bpf_reg_state *reg = &state->regs[regno];
4363 	int err;
4364 
4365 	/* We may have adjusted the register pointing to memory region, so we
4366 	 * need to try adding each of min_value and max_value to off
4367 	 * to make sure our theoretical access will be safe.
4368 	 *
4369 	 * The minimum value is only important with signed
4370 	 * comparisons where we can't assume the floor of a
4371 	 * value is 0.  If we are using signed variables for our
4372 	 * index'es we need to make sure that whatever we use
4373 	 * will have a set floor within our range.
4374 	 */
4375 	if (reg->smin_value < 0 &&
4376 	    (reg->smin_value == S64_MIN ||
4377 	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
4378 	      reg->smin_value + off < 0)) {
4379 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4380 			regno);
4381 		return -EACCES;
4382 	}
4383 	err = __check_mem_access(env, regno, reg->smin_value + off, size,
4384 				 mem_size, zero_size_allowed);
4385 	if (err) {
4386 		verbose(env, "R%d min value is outside of the allowed memory range\n",
4387 			regno);
4388 		return err;
4389 	}
4390 
4391 	/* If we haven't set a max value then we need to bail since we can't be
4392 	 * sure we won't do bad things.
4393 	 * If reg->umax_value + off could overflow, treat that as unbounded too.
4394 	 */
4395 	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
4396 		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
4397 			regno);
4398 		return -EACCES;
4399 	}
4400 	err = __check_mem_access(env, regno, reg->umax_value + off, size,
4401 				 mem_size, zero_size_allowed);
4402 	if (err) {
4403 		verbose(env, "R%d max value is outside of the allowed memory range\n",
4404 			regno);
4405 		return err;
4406 	}
4407 
4408 	return 0;
4409 }
4410 
4411 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
4412 			       const struct bpf_reg_state *reg, int regno,
4413 			       bool fixed_off_ok)
4414 {
4415 	/* Access to this pointer-typed register or passing it to a helper
4416 	 * is only allowed in its original, unmodified form.
4417 	 */
4418 
4419 	if (reg->off < 0) {
4420 		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
4421 			reg_type_str(env, reg->type), regno, reg->off);
4422 		return -EACCES;
4423 	}
4424 
4425 	if (!fixed_off_ok && reg->off) {
4426 		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
4427 			reg_type_str(env, reg->type), regno, reg->off);
4428 		return -EACCES;
4429 	}
4430 
4431 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4432 		char tn_buf[48];
4433 
4434 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4435 		verbose(env, "variable %s access var_off=%s disallowed\n",
4436 			reg_type_str(env, reg->type), tn_buf);
4437 		return -EACCES;
4438 	}
4439 
4440 	return 0;
4441 }
4442 
4443 int check_ptr_off_reg(struct bpf_verifier_env *env,
4444 		      const struct bpf_reg_state *reg, int regno)
4445 {
4446 	return __check_ptr_off_reg(env, reg, regno, false);
4447 }
4448 
4449 static int map_kptr_match_type(struct bpf_verifier_env *env,
4450 			       struct btf_field *kptr_field,
4451 			       struct bpf_reg_state *reg, u32 regno)
4452 {
4453 	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
4454 	int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
4455 	const char *reg_name = "";
4456 
4457 	/* Only unreferenced case accepts untrusted pointers */
4458 	if (kptr_field->type == BPF_KPTR_UNREF)
4459 		perm_flags |= PTR_UNTRUSTED;
4460 
4461 	if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
4462 		goto bad_type;
4463 
4464 	if (!btf_is_kernel(reg->btf)) {
4465 		verbose(env, "R%d must point to kernel BTF\n", regno);
4466 		return -EINVAL;
4467 	}
4468 	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
4469 	reg_name = btf_type_name(reg->btf, reg->btf_id);
4470 
4471 	/* For ref_ptr case, release function check should ensure we get one
4472 	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
4473 	 * normal store of unreferenced kptr, we must ensure var_off is zero.
4474 	 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
4475 	 * reg->off and reg->ref_obj_id are not needed here.
4476 	 */
4477 	if (__check_ptr_off_reg(env, reg, regno, true))
4478 		return -EACCES;
4479 
4480 	/* A full type match is needed, as BTF can be vmlinux or module BTF, and
4481 	 * we also need to take into account the reg->off.
4482 	 *
4483 	 * We want to support cases like:
4484 	 *
4485 	 * struct foo {
4486 	 *         struct bar br;
4487 	 *         struct baz bz;
4488 	 * };
4489 	 *
4490 	 * struct foo *v;
4491 	 * v = func();	      // PTR_TO_BTF_ID
4492 	 * val->foo = v;      // reg->off is zero, btf and btf_id match type
4493 	 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
4494 	 *                    // first member type of struct after comparison fails
4495 	 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
4496 	 *                    // to match type
4497 	 *
4498 	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
4499 	 * is zero. We must also ensure that btf_struct_ids_match does not walk
4500 	 * the struct to match type against first member of struct, i.e. reject
4501 	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
4502 	 * strict mode to true for type match.
4503 	 */
4504 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
4505 				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
4506 				  kptr_field->type == BPF_KPTR_REF))
4507 		goto bad_type;
4508 	return 0;
4509 bad_type:
4510 	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
4511 		reg_type_str(env, reg->type), reg_name);
4512 	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
4513 	if (kptr_field->type == BPF_KPTR_UNREF)
4514 		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
4515 			targ_name);
4516 	else
4517 		verbose(env, "\n");
4518 	return -EINVAL;
4519 }
4520 
4521 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
4522  * can dereference RCU protected pointers and result is PTR_TRUSTED.
4523  */
4524 static bool in_rcu_cs(struct bpf_verifier_env *env)
4525 {
4526 	return env->cur_state->active_rcu_lock || !env->prog->aux->sleepable;
4527 }
4528 
4529 /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
4530 BTF_SET_START(rcu_protected_types)
4531 BTF_ID(struct, prog_test_ref_kfunc)
4532 BTF_ID(struct, cgroup)
4533 BTF_ID(struct, bpf_cpumask)
4534 BTF_ID(struct, task_struct)
4535 BTF_SET_END(rcu_protected_types)
4536 
4537 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
4538 {
4539 	if (!btf_is_kernel(btf))
4540 		return false;
4541 	return btf_id_set_contains(&rcu_protected_types, btf_id);
4542 }
4543 
4544 static bool rcu_safe_kptr(const struct btf_field *field)
4545 {
4546 	const struct btf_field_kptr *kptr = &field->kptr;
4547 
4548 	return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
4549 }
4550 
4551 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
4552 				 int value_regno, int insn_idx,
4553 				 struct btf_field *kptr_field)
4554 {
4555 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4556 	int class = BPF_CLASS(insn->code);
4557 	struct bpf_reg_state *val_reg;
4558 
4559 	/* Things we already checked for in check_map_access and caller:
4560 	 *  - Reject cases where variable offset may touch kptr
4561 	 *  - size of access (must be BPF_DW)
4562 	 *  - tnum_is_const(reg->var_off)
4563 	 *  - kptr_field->offset == off + reg->var_off.value
4564 	 */
4565 	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
4566 	if (BPF_MODE(insn->code) != BPF_MEM) {
4567 		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
4568 		return -EACCES;
4569 	}
4570 
4571 	/* We only allow loading referenced kptr, since it will be marked as
4572 	 * untrusted, similar to unreferenced kptr.
4573 	 */
4574 	if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
4575 		verbose(env, "store to referenced kptr disallowed\n");
4576 		return -EACCES;
4577 	}
4578 
4579 	if (class == BPF_LDX) {
4580 		val_reg = reg_state(env, value_regno);
4581 		/* We can simply mark the value_regno receiving the pointer
4582 		 * value from map as PTR_TO_BTF_ID, with the correct type.
4583 		 */
4584 		mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
4585 				kptr_field->kptr.btf_id,
4586 				rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
4587 				PTR_MAYBE_NULL | MEM_RCU :
4588 				PTR_MAYBE_NULL | PTR_UNTRUSTED);
4589 		/* For mark_ptr_or_null_reg */
4590 		val_reg->id = ++env->id_gen;
4591 	} else if (class == BPF_STX) {
4592 		val_reg = reg_state(env, value_regno);
4593 		if (!register_is_null(val_reg) &&
4594 		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
4595 			return -EACCES;
4596 	} else if (class == BPF_ST) {
4597 		if (insn->imm) {
4598 			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
4599 				kptr_field->offset);
4600 			return -EACCES;
4601 		}
4602 	} else {
4603 		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
4604 		return -EACCES;
4605 	}
4606 	return 0;
4607 }
4608 
4609 /* check read/write into a map element with possible variable offset */
4610 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
4611 			    int off, int size, bool zero_size_allowed,
4612 			    enum bpf_access_src src)
4613 {
4614 	struct bpf_verifier_state *vstate = env->cur_state;
4615 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4616 	struct bpf_reg_state *reg = &state->regs[regno];
4617 	struct bpf_map *map = reg->map_ptr;
4618 	struct btf_record *rec;
4619 	int err, i;
4620 
4621 	err = check_mem_region_access(env, regno, off, size, map->value_size,
4622 				      zero_size_allowed);
4623 	if (err)
4624 		return err;
4625 
4626 	if (IS_ERR_OR_NULL(map->record))
4627 		return 0;
4628 	rec = map->record;
4629 	for (i = 0; i < rec->cnt; i++) {
4630 		struct btf_field *field = &rec->fields[i];
4631 		u32 p = field->offset;
4632 
4633 		/* If any part of a field  can be touched by load/store, reject
4634 		 * this program. To check that [x1, x2) overlaps with [y1, y2),
4635 		 * it is sufficient to check x1 < y2 && y1 < x2.
4636 		 */
4637 		if (reg->smin_value + off < p + btf_field_type_size(field->type) &&
4638 		    p < reg->umax_value + off + size) {
4639 			switch (field->type) {
4640 			case BPF_KPTR_UNREF:
4641 			case BPF_KPTR_REF:
4642 				if (src != ACCESS_DIRECT) {
4643 					verbose(env, "kptr cannot be accessed indirectly by helper\n");
4644 					return -EACCES;
4645 				}
4646 				if (!tnum_is_const(reg->var_off)) {
4647 					verbose(env, "kptr access cannot have variable offset\n");
4648 					return -EACCES;
4649 				}
4650 				if (p != off + reg->var_off.value) {
4651 					verbose(env, "kptr access misaligned expected=%u off=%llu\n",
4652 						p, off + reg->var_off.value);
4653 					return -EACCES;
4654 				}
4655 				if (size != bpf_size_to_bytes(BPF_DW)) {
4656 					verbose(env, "kptr access size must be BPF_DW\n");
4657 					return -EACCES;
4658 				}
4659 				break;
4660 			default:
4661 				verbose(env, "%s cannot be accessed directly by load/store\n",
4662 					btf_field_type_name(field->type));
4663 				return -EACCES;
4664 			}
4665 		}
4666 	}
4667 	return 0;
4668 }
4669 
4670 #define MAX_PACKET_OFF 0xffff
4671 
4672 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
4673 				       const struct bpf_call_arg_meta *meta,
4674 				       enum bpf_access_type t)
4675 {
4676 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
4677 
4678 	switch (prog_type) {
4679 	/* Program types only with direct read access go here! */
4680 	case BPF_PROG_TYPE_LWT_IN:
4681 	case BPF_PROG_TYPE_LWT_OUT:
4682 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
4683 	case BPF_PROG_TYPE_SK_REUSEPORT:
4684 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
4685 	case BPF_PROG_TYPE_CGROUP_SKB:
4686 		if (t == BPF_WRITE)
4687 			return false;
4688 		fallthrough;
4689 
4690 	/* Program types with direct read + write access go here! */
4691 	case BPF_PROG_TYPE_SCHED_CLS:
4692 	case BPF_PROG_TYPE_SCHED_ACT:
4693 	case BPF_PROG_TYPE_XDP:
4694 	case BPF_PROG_TYPE_LWT_XMIT:
4695 	case BPF_PROG_TYPE_SK_SKB:
4696 	case BPF_PROG_TYPE_SK_MSG:
4697 		if (meta)
4698 			return meta->pkt_access;
4699 
4700 		env->seen_direct_write = true;
4701 		return true;
4702 
4703 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4704 		if (t == BPF_WRITE)
4705 			env->seen_direct_write = true;
4706 
4707 		return true;
4708 
4709 	default:
4710 		return false;
4711 	}
4712 }
4713 
4714 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
4715 			       int size, bool zero_size_allowed)
4716 {
4717 	struct bpf_reg_state *regs = cur_regs(env);
4718 	struct bpf_reg_state *reg = &regs[regno];
4719 	int err;
4720 
4721 	/* We may have added a variable offset to the packet pointer; but any
4722 	 * reg->range we have comes after that.  We are only checking the fixed
4723 	 * offset.
4724 	 */
4725 
4726 	/* We don't allow negative numbers, because we aren't tracking enough
4727 	 * detail to prove they're safe.
4728 	 */
4729 	if (reg->smin_value < 0) {
4730 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4731 			regno);
4732 		return -EACCES;
4733 	}
4734 
4735 	err = reg->range < 0 ? -EINVAL :
4736 	      __check_mem_access(env, regno, off, size, reg->range,
4737 				 zero_size_allowed);
4738 	if (err) {
4739 		verbose(env, "R%d offset is outside of the packet\n", regno);
4740 		return err;
4741 	}
4742 
4743 	/* __check_mem_access has made sure "off + size - 1" is within u16.
4744 	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
4745 	 * otherwise find_good_pkt_pointers would have refused to set range info
4746 	 * that __check_mem_access would have rejected this pkt access.
4747 	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
4748 	 */
4749 	env->prog->aux->max_pkt_offset =
4750 		max_t(u32, env->prog->aux->max_pkt_offset,
4751 		      off + reg->umax_value + size - 1);
4752 
4753 	return err;
4754 }
4755 
4756 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
4757 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
4758 			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
4759 			    struct btf **btf, u32 *btf_id)
4760 {
4761 	struct bpf_insn_access_aux info = {
4762 		.reg_type = *reg_type,
4763 		.log = &env->log,
4764 	};
4765 
4766 	if (env->ops->is_valid_access &&
4767 	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
4768 		/* A non zero info.ctx_field_size indicates that this field is a
4769 		 * candidate for later verifier transformation to load the whole
4770 		 * field and then apply a mask when accessed with a narrower
4771 		 * access than actual ctx access size. A zero info.ctx_field_size
4772 		 * will only allow for whole field access and rejects any other
4773 		 * type of narrower access.
4774 		 */
4775 		*reg_type = info.reg_type;
4776 
4777 		if (base_type(*reg_type) == PTR_TO_BTF_ID) {
4778 			*btf = info.btf;
4779 			*btf_id = info.btf_id;
4780 		} else {
4781 			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
4782 		}
4783 		/* remember the offset of last byte accessed in ctx */
4784 		if (env->prog->aux->max_ctx_offset < off + size)
4785 			env->prog->aux->max_ctx_offset = off + size;
4786 		return 0;
4787 	}
4788 
4789 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
4790 	return -EACCES;
4791 }
4792 
4793 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
4794 				  int size)
4795 {
4796 	if (size < 0 || off < 0 ||
4797 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
4798 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
4799 			off, size);
4800 		return -EACCES;
4801 	}
4802 	return 0;
4803 }
4804 
4805 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
4806 			     u32 regno, int off, int size,
4807 			     enum bpf_access_type t)
4808 {
4809 	struct bpf_reg_state *regs = cur_regs(env);
4810 	struct bpf_reg_state *reg = &regs[regno];
4811 	struct bpf_insn_access_aux info = {};
4812 	bool valid;
4813 
4814 	if (reg->smin_value < 0) {
4815 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4816 			regno);
4817 		return -EACCES;
4818 	}
4819 
4820 	switch (reg->type) {
4821 	case PTR_TO_SOCK_COMMON:
4822 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
4823 		break;
4824 	case PTR_TO_SOCKET:
4825 		valid = bpf_sock_is_valid_access(off, size, t, &info);
4826 		break;
4827 	case PTR_TO_TCP_SOCK:
4828 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
4829 		break;
4830 	case PTR_TO_XDP_SOCK:
4831 		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
4832 		break;
4833 	default:
4834 		valid = false;
4835 	}
4836 
4837 
4838 	if (valid) {
4839 		env->insn_aux_data[insn_idx].ctx_field_size =
4840 			info.ctx_field_size;
4841 		return 0;
4842 	}
4843 
4844 	verbose(env, "R%d invalid %s access off=%d size=%d\n",
4845 		regno, reg_type_str(env, reg->type), off, size);
4846 
4847 	return -EACCES;
4848 }
4849 
4850 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
4851 {
4852 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4853 }
4854 
4855 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
4856 {
4857 	const struct bpf_reg_state *reg = reg_state(env, regno);
4858 
4859 	return reg->type == PTR_TO_CTX;
4860 }
4861 
4862 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
4863 {
4864 	const struct bpf_reg_state *reg = reg_state(env, regno);
4865 
4866 	return type_is_sk_pointer(reg->type);
4867 }
4868 
4869 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
4870 {
4871 	const struct bpf_reg_state *reg = reg_state(env, regno);
4872 
4873 	return type_is_pkt_pointer(reg->type);
4874 }
4875 
4876 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
4877 {
4878 	const struct bpf_reg_state *reg = reg_state(env, regno);
4879 
4880 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
4881 	return reg->type == PTR_TO_FLOW_KEYS;
4882 }
4883 
4884 static bool is_trusted_reg(const struct bpf_reg_state *reg)
4885 {
4886 	/* A referenced register is always trusted. */
4887 	if (reg->ref_obj_id)
4888 		return true;
4889 
4890 	/* If a register is not referenced, it is trusted if it has the
4891 	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
4892 	 * other type modifiers may be safe, but we elect to take an opt-in
4893 	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
4894 	 * not.
4895 	 *
4896 	 * Eventually, we should make PTR_TRUSTED the single source of truth
4897 	 * for whether a register is trusted.
4898 	 */
4899 	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
4900 	       !bpf_type_has_unsafe_modifiers(reg->type);
4901 }
4902 
4903 static bool is_rcu_reg(const struct bpf_reg_state *reg)
4904 {
4905 	return reg->type & MEM_RCU;
4906 }
4907 
4908 static void clear_trusted_flags(enum bpf_type_flag *flag)
4909 {
4910 	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
4911 }
4912 
4913 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
4914 				   const struct bpf_reg_state *reg,
4915 				   int off, int size, bool strict)
4916 {
4917 	struct tnum reg_off;
4918 	int ip_align;
4919 
4920 	/* Byte size accesses are always allowed. */
4921 	if (!strict || size == 1)
4922 		return 0;
4923 
4924 	/* For platforms that do not have a Kconfig enabling
4925 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
4926 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
4927 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
4928 	 * to this code only in strict mode where we want to emulate
4929 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
4930 	 * unconditional IP align value of '2'.
4931 	 */
4932 	ip_align = 2;
4933 
4934 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
4935 	if (!tnum_is_aligned(reg_off, size)) {
4936 		char tn_buf[48];
4937 
4938 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4939 		verbose(env,
4940 			"misaligned packet access off %d+%s+%d+%d size %d\n",
4941 			ip_align, tn_buf, reg->off, off, size);
4942 		return -EACCES;
4943 	}
4944 
4945 	return 0;
4946 }
4947 
4948 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
4949 				       const struct bpf_reg_state *reg,
4950 				       const char *pointer_desc,
4951 				       int off, int size, bool strict)
4952 {
4953 	struct tnum reg_off;
4954 
4955 	/* Byte size accesses are always allowed. */
4956 	if (!strict || size == 1)
4957 		return 0;
4958 
4959 	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
4960 	if (!tnum_is_aligned(reg_off, size)) {
4961 		char tn_buf[48];
4962 
4963 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4964 		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
4965 			pointer_desc, tn_buf, reg->off, off, size);
4966 		return -EACCES;
4967 	}
4968 
4969 	return 0;
4970 }
4971 
4972 static int check_ptr_alignment(struct bpf_verifier_env *env,
4973 			       const struct bpf_reg_state *reg, int off,
4974 			       int size, bool strict_alignment_once)
4975 {
4976 	bool strict = env->strict_alignment || strict_alignment_once;
4977 	const char *pointer_desc = "";
4978 
4979 	switch (reg->type) {
4980 	case PTR_TO_PACKET:
4981 	case PTR_TO_PACKET_META:
4982 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
4983 		 * right in front, treat it the very same way.
4984 		 */
4985 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
4986 	case PTR_TO_FLOW_KEYS:
4987 		pointer_desc = "flow keys ";
4988 		break;
4989 	case PTR_TO_MAP_KEY:
4990 		pointer_desc = "key ";
4991 		break;
4992 	case PTR_TO_MAP_VALUE:
4993 		pointer_desc = "value ";
4994 		break;
4995 	case PTR_TO_CTX:
4996 		pointer_desc = "context ";
4997 		break;
4998 	case PTR_TO_STACK:
4999 		pointer_desc = "stack ";
5000 		/* The stack spill tracking logic in check_stack_write_fixed_off()
5001 		 * and check_stack_read_fixed_off() relies on stack accesses being
5002 		 * aligned.
5003 		 */
5004 		strict = true;
5005 		break;
5006 	case PTR_TO_SOCKET:
5007 		pointer_desc = "sock ";
5008 		break;
5009 	case PTR_TO_SOCK_COMMON:
5010 		pointer_desc = "sock_common ";
5011 		break;
5012 	case PTR_TO_TCP_SOCK:
5013 		pointer_desc = "tcp_sock ";
5014 		break;
5015 	case PTR_TO_XDP_SOCK:
5016 		pointer_desc = "xdp_sock ";
5017 		break;
5018 	default:
5019 		break;
5020 	}
5021 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
5022 					   strict);
5023 }
5024 
5025 static int update_stack_depth(struct bpf_verifier_env *env,
5026 			      const struct bpf_func_state *func,
5027 			      int off)
5028 {
5029 	u16 stack = env->subprog_info[func->subprogno].stack_depth;
5030 
5031 	if (stack >= -off)
5032 		return 0;
5033 
5034 	/* update known max for given subprogram */
5035 	env->subprog_info[func->subprogno].stack_depth = -off;
5036 	return 0;
5037 }
5038 
5039 /* starting from main bpf function walk all instructions of the function
5040  * and recursively walk all callees that given function can call.
5041  * Ignore jump and exit insns.
5042  * Since recursion is prevented by check_cfg() this algorithm
5043  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
5044  */
5045 static int check_max_stack_depth(struct bpf_verifier_env *env)
5046 {
5047 	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
5048 	struct bpf_subprog_info *subprog = env->subprog_info;
5049 	struct bpf_insn *insn = env->prog->insnsi;
5050 	bool tail_call_reachable = false;
5051 	int ret_insn[MAX_CALL_FRAMES];
5052 	int ret_prog[MAX_CALL_FRAMES];
5053 	int j;
5054 
5055 process_func:
5056 	/* protect against potential stack overflow that might happen when
5057 	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
5058 	 * depth for such case down to 256 so that the worst case scenario
5059 	 * would result in 8k stack size (32 which is tailcall limit * 256 =
5060 	 * 8k).
5061 	 *
5062 	 * To get the idea what might happen, see an example:
5063 	 * func1 -> sub rsp, 128
5064 	 *  subfunc1 -> sub rsp, 256
5065 	 *  tailcall1 -> add rsp, 256
5066 	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
5067 	 *   subfunc2 -> sub rsp, 64
5068 	 *   subfunc22 -> sub rsp, 128
5069 	 *   tailcall2 -> add rsp, 128
5070 	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
5071 	 *
5072 	 * tailcall will unwind the current stack frame but it will not get rid
5073 	 * of caller's stack as shown on the example above.
5074 	 */
5075 	if (idx && subprog[idx].has_tail_call && depth >= 256) {
5076 		verbose(env,
5077 			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
5078 			depth);
5079 		return -EACCES;
5080 	}
5081 	/* round up to 32-bytes, since this is granularity
5082 	 * of interpreter stack size
5083 	 */
5084 	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
5085 	if (depth > MAX_BPF_STACK) {
5086 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
5087 			frame + 1, depth);
5088 		return -EACCES;
5089 	}
5090 continue_func:
5091 	subprog_end = subprog[idx + 1].start;
5092 	for (; i < subprog_end; i++) {
5093 		int next_insn;
5094 
5095 		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
5096 			continue;
5097 		/* remember insn and function to return to */
5098 		ret_insn[frame] = i + 1;
5099 		ret_prog[frame] = idx;
5100 
5101 		/* find the callee */
5102 		next_insn = i + insn[i].imm + 1;
5103 		idx = find_subprog(env, next_insn);
5104 		if (idx < 0) {
5105 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5106 				  next_insn);
5107 			return -EFAULT;
5108 		}
5109 		if (subprog[idx].is_async_cb) {
5110 			if (subprog[idx].has_tail_call) {
5111 				verbose(env, "verifier bug. subprog has tail_call and async cb\n");
5112 				return -EFAULT;
5113 			}
5114 			 /* async callbacks don't increase bpf prog stack size */
5115 			continue;
5116 		}
5117 		i = next_insn;
5118 
5119 		if (subprog[idx].has_tail_call)
5120 			tail_call_reachable = true;
5121 
5122 		frame++;
5123 		if (frame >= MAX_CALL_FRAMES) {
5124 			verbose(env, "the call stack of %d frames is too deep !\n",
5125 				frame);
5126 			return -E2BIG;
5127 		}
5128 		goto process_func;
5129 	}
5130 	/* if tail call got detected across bpf2bpf calls then mark each of the
5131 	 * currently present subprog frames as tail call reachable subprogs;
5132 	 * this info will be utilized by JIT so that we will be preserving the
5133 	 * tail call counter throughout bpf2bpf calls combined with tailcalls
5134 	 */
5135 	if (tail_call_reachable)
5136 		for (j = 0; j < frame; j++)
5137 			subprog[ret_prog[j]].tail_call_reachable = true;
5138 	if (subprog[0].tail_call_reachable)
5139 		env->prog->aux->tail_call_reachable = true;
5140 
5141 	/* end of for() loop means the last insn of the 'subprog'
5142 	 * was reached. Doesn't matter whether it was JA or EXIT
5143 	 */
5144 	if (frame == 0)
5145 		return 0;
5146 	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
5147 	frame--;
5148 	i = ret_insn[frame];
5149 	idx = ret_prog[frame];
5150 	goto continue_func;
5151 }
5152 
5153 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
5154 static int get_callee_stack_depth(struct bpf_verifier_env *env,
5155 				  const struct bpf_insn *insn, int idx)
5156 {
5157 	int start = idx + insn->imm + 1, subprog;
5158 
5159 	subprog = find_subprog(env, start);
5160 	if (subprog < 0) {
5161 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5162 			  start);
5163 		return -EFAULT;
5164 	}
5165 	return env->subprog_info[subprog].stack_depth;
5166 }
5167 #endif
5168 
5169 static int __check_buffer_access(struct bpf_verifier_env *env,
5170 				 const char *buf_info,
5171 				 const struct bpf_reg_state *reg,
5172 				 int regno, int off, int size)
5173 {
5174 	if (off < 0) {
5175 		verbose(env,
5176 			"R%d invalid %s buffer access: off=%d, size=%d\n",
5177 			regno, buf_info, off, size);
5178 		return -EACCES;
5179 	}
5180 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5181 		char tn_buf[48];
5182 
5183 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5184 		verbose(env,
5185 			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
5186 			regno, off, tn_buf);
5187 		return -EACCES;
5188 	}
5189 
5190 	return 0;
5191 }
5192 
5193 static int check_tp_buffer_access(struct bpf_verifier_env *env,
5194 				  const struct bpf_reg_state *reg,
5195 				  int regno, int off, int size)
5196 {
5197 	int err;
5198 
5199 	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
5200 	if (err)
5201 		return err;
5202 
5203 	if (off + size > env->prog->aux->max_tp_access)
5204 		env->prog->aux->max_tp_access = off + size;
5205 
5206 	return 0;
5207 }
5208 
5209 static int check_buffer_access(struct bpf_verifier_env *env,
5210 			       const struct bpf_reg_state *reg,
5211 			       int regno, int off, int size,
5212 			       bool zero_size_allowed,
5213 			       u32 *max_access)
5214 {
5215 	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
5216 	int err;
5217 
5218 	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
5219 	if (err)
5220 		return err;
5221 
5222 	if (off + size > *max_access)
5223 		*max_access = off + size;
5224 
5225 	return 0;
5226 }
5227 
5228 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
5229 static void zext_32_to_64(struct bpf_reg_state *reg)
5230 {
5231 	reg->var_off = tnum_subreg(reg->var_off);
5232 	__reg_assign_32_into_64(reg);
5233 }
5234 
5235 /* truncate register to smaller size (in bytes)
5236  * must be called with size < BPF_REG_SIZE
5237  */
5238 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
5239 {
5240 	u64 mask;
5241 
5242 	/* clear high bits in bit representation */
5243 	reg->var_off = tnum_cast(reg->var_off, size);
5244 
5245 	/* fix arithmetic bounds */
5246 	mask = ((u64)1 << (size * 8)) - 1;
5247 	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
5248 		reg->umin_value &= mask;
5249 		reg->umax_value &= mask;
5250 	} else {
5251 		reg->umin_value = 0;
5252 		reg->umax_value = mask;
5253 	}
5254 	reg->smin_value = reg->umin_value;
5255 	reg->smax_value = reg->umax_value;
5256 
5257 	/* If size is smaller than 32bit register the 32bit register
5258 	 * values are also truncated so we push 64-bit bounds into
5259 	 * 32-bit bounds. Above were truncated < 32-bits already.
5260 	 */
5261 	if (size >= 4)
5262 		return;
5263 	__reg_combine_64_into_32(reg);
5264 }
5265 
5266 static bool bpf_map_is_rdonly(const struct bpf_map *map)
5267 {
5268 	/* A map is considered read-only if the following condition are true:
5269 	 *
5270 	 * 1) BPF program side cannot change any of the map content. The
5271 	 *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
5272 	 *    and was set at map creation time.
5273 	 * 2) The map value(s) have been initialized from user space by a
5274 	 *    loader and then "frozen", such that no new map update/delete
5275 	 *    operations from syscall side are possible for the rest of
5276 	 *    the map's lifetime from that point onwards.
5277 	 * 3) Any parallel/pending map update/delete operations from syscall
5278 	 *    side have been completed. Only after that point, it's safe to
5279 	 *    assume that map value(s) are immutable.
5280 	 */
5281 	return (map->map_flags & BPF_F_RDONLY_PROG) &&
5282 	       READ_ONCE(map->frozen) &&
5283 	       !bpf_map_write_active(map);
5284 }
5285 
5286 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
5287 {
5288 	void *ptr;
5289 	u64 addr;
5290 	int err;
5291 
5292 	err = map->ops->map_direct_value_addr(map, &addr, off);
5293 	if (err)
5294 		return err;
5295 	ptr = (void *)(long)addr + off;
5296 
5297 	switch (size) {
5298 	case sizeof(u8):
5299 		*val = (u64)*(u8 *)ptr;
5300 		break;
5301 	case sizeof(u16):
5302 		*val = (u64)*(u16 *)ptr;
5303 		break;
5304 	case sizeof(u32):
5305 		*val = (u64)*(u32 *)ptr;
5306 		break;
5307 	case sizeof(u64):
5308 		*val = *(u64 *)ptr;
5309 		break;
5310 	default:
5311 		return -EINVAL;
5312 	}
5313 	return 0;
5314 }
5315 
5316 #define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
5317 #define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
5318 #define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
5319 
5320 /*
5321  * Allow list few fields as RCU trusted or full trusted.
5322  * This logic doesn't allow mix tagging and will be removed once GCC supports
5323  * btf_type_tag.
5324  */
5325 
5326 /* RCU trusted: these fields are trusted in RCU CS and never NULL */
5327 BTF_TYPE_SAFE_RCU(struct task_struct) {
5328 	const cpumask_t *cpus_ptr;
5329 	struct css_set __rcu *cgroups;
5330 	struct task_struct __rcu *real_parent;
5331 	struct task_struct *group_leader;
5332 };
5333 
5334 BTF_TYPE_SAFE_RCU(struct cgroup) {
5335 	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
5336 	struct kernfs_node *kn;
5337 };
5338 
5339 BTF_TYPE_SAFE_RCU(struct css_set) {
5340 	struct cgroup *dfl_cgrp;
5341 };
5342 
5343 /* RCU trusted: these fields are trusted in RCU CS and can be NULL */
5344 BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
5345 	struct file __rcu *exe_file;
5346 };
5347 
5348 /* skb->sk, req->sk are not RCU protected, but we mark them as such
5349  * because bpf prog accessible sockets are SOCK_RCU_FREE.
5350  */
5351 BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
5352 	struct sock *sk;
5353 };
5354 
5355 BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
5356 	struct sock *sk;
5357 };
5358 
5359 /* full trusted: these fields are trusted even outside of RCU CS and never NULL */
5360 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
5361 	struct seq_file *seq;
5362 };
5363 
5364 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
5365 	struct bpf_iter_meta *meta;
5366 	struct task_struct *task;
5367 };
5368 
5369 BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
5370 	struct file *file;
5371 };
5372 
5373 BTF_TYPE_SAFE_TRUSTED(struct file) {
5374 	struct inode *f_inode;
5375 };
5376 
5377 BTF_TYPE_SAFE_TRUSTED(struct dentry) {
5378 	/* no negative dentry-s in places where bpf can see it */
5379 	struct inode *d_inode;
5380 };
5381 
5382 BTF_TYPE_SAFE_TRUSTED(struct socket) {
5383 	struct sock *sk;
5384 };
5385 
5386 static bool type_is_rcu(struct bpf_verifier_env *env,
5387 			struct bpf_reg_state *reg,
5388 			const char *field_name, u32 btf_id)
5389 {
5390 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
5391 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
5392 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
5393 
5394 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
5395 }
5396 
5397 static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
5398 				struct bpf_reg_state *reg,
5399 				const char *field_name, u32 btf_id)
5400 {
5401 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
5402 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
5403 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
5404 
5405 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
5406 }
5407 
5408 static bool type_is_trusted(struct bpf_verifier_env *env,
5409 			    struct bpf_reg_state *reg,
5410 			    const char *field_name, u32 btf_id)
5411 {
5412 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
5413 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
5414 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
5415 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
5416 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
5417 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
5418 
5419 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
5420 }
5421 
5422 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
5423 				   struct bpf_reg_state *regs,
5424 				   int regno, int off, int size,
5425 				   enum bpf_access_type atype,
5426 				   int value_regno)
5427 {
5428 	struct bpf_reg_state *reg = regs + regno;
5429 	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
5430 	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
5431 	const char *field_name = NULL;
5432 	enum bpf_type_flag flag = 0;
5433 	u32 btf_id = 0;
5434 	int ret;
5435 
5436 	if (!env->allow_ptr_leaks) {
5437 		verbose(env,
5438 			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5439 			tname);
5440 		return -EPERM;
5441 	}
5442 	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
5443 		verbose(env,
5444 			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
5445 			tname);
5446 		return -EINVAL;
5447 	}
5448 	if (off < 0) {
5449 		verbose(env,
5450 			"R%d is ptr_%s invalid negative access: off=%d\n",
5451 			regno, tname, off);
5452 		return -EACCES;
5453 	}
5454 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5455 		char tn_buf[48];
5456 
5457 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5458 		verbose(env,
5459 			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
5460 			regno, tname, off, tn_buf);
5461 		return -EACCES;
5462 	}
5463 
5464 	if (reg->type & MEM_USER) {
5465 		verbose(env,
5466 			"R%d is ptr_%s access user memory: off=%d\n",
5467 			regno, tname, off);
5468 		return -EACCES;
5469 	}
5470 
5471 	if (reg->type & MEM_PERCPU) {
5472 		verbose(env,
5473 			"R%d is ptr_%s access percpu memory: off=%d\n",
5474 			regno, tname, off);
5475 		return -EACCES;
5476 	}
5477 
5478 	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
5479 		if (!btf_is_kernel(reg->btf)) {
5480 			verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
5481 			return -EFAULT;
5482 		}
5483 		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
5484 	} else {
5485 		/* Writes are permitted with default btf_struct_access for
5486 		 * program allocated objects (which always have ref_obj_id > 0),
5487 		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
5488 		 */
5489 		if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
5490 			verbose(env, "only read is supported\n");
5491 			return -EACCES;
5492 		}
5493 
5494 		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
5495 		    !reg->ref_obj_id) {
5496 			verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
5497 			return -EFAULT;
5498 		}
5499 
5500 		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
5501 	}
5502 
5503 	if (ret < 0)
5504 		return ret;
5505 
5506 	if (ret != PTR_TO_BTF_ID) {
5507 		/* just mark; */
5508 
5509 	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
5510 		/* If this is an untrusted pointer, all pointers formed by walking it
5511 		 * also inherit the untrusted flag.
5512 		 */
5513 		flag = PTR_UNTRUSTED;
5514 
5515 	} else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
5516 		/* By default any pointer obtained from walking a trusted pointer is no
5517 		 * longer trusted, unless the field being accessed has explicitly been
5518 		 * marked as inheriting its parent's state of trust (either full or RCU).
5519 		 * For example:
5520 		 * 'cgroups' pointer is untrusted if task->cgroups dereference
5521 		 * happened in a sleepable program outside of bpf_rcu_read_lock()
5522 		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
5523 		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
5524 		 *
5525 		 * A regular RCU-protected pointer with __rcu tag can also be deemed
5526 		 * trusted if we are in an RCU CS. Such pointer can be NULL.
5527 		 */
5528 		if (type_is_trusted(env, reg, field_name, btf_id)) {
5529 			flag |= PTR_TRUSTED;
5530 		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
5531 			if (type_is_rcu(env, reg, field_name, btf_id)) {
5532 				/* ignore __rcu tag and mark it MEM_RCU */
5533 				flag |= MEM_RCU;
5534 			} else if (flag & MEM_RCU ||
5535 				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
5536 				/* __rcu tagged pointers can be NULL */
5537 				flag |= MEM_RCU | PTR_MAYBE_NULL;
5538 			} else if (flag & (MEM_PERCPU | MEM_USER)) {
5539 				/* keep as-is */
5540 			} else {
5541 				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
5542 				clear_trusted_flags(&flag);
5543 			}
5544 		} else {
5545 			/*
5546 			 * If not in RCU CS or MEM_RCU pointer can be NULL then
5547 			 * aggressively mark as untrusted otherwise such
5548 			 * pointers will be plain PTR_TO_BTF_ID without flags
5549 			 * and will be allowed to be passed into helpers for
5550 			 * compat reasons.
5551 			 */
5552 			flag = PTR_UNTRUSTED;
5553 		}
5554 	} else {
5555 		/* Old compat. Deprecated */
5556 		clear_trusted_flags(&flag);
5557 	}
5558 
5559 	if (atype == BPF_READ && value_regno >= 0)
5560 		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
5561 
5562 	return 0;
5563 }
5564 
5565 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
5566 				   struct bpf_reg_state *regs,
5567 				   int regno, int off, int size,
5568 				   enum bpf_access_type atype,
5569 				   int value_regno)
5570 {
5571 	struct bpf_reg_state *reg = regs + regno;
5572 	struct bpf_map *map = reg->map_ptr;
5573 	struct bpf_reg_state map_reg;
5574 	enum bpf_type_flag flag = 0;
5575 	const struct btf_type *t;
5576 	const char *tname;
5577 	u32 btf_id;
5578 	int ret;
5579 
5580 	if (!btf_vmlinux) {
5581 		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
5582 		return -ENOTSUPP;
5583 	}
5584 
5585 	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
5586 		verbose(env, "map_ptr access not supported for map type %d\n",
5587 			map->map_type);
5588 		return -ENOTSUPP;
5589 	}
5590 
5591 	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
5592 	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5593 
5594 	if (!env->allow_ptr_leaks) {
5595 		verbose(env,
5596 			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5597 			tname);
5598 		return -EPERM;
5599 	}
5600 
5601 	if (off < 0) {
5602 		verbose(env, "R%d is %s invalid negative access: off=%d\n",
5603 			regno, tname, off);
5604 		return -EACCES;
5605 	}
5606 
5607 	if (atype != BPF_READ) {
5608 		verbose(env, "only read from %s is supported\n", tname);
5609 		return -EACCES;
5610 	}
5611 
5612 	/* Simulate access to a PTR_TO_BTF_ID */
5613 	memset(&map_reg, 0, sizeof(map_reg));
5614 	mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
5615 	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
5616 	if (ret < 0)
5617 		return ret;
5618 
5619 	if (value_regno >= 0)
5620 		mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
5621 
5622 	return 0;
5623 }
5624 
5625 /* Check that the stack access at the given offset is within bounds. The
5626  * maximum valid offset is -1.
5627  *
5628  * The minimum valid offset is -MAX_BPF_STACK for writes, and
5629  * -state->allocated_stack for reads.
5630  */
5631 static int check_stack_slot_within_bounds(int off,
5632 					  struct bpf_func_state *state,
5633 					  enum bpf_access_type t)
5634 {
5635 	int min_valid_off;
5636 
5637 	if (t == BPF_WRITE)
5638 		min_valid_off = -MAX_BPF_STACK;
5639 	else
5640 		min_valid_off = -state->allocated_stack;
5641 
5642 	if (off < min_valid_off || off > -1)
5643 		return -EACCES;
5644 	return 0;
5645 }
5646 
5647 /* Check that the stack access at 'regno + off' falls within the maximum stack
5648  * bounds.
5649  *
5650  * 'off' includes `regno->offset`, but not its dynamic part (if any).
5651  */
5652 static int check_stack_access_within_bounds(
5653 		struct bpf_verifier_env *env,
5654 		int regno, int off, int access_size,
5655 		enum bpf_access_src src, enum bpf_access_type type)
5656 {
5657 	struct bpf_reg_state *regs = cur_regs(env);
5658 	struct bpf_reg_state *reg = regs + regno;
5659 	struct bpf_func_state *state = func(env, reg);
5660 	int min_off, max_off;
5661 	int err;
5662 	char *err_extra;
5663 
5664 	if (src == ACCESS_HELPER)
5665 		/* We don't know if helpers are reading or writing (or both). */
5666 		err_extra = " indirect access to";
5667 	else if (type == BPF_READ)
5668 		err_extra = " read from";
5669 	else
5670 		err_extra = " write to";
5671 
5672 	if (tnum_is_const(reg->var_off)) {
5673 		min_off = reg->var_off.value + off;
5674 		if (access_size > 0)
5675 			max_off = min_off + access_size - 1;
5676 		else
5677 			max_off = min_off;
5678 	} else {
5679 		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
5680 		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
5681 			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
5682 				err_extra, regno);
5683 			return -EACCES;
5684 		}
5685 		min_off = reg->smin_value + off;
5686 		if (access_size > 0)
5687 			max_off = reg->smax_value + off + access_size - 1;
5688 		else
5689 			max_off = min_off;
5690 	}
5691 
5692 	err = check_stack_slot_within_bounds(min_off, state, type);
5693 	if (!err)
5694 		err = check_stack_slot_within_bounds(max_off, state, type);
5695 
5696 	if (err) {
5697 		if (tnum_is_const(reg->var_off)) {
5698 			verbose(env, "invalid%s stack R%d off=%d size=%d\n",
5699 				err_extra, regno, off, access_size);
5700 		} else {
5701 			char tn_buf[48];
5702 
5703 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5704 			verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
5705 				err_extra, regno, tn_buf, access_size);
5706 		}
5707 	}
5708 	return err;
5709 }
5710 
5711 /* check whether memory at (regno + off) is accessible for t = (read | write)
5712  * if t==write, value_regno is a register which value is stored into memory
5713  * if t==read, value_regno is a register which will receive the value from memory
5714  * if t==write && value_regno==-1, some unknown value is stored into memory
5715  * if t==read && value_regno==-1, don't care what we read from memory
5716  */
5717 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
5718 			    int off, int bpf_size, enum bpf_access_type t,
5719 			    int value_regno, bool strict_alignment_once)
5720 {
5721 	struct bpf_reg_state *regs = cur_regs(env);
5722 	struct bpf_reg_state *reg = regs + regno;
5723 	struct bpf_func_state *state;
5724 	int size, err = 0;
5725 
5726 	size = bpf_size_to_bytes(bpf_size);
5727 	if (size < 0)
5728 		return size;
5729 
5730 	/* alignment checks will add in reg->off themselves */
5731 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
5732 	if (err)
5733 		return err;
5734 
5735 	/* for access checks, reg->off is just part of off */
5736 	off += reg->off;
5737 
5738 	if (reg->type == PTR_TO_MAP_KEY) {
5739 		if (t == BPF_WRITE) {
5740 			verbose(env, "write to change key R%d not allowed\n", regno);
5741 			return -EACCES;
5742 		}
5743 
5744 		err = check_mem_region_access(env, regno, off, size,
5745 					      reg->map_ptr->key_size, false);
5746 		if (err)
5747 			return err;
5748 		if (value_regno >= 0)
5749 			mark_reg_unknown(env, regs, value_regno);
5750 	} else if (reg->type == PTR_TO_MAP_VALUE) {
5751 		struct btf_field *kptr_field = NULL;
5752 
5753 		if (t == BPF_WRITE && value_regno >= 0 &&
5754 		    is_pointer_value(env, value_regno)) {
5755 			verbose(env, "R%d leaks addr into map\n", value_regno);
5756 			return -EACCES;
5757 		}
5758 		err = check_map_access_type(env, regno, off, size, t);
5759 		if (err)
5760 			return err;
5761 		err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
5762 		if (err)
5763 			return err;
5764 		if (tnum_is_const(reg->var_off))
5765 			kptr_field = btf_record_find(reg->map_ptr->record,
5766 						     off + reg->var_off.value, BPF_KPTR);
5767 		if (kptr_field) {
5768 			err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
5769 		} else if (t == BPF_READ && value_regno >= 0) {
5770 			struct bpf_map *map = reg->map_ptr;
5771 
5772 			/* if map is read-only, track its contents as scalars */
5773 			if (tnum_is_const(reg->var_off) &&
5774 			    bpf_map_is_rdonly(map) &&
5775 			    map->ops->map_direct_value_addr) {
5776 				int map_off = off + reg->var_off.value;
5777 				u64 val = 0;
5778 
5779 				err = bpf_map_direct_read(map, map_off, size,
5780 							  &val);
5781 				if (err)
5782 					return err;
5783 
5784 				regs[value_regno].type = SCALAR_VALUE;
5785 				__mark_reg_known(&regs[value_regno], val);
5786 			} else {
5787 				mark_reg_unknown(env, regs, value_regno);
5788 			}
5789 		}
5790 	} else if (base_type(reg->type) == PTR_TO_MEM) {
5791 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
5792 
5793 		if (type_may_be_null(reg->type)) {
5794 			verbose(env, "R%d invalid mem access '%s'\n", regno,
5795 				reg_type_str(env, reg->type));
5796 			return -EACCES;
5797 		}
5798 
5799 		if (t == BPF_WRITE && rdonly_mem) {
5800 			verbose(env, "R%d cannot write into %s\n",
5801 				regno, reg_type_str(env, reg->type));
5802 			return -EACCES;
5803 		}
5804 
5805 		if (t == BPF_WRITE && value_regno >= 0 &&
5806 		    is_pointer_value(env, value_regno)) {
5807 			verbose(env, "R%d leaks addr into mem\n", value_regno);
5808 			return -EACCES;
5809 		}
5810 
5811 		err = check_mem_region_access(env, regno, off, size,
5812 					      reg->mem_size, false);
5813 		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
5814 			mark_reg_unknown(env, regs, value_regno);
5815 	} else if (reg->type == PTR_TO_CTX) {
5816 		enum bpf_reg_type reg_type = SCALAR_VALUE;
5817 		struct btf *btf = NULL;
5818 		u32 btf_id = 0;
5819 
5820 		if (t == BPF_WRITE && value_regno >= 0 &&
5821 		    is_pointer_value(env, value_regno)) {
5822 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
5823 			return -EACCES;
5824 		}
5825 
5826 		err = check_ptr_off_reg(env, reg, regno);
5827 		if (err < 0)
5828 			return err;
5829 
5830 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
5831 				       &btf_id);
5832 		if (err)
5833 			verbose_linfo(env, insn_idx, "; ");
5834 		if (!err && t == BPF_READ && value_regno >= 0) {
5835 			/* ctx access returns either a scalar, or a
5836 			 * PTR_TO_PACKET[_META,_END]. In the latter
5837 			 * case, we know the offset is zero.
5838 			 */
5839 			if (reg_type == SCALAR_VALUE) {
5840 				mark_reg_unknown(env, regs, value_regno);
5841 			} else {
5842 				mark_reg_known_zero(env, regs,
5843 						    value_regno);
5844 				if (type_may_be_null(reg_type))
5845 					regs[value_regno].id = ++env->id_gen;
5846 				/* A load of ctx field could have different
5847 				 * actual load size with the one encoded in the
5848 				 * insn. When the dst is PTR, it is for sure not
5849 				 * a sub-register.
5850 				 */
5851 				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
5852 				if (base_type(reg_type) == PTR_TO_BTF_ID) {
5853 					regs[value_regno].btf = btf;
5854 					regs[value_regno].btf_id = btf_id;
5855 				}
5856 			}
5857 			regs[value_regno].type = reg_type;
5858 		}
5859 
5860 	} else if (reg->type == PTR_TO_STACK) {
5861 		/* Basic bounds checks. */
5862 		err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
5863 		if (err)
5864 			return err;
5865 
5866 		state = func(env, reg);
5867 		err = update_stack_depth(env, state, off);
5868 		if (err)
5869 			return err;
5870 
5871 		if (t == BPF_READ)
5872 			err = check_stack_read(env, regno, off, size,
5873 					       value_regno);
5874 		else
5875 			err = check_stack_write(env, regno, off, size,
5876 						value_regno, insn_idx);
5877 	} else if (reg_is_pkt_pointer(reg)) {
5878 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
5879 			verbose(env, "cannot write into packet\n");
5880 			return -EACCES;
5881 		}
5882 		if (t == BPF_WRITE && value_regno >= 0 &&
5883 		    is_pointer_value(env, value_regno)) {
5884 			verbose(env, "R%d leaks addr into packet\n",
5885 				value_regno);
5886 			return -EACCES;
5887 		}
5888 		err = check_packet_access(env, regno, off, size, false);
5889 		if (!err && t == BPF_READ && value_regno >= 0)
5890 			mark_reg_unknown(env, regs, value_regno);
5891 	} else if (reg->type == PTR_TO_FLOW_KEYS) {
5892 		if (t == BPF_WRITE && value_regno >= 0 &&
5893 		    is_pointer_value(env, value_regno)) {
5894 			verbose(env, "R%d leaks addr into flow keys\n",
5895 				value_regno);
5896 			return -EACCES;
5897 		}
5898 
5899 		err = check_flow_keys_access(env, off, size);
5900 		if (!err && t == BPF_READ && value_regno >= 0)
5901 			mark_reg_unknown(env, regs, value_regno);
5902 	} else if (type_is_sk_pointer(reg->type)) {
5903 		if (t == BPF_WRITE) {
5904 			verbose(env, "R%d cannot write into %s\n",
5905 				regno, reg_type_str(env, reg->type));
5906 			return -EACCES;
5907 		}
5908 		err = check_sock_access(env, insn_idx, regno, off, size, t);
5909 		if (!err && value_regno >= 0)
5910 			mark_reg_unknown(env, regs, value_regno);
5911 	} else if (reg->type == PTR_TO_TP_BUFFER) {
5912 		err = check_tp_buffer_access(env, reg, regno, off, size);
5913 		if (!err && t == BPF_READ && value_regno >= 0)
5914 			mark_reg_unknown(env, regs, value_regno);
5915 	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
5916 		   !type_may_be_null(reg->type)) {
5917 		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
5918 					      value_regno);
5919 	} else if (reg->type == CONST_PTR_TO_MAP) {
5920 		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
5921 					      value_regno);
5922 	} else if (base_type(reg->type) == PTR_TO_BUF) {
5923 		bool rdonly_mem = type_is_rdonly_mem(reg->type);
5924 		u32 *max_access;
5925 
5926 		if (rdonly_mem) {
5927 			if (t == BPF_WRITE) {
5928 				verbose(env, "R%d cannot write into %s\n",
5929 					regno, reg_type_str(env, reg->type));
5930 				return -EACCES;
5931 			}
5932 			max_access = &env->prog->aux->max_rdonly_access;
5933 		} else {
5934 			max_access = &env->prog->aux->max_rdwr_access;
5935 		}
5936 
5937 		err = check_buffer_access(env, reg, regno, off, size, false,
5938 					  max_access);
5939 
5940 		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
5941 			mark_reg_unknown(env, regs, value_regno);
5942 	} else {
5943 		verbose(env, "R%d invalid mem access '%s'\n", regno,
5944 			reg_type_str(env, reg->type));
5945 		return -EACCES;
5946 	}
5947 
5948 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
5949 	    regs[value_regno].type == SCALAR_VALUE) {
5950 		/* b/h/w load zero-extends, mark upper bits as known 0 */
5951 		coerce_reg_to_size(&regs[value_regno], size);
5952 	}
5953 	return err;
5954 }
5955 
5956 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
5957 {
5958 	int load_reg;
5959 	int err;
5960 
5961 	switch (insn->imm) {
5962 	case BPF_ADD:
5963 	case BPF_ADD | BPF_FETCH:
5964 	case BPF_AND:
5965 	case BPF_AND | BPF_FETCH:
5966 	case BPF_OR:
5967 	case BPF_OR | BPF_FETCH:
5968 	case BPF_XOR:
5969 	case BPF_XOR | BPF_FETCH:
5970 	case BPF_XCHG:
5971 	case BPF_CMPXCHG:
5972 		break;
5973 	default:
5974 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
5975 		return -EINVAL;
5976 	}
5977 
5978 	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
5979 		verbose(env, "invalid atomic operand size\n");
5980 		return -EINVAL;
5981 	}
5982 
5983 	/* check src1 operand */
5984 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
5985 	if (err)
5986 		return err;
5987 
5988 	/* check src2 operand */
5989 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
5990 	if (err)
5991 		return err;
5992 
5993 	if (insn->imm == BPF_CMPXCHG) {
5994 		/* Check comparison of R0 with memory location */
5995 		const u32 aux_reg = BPF_REG_0;
5996 
5997 		err = check_reg_arg(env, aux_reg, SRC_OP);
5998 		if (err)
5999 			return err;
6000 
6001 		if (is_pointer_value(env, aux_reg)) {
6002 			verbose(env, "R%d leaks addr into mem\n", aux_reg);
6003 			return -EACCES;
6004 		}
6005 	}
6006 
6007 	if (is_pointer_value(env, insn->src_reg)) {
6008 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6009 		return -EACCES;
6010 	}
6011 
6012 	if (is_ctx_reg(env, insn->dst_reg) ||
6013 	    is_pkt_reg(env, insn->dst_reg) ||
6014 	    is_flow_key_reg(env, insn->dst_reg) ||
6015 	    is_sk_reg(env, insn->dst_reg)) {
6016 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6017 			insn->dst_reg,
6018 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6019 		return -EACCES;
6020 	}
6021 
6022 	if (insn->imm & BPF_FETCH) {
6023 		if (insn->imm == BPF_CMPXCHG)
6024 			load_reg = BPF_REG_0;
6025 		else
6026 			load_reg = insn->src_reg;
6027 
6028 		/* check and record load of old value */
6029 		err = check_reg_arg(env, load_reg, DST_OP);
6030 		if (err)
6031 			return err;
6032 	} else {
6033 		/* This instruction accesses a memory location but doesn't
6034 		 * actually load it into a register.
6035 		 */
6036 		load_reg = -1;
6037 	}
6038 
6039 	/* Check whether we can read the memory, with second call for fetch
6040 	 * case to simulate the register fill.
6041 	 */
6042 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
6043 			       BPF_SIZE(insn->code), BPF_READ, -1, true);
6044 	if (!err && load_reg >= 0)
6045 		err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
6046 				       BPF_SIZE(insn->code), BPF_READ, load_reg,
6047 				       true);
6048 	if (err)
6049 		return err;
6050 
6051 	/* Check whether we can write into the same memory. */
6052 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
6053 			       BPF_SIZE(insn->code), BPF_WRITE, -1, true);
6054 	if (err)
6055 		return err;
6056 
6057 	return 0;
6058 }
6059 
6060 /* When register 'regno' is used to read the stack (either directly or through
6061  * a helper function) make sure that it's within stack boundary and, depending
6062  * on the access type, that all elements of the stack are initialized.
6063  *
6064  * 'off' includes 'regno->off', but not its dynamic part (if any).
6065  *
6066  * All registers that have been spilled on the stack in the slots within the
6067  * read offsets are marked as read.
6068  */
6069 static int check_stack_range_initialized(
6070 		struct bpf_verifier_env *env, int regno, int off,
6071 		int access_size, bool zero_size_allowed,
6072 		enum bpf_access_src type, struct bpf_call_arg_meta *meta)
6073 {
6074 	struct bpf_reg_state *reg = reg_state(env, regno);
6075 	struct bpf_func_state *state = func(env, reg);
6076 	int err, min_off, max_off, i, j, slot, spi;
6077 	char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
6078 	enum bpf_access_type bounds_check_type;
6079 	/* Some accesses can write anything into the stack, others are
6080 	 * read-only.
6081 	 */
6082 	bool clobber = false;
6083 
6084 	if (access_size == 0 && !zero_size_allowed) {
6085 		verbose(env, "invalid zero-sized read\n");
6086 		return -EACCES;
6087 	}
6088 
6089 	if (type == ACCESS_HELPER) {
6090 		/* The bounds checks for writes are more permissive than for
6091 		 * reads. However, if raw_mode is not set, we'll do extra
6092 		 * checks below.
6093 		 */
6094 		bounds_check_type = BPF_WRITE;
6095 		clobber = true;
6096 	} else {
6097 		bounds_check_type = BPF_READ;
6098 	}
6099 	err = check_stack_access_within_bounds(env, regno, off, access_size,
6100 					       type, bounds_check_type);
6101 	if (err)
6102 		return err;
6103 
6104 
6105 	if (tnum_is_const(reg->var_off)) {
6106 		min_off = max_off = reg->var_off.value + off;
6107 	} else {
6108 		/* Variable offset is prohibited for unprivileged mode for
6109 		 * simplicity since it requires corresponding support in
6110 		 * Spectre masking for stack ALU.
6111 		 * See also retrieve_ptr_limit().
6112 		 */
6113 		if (!env->bypass_spec_v1) {
6114 			char tn_buf[48];
6115 
6116 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6117 			verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
6118 				regno, err_extra, tn_buf);
6119 			return -EACCES;
6120 		}
6121 		/* Only initialized buffer on stack is allowed to be accessed
6122 		 * with variable offset. With uninitialized buffer it's hard to
6123 		 * guarantee that whole memory is marked as initialized on
6124 		 * helper return since specific bounds are unknown what may
6125 		 * cause uninitialized stack leaking.
6126 		 */
6127 		if (meta && meta->raw_mode)
6128 			meta = NULL;
6129 
6130 		min_off = reg->smin_value + off;
6131 		max_off = reg->smax_value + off;
6132 	}
6133 
6134 	if (meta && meta->raw_mode) {
6135 		/* Ensure we won't be overwriting dynptrs when simulating byte
6136 		 * by byte access in check_helper_call using meta.access_size.
6137 		 * This would be a problem if we have a helper in the future
6138 		 * which takes:
6139 		 *
6140 		 *	helper(uninit_mem, len, dynptr)
6141 		 *
6142 		 * Now, uninint_mem may overlap with dynptr pointer. Hence, it
6143 		 * may end up writing to dynptr itself when touching memory from
6144 		 * arg 1. This can be relaxed on a case by case basis for known
6145 		 * safe cases, but reject due to the possibilitiy of aliasing by
6146 		 * default.
6147 		 */
6148 		for (i = min_off; i < max_off + access_size; i++) {
6149 			int stack_off = -i - 1;
6150 
6151 			spi = __get_spi(i);
6152 			/* raw_mode may write past allocated_stack */
6153 			if (state->allocated_stack <= stack_off)
6154 				continue;
6155 			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
6156 				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
6157 				return -EACCES;
6158 			}
6159 		}
6160 		meta->access_size = access_size;
6161 		meta->regno = regno;
6162 		return 0;
6163 	}
6164 
6165 	for (i = min_off; i < max_off + access_size; i++) {
6166 		u8 *stype;
6167 
6168 		slot = -i - 1;
6169 		spi = slot / BPF_REG_SIZE;
6170 		if (state->allocated_stack <= slot)
6171 			goto err;
6172 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
6173 		if (*stype == STACK_MISC)
6174 			goto mark;
6175 		if ((*stype == STACK_ZERO) ||
6176 		    (*stype == STACK_INVALID && env->allow_uninit_stack)) {
6177 			if (clobber) {
6178 				/* helper can write anything into the stack */
6179 				*stype = STACK_MISC;
6180 			}
6181 			goto mark;
6182 		}
6183 
6184 		if (is_spilled_reg(&state->stack[spi]) &&
6185 		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
6186 		     env->allow_ptr_leaks)) {
6187 			if (clobber) {
6188 				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
6189 				for (j = 0; j < BPF_REG_SIZE; j++)
6190 					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
6191 			}
6192 			goto mark;
6193 		}
6194 
6195 err:
6196 		if (tnum_is_const(reg->var_off)) {
6197 			verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
6198 				err_extra, regno, min_off, i - min_off, access_size);
6199 		} else {
6200 			char tn_buf[48];
6201 
6202 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6203 			verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
6204 				err_extra, regno, tn_buf, i - min_off, access_size);
6205 		}
6206 		return -EACCES;
6207 mark:
6208 		/* reading any byte out of 8-byte 'spill_slot' will cause
6209 		 * the whole slot to be marked as 'read'
6210 		 */
6211 		mark_reg_read(env, &state->stack[spi].spilled_ptr,
6212 			      state->stack[spi].spilled_ptr.parent,
6213 			      REG_LIVE_READ64);
6214 		/* We do not set REG_LIVE_WRITTEN for stack slot, as we can not
6215 		 * be sure that whether stack slot is written to or not. Hence,
6216 		 * we must still conservatively propagate reads upwards even if
6217 		 * helper may write to the entire memory range.
6218 		 */
6219 	}
6220 	return update_stack_depth(env, state, min_off);
6221 }
6222 
6223 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
6224 				   int access_size, bool zero_size_allowed,
6225 				   struct bpf_call_arg_meta *meta)
6226 {
6227 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6228 	u32 *max_access;
6229 
6230 	switch (base_type(reg->type)) {
6231 	case PTR_TO_PACKET:
6232 	case PTR_TO_PACKET_META:
6233 		return check_packet_access(env, regno, reg->off, access_size,
6234 					   zero_size_allowed);
6235 	case PTR_TO_MAP_KEY:
6236 		if (meta && meta->raw_mode) {
6237 			verbose(env, "R%d cannot write into %s\n", regno,
6238 				reg_type_str(env, reg->type));
6239 			return -EACCES;
6240 		}
6241 		return check_mem_region_access(env, regno, reg->off, access_size,
6242 					       reg->map_ptr->key_size, false);
6243 	case PTR_TO_MAP_VALUE:
6244 		if (check_map_access_type(env, regno, reg->off, access_size,
6245 					  meta && meta->raw_mode ? BPF_WRITE :
6246 					  BPF_READ))
6247 			return -EACCES;
6248 		return check_map_access(env, regno, reg->off, access_size,
6249 					zero_size_allowed, ACCESS_HELPER);
6250 	case PTR_TO_MEM:
6251 		if (type_is_rdonly_mem(reg->type)) {
6252 			if (meta && meta->raw_mode) {
6253 				verbose(env, "R%d cannot write into %s\n", regno,
6254 					reg_type_str(env, reg->type));
6255 				return -EACCES;
6256 			}
6257 		}
6258 		return check_mem_region_access(env, regno, reg->off,
6259 					       access_size, reg->mem_size,
6260 					       zero_size_allowed);
6261 	case PTR_TO_BUF:
6262 		if (type_is_rdonly_mem(reg->type)) {
6263 			if (meta && meta->raw_mode) {
6264 				verbose(env, "R%d cannot write into %s\n", regno,
6265 					reg_type_str(env, reg->type));
6266 				return -EACCES;
6267 			}
6268 
6269 			max_access = &env->prog->aux->max_rdonly_access;
6270 		} else {
6271 			max_access = &env->prog->aux->max_rdwr_access;
6272 		}
6273 		return check_buffer_access(env, reg, regno, reg->off,
6274 					   access_size, zero_size_allowed,
6275 					   max_access);
6276 	case PTR_TO_STACK:
6277 		return check_stack_range_initialized(
6278 				env,
6279 				regno, reg->off, access_size,
6280 				zero_size_allowed, ACCESS_HELPER, meta);
6281 	case PTR_TO_BTF_ID:
6282 		return check_ptr_to_btf_access(env, regs, regno, reg->off,
6283 					       access_size, BPF_READ, -1);
6284 	case PTR_TO_CTX:
6285 		/* in case the function doesn't know how to access the context,
6286 		 * (because we are in a program of type SYSCALL for example), we
6287 		 * can not statically check its size.
6288 		 * Dynamically check it now.
6289 		 */
6290 		if (!env->ops->convert_ctx_access) {
6291 			enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
6292 			int offset = access_size - 1;
6293 
6294 			/* Allow zero-byte read from PTR_TO_CTX */
6295 			if (access_size == 0)
6296 				return zero_size_allowed ? 0 : -EACCES;
6297 
6298 			return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
6299 						atype, -1, false);
6300 		}
6301 
6302 		fallthrough;
6303 	default: /* scalar_value or invalid ptr */
6304 		/* Allow zero-byte read from NULL, regardless of pointer type */
6305 		if (zero_size_allowed && access_size == 0 &&
6306 		    register_is_null(reg))
6307 			return 0;
6308 
6309 		verbose(env, "R%d type=%s ", regno,
6310 			reg_type_str(env, reg->type));
6311 		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
6312 		return -EACCES;
6313 	}
6314 }
6315 
6316 static int check_mem_size_reg(struct bpf_verifier_env *env,
6317 			      struct bpf_reg_state *reg, u32 regno,
6318 			      bool zero_size_allowed,
6319 			      struct bpf_call_arg_meta *meta)
6320 {
6321 	int err;
6322 
6323 	/* This is used to refine r0 return value bounds for helpers
6324 	 * that enforce this value as an upper bound on return values.
6325 	 * See do_refine_retval_range() for helpers that can refine
6326 	 * the return value. C type of helper is u32 so we pull register
6327 	 * bound from umax_value however, if negative verifier errors
6328 	 * out. Only upper bounds can be learned because retval is an
6329 	 * int type and negative retvals are allowed.
6330 	 */
6331 	meta->msize_max_value = reg->umax_value;
6332 
6333 	/* The register is SCALAR_VALUE; the access check
6334 	 * happens using its boundaries.
6335 	 */
6336 	if (!tnum_is_const(reg->var_off))
6337 		/* For unprivileged variable accesses, disable raw
6338 		 * mode so that the program is required to
6339 		 * initialize all the memory that the helper could
6340 		 * just partially fill up.
6341 		 */
6342 		meta = NULL;
6343 
6344 	if (reg->smin_value < 0) {
6345 		verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
6346 			regno);
6347 		return -EACCES;
6348 	}
6349 
6350 	if (reg->umin_value == 0) {
6351 		err = check_helper_mem_access(env, regno - 1, 0,
6352 					      zero_size_allowed,
6353 					      meta);
6354 		if (err)
6355 			return err;
6356 	}
6357 
6358 	if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
6359 		verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
6360 			regno);
6361 		return -EACCES;
6362 	}
6363 	err = check_helper_mem_access(env, regno - 1,
6364 				      reg->umax_value,
6365 				      zero_size_allowed, meta);
6366 	if (!err)
6367 		err = mark_chain_precision(env, regno);
6368 	return err;
6369 }
6370 
6371 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
6372 		   u32 regno, u32 mem_size)
6373 {
6374 	bool may_be_null = type_may_be_null(reg->type);
6375 	struct bpf_reg_state saved_reg;
6376 	struct bpf_call_arg_meta meta;
6377 	int err;
6378 
6379 	if (register_is_null(reg))
6380 		return 0;
6381 
6382 	memset(&meta, 0, sizeof(meta));
6383 	/* Assuming that the register contains a value check if the memory
6384 	 * access is safe. Temporarily save and restore the register's state as
6385 	 * the conversion shouldn't be visible to a caller.
6386 	 */
6387 	if (may_be_null) {
6388 		saved_reg = *reg;
6389 		mark_ptr_not_null_reg(reg);
6390 	}
6391 
6392 	err = check_helper_mem_access(env, regno, mem_size, true, &meta);
6393 	/* Check access for BPF_WRITE */
6394 	meta.raw_mode = true;
6395 	err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
6396 
6397 	if (may_be_null)
6398 		*reg = saved_reg;
6399 
6400 	return err;
6401 }
6402 
6403 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
6404 				    u32 regno)
6405 {
6406 	struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
6407 	bool may_be_null = type_may_be_null(mem_reg->type);
6408 	struct bpf_reg_state saved_reg;
6409 	struct bpf_call_arg_meta meta;
6410 	int err;
6411 
6412 	WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
6413 
6414 	memset(&meta, 0, sizeof(meta));
6415 
6416 	if (may_be_null) {
6417 		saved_reg = *mem_reg;
6418 		mark_ptr_not_null_reg(mem_reg);
6419 	}
6420 
6421 	err = check_mem_size_reg(env, reg, regno, true, &meta);
6422 	/* Check access for BPF_WRITE */
6423 	meta.raw_mode = true;
6424 	err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
6425 
6426 	if (may_be_null)
6427 		*mem_reg = saved_reg;
6428 	return err;
6429 }
6430 
6431 /* Implementation details:
6432  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
6433  * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
6434  * Two bpf_map_lookups (even with the same key) will have different reg->id.
6435  * Two separate bpf_obj_new will also have different reg->id.
6436  * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
6437  * clears reg->id after value_or_null->value transition, since the verifier only
6438  * cares about the range of access to valid map value pointer and doesn't care
6439  * about actual address of the map element.
6440  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
6441  * reg->id > 0 after value_or_null->value transition. By doing so
6442  * two bpf_map_lookups will be considered two different pointers that
6443  * point to different bpf_spin_locks. Likewise for pointers to allocated objects
6444  * returned from bpf_obj_new.
6445  * The verifier allows taking only one bpf_spin_lock at a time to avoid
6446  * dead-locks.
6447  * Since only one bpf_spin_lock is allowed the checks are simpler than
6448  * reg_is_refcounted() logic. The verifier needs to remember only
6449  * one spin_lock instead of array of acquired_refs.
6450  * cur_state->active_lock remembers which map value element or allocated
6451  * object got locked and clears it after bpf_spin_unlock.
6452  */
6453 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
6454 			     bool is_lock)
6455 {
6456 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6457 	struct bpf_verifier_state *cur = env->cur_state;
6458 	bool is_const = tnum_is_const(reg->var_off);
6459 	u64 val = reg->var_off.value;
6460 	struct bpf_map *map = NULL;
6461 	struct btf *btf = NULL;
6462 	struct btf_record *rec;
6463 
6464 	if (!is_const) {
6465 		verbose(env,
6466 			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
6467 			regno);
6468 		return -EINVAL;
6469 	}
6470 	if (reg->type == PTR_TO_MAP_VALUE) {
6471 		map = reg->map_ptr;
6472 		if (!map->btf) {
6473 			verbose(env,
6474 				"map '%s' has to have BTF in order to use bpf_spin_lock\n",
6475 				map->name);
6476 			return -EINVAL;
6477 		}
6478 	} else {
6479 		btf = reg->btf;
6480 	}
6481 
6482 	rec = reg_btf_record(reg);
6483 	if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) {
6484 		verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
6485 			map ? map->name : "kptr");
6486 		return -EINVAL;
6487 	}
6488 	if (rec->spin_lock_off != val + reg->off) {
6489 		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
6490 			val + reg->off, rec->spin_lock_off);
6491 		return -EINVAL;
6492 	}
6493 	if (is_lock) {
6494 		if (cur->active_lock.ptr) {
6495 			verbose(env,
6496 				"Locking two bpf_spin_locks are not allowed\n");
6497 			return -EINVAL;
6498 		}
6499 		if (map)
6500 			cur->active_lock.ptr = map;
6501 		else
6502 			cur->active_lock.ptr = btf;
6503 		cur->active_lock.id = reg->id;
6504 	} else {
6505 		void *ptr;
6506 
6507 		if (map)
6508 			ptr = map;
6509 		else
6510 			ptr = btf;
6511 
6512 		if (!cur->active_lock.ptr) {
6513 			verbose(env, "bpf_spin_unlock without taking a lock\n");
6514 			return -EINVAL;
6515 		}
6516 		if (cur->active_lock.ptr != ptr ||
6517 		    cur->active_lock.id != reg->id) {
6518 			verbose(env, "bpf_spin_unlock of different lock\n");
6519 			return -EINVAL;
6520 		}
6521 
6522 		invalidate_non_owning_refs(env);
6523 
6524 		cur->active_lock.ptr = NULL;
6525 		cur->active_lock.id = 0;
6526 	}
6527 	return 0;
6528 }
6529 
6530 static int process_timer_func(struct bpf_verifier_env *env, int regno,
6531 			      struct bpf_call_arg_meta *meta)
6532 {
6533 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6534 	bool is_const = tnum_is_const(reg->var_off);
6535 	struct bpf_map *map = reg->map_ptr;
6536 	u64 val = reg->var_off.value;
6537 
6538 	if (!is_const) {
6539 		verbose(env,
6540 			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
6541 			regno);
6542 		return -EINVAL;
6543 	}
6544 	if (!map->btf) {
6545 		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
6546 			map->name);
6547 		return -EINVAL;
6548 	}
6549 	if (!btf_record_has_field(map->record, BPF_TIMER)) {
6550 		verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
6551 		return -EINVAL;
6552 	}
6553 	if (map->record->timer_off != val + reg->off) {
6554 		verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
6555 			val + reg->off, map->record->timer_off);
6556 		return -EINVAL;
6557 	}
6558 	if (meta->map_ptr) {
6559 		verbose(env, "verifier bug. Two map pointers in a timer helper\n");
6560 		return -EFAULT;
6561 	}
6562 	meta->map_uid = reg->map_uid;
6563 	meta->map_ptr = map;
6564 	return 0;
6565 }
6566 
6567 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
6568 			     struct bpf_call_arg_meta *meta)
6569 {
6570 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6571 	struct bpf_map *map_ptr = reg->map_ptr;
6572 	struct btf_field *kptr_field;
6573 	u32 kptr_off;
6574 
6575 	if (!tnum_is_const(reg->var_off)) {
6576 		verbose(env,
6577 			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
6578 			regno);
6579 		return -EINVAL;
6580 	}
6581 	if (!map_ptr->btf) {
6582 		verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
6583 			map_ptr->name);
6584 		return -EINVAL;
6585 	}
6586 	if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
6587 		verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
6588 		return -EINVAL;
6589 	}
6590 
6591 	meta->map_ptr = map_ptr;
6592 	kptr_off = reg->off + reg->var_off.value;
6593 	kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
6594 	if (!kptr_field) {
6595 		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
6596 		return -EACCES;
6597 	}
6598 	if (kptr_field->type != BPF_KPTR_REF) {
6599 		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
6600 		return -EACCES;
6601 	}
6602 	meta->kptr_field = kptr_field;
6603 	return 0;
6604 }
6605 
6606 /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
6607  * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
6608  *
6609  * In both cases we deal with the first 8 bytes, but need to mark the next 8
6610  * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
6611  * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
6612  *
6613  * Mutability of bpf_dynptr is at two levels, one is at the level of struct
6614  * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
6615  * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
6616  * mutate the view of the dynptr and also possibly destroy it. In the latter
6617  * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
6618  * memory that dynptr points to.
6619  *
6620  * The verifier will keep track both levels of mutation (bpf_dynptr's in
6621  * reg->type and the memory's in reg->dynptr.type), but there is no support for
6622  * readonly dynptr view yet, hence only the first case is tracked and checked.
6623  *
6624  * This is consistent with how C applies the const modifier to a struct object,
6625  * where the pointer itself inside bpf_dynptr becomes const but not what it
6626  * points to.
6627  *
6628  * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
6629  * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
6630  */
6631 static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
6632 			       enum bpf_arg_type arg_type)
6633 {
6634 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6635 	int err;
6636 
6637 	/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
6638 	 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
6639 	 */
6640 	if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
6641 		verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
6642 		return -EFAULT;
6643 	}
6644 
6645 	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
6646 	 *		 constructing a mutable bpf_dynptr object.
6647 	 *
6648 	 *		 Currently, this is only possible with PTR_TO_STACK
6649 	 *		 pointing to a region of at least 16 bytes which doesn't
6650 	 *		 contain an existing bpf_dynptr.
6651 	 *
6652 	 *  MEM_RDONLY - Points to a initialized bpf_dynptr that will not be
6653 	 *		 mutated or destroyed. However, the memory it points to
6654 	 *		 may be mutated.
6655 	 *
6656 	 *  None       - Points to a initialized dynptr that can be mutated and
6657 	 *		 destroyed, including mutation of the memory it points
6658 	 *		 to.
6659 	 */
6660 	if (arg_type & MEM_UNINIT) {
6661 		int i;
6662 
6663 		if (!is_dynptr_reg_valid_uninit(env, reg)) {
6664 			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
6665 			return -EINVAL;
6666 		}
6667 
6668 		/* we write BPF_DW bits (8 bytes) at a time */
6669 		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
6670 			err = check_mem_access(env, insn_idx, regno,
6671 					       i, BPF_DW, BPF_WRITE, -1, false);
6672 			if (err)
6673 				return err;
6674 		}
6675 
6676 		err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx);
6677 	} else /* MEM_RDONLY and None case from above */ {
6678 		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
6679 		if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
6680 			verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
6681 			return -EINVAL;
6682 		}
6683 
6684 		if (!is_dynptr_reg_valid_init(env, reg)) {
6685 			verbose(env,
6686 				"Expected an initialized dynptr as arg #%d\n",
6687 				regno);
6688 			return -EINVAL;
6689 		}
6690 
6691 		/* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
6692 		if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
6693 			verbose(env,
6694 				"Expected a dynptr of type %s as arg #%d\n",
6695 				dynptr_type_str(arg_to_dynptr_type(arg_type)), regno);
6696 			return -EINVAL;
6697 		}
6698 
6699 		err = mark_dynptr_read(env, reg);
6700 	}
6701 	return err;
6702 }
6703 
6704 static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
6705 {
6706 	struct bpf_func_state *state = func(env, reg);
6707 
6708 	return state->stack[spi].spilled_ptr.ref_obj_id;
6709 }
6710 
6711 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
6712 {
6713 	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
6714 }
6715 
6716 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
6717 {
6718 	return meta->kfunc_flags & KF_ITER_NEW;
6719 }
6720 
6721 static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
6722 {
6723 	return meta->kfunc_flags & KF_ITER_NEXT;
6724 }
6725 
6726 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
6727 {
6728 	return meta->kfunc_flags & KF_ITER_DESTROY;
6729 }
6730 
6731 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
6732 {
6733 	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
6734 	 * kfunc is iter state pointer
6735 	 */
6736 	return arg == 0 && is_iter_kfunc(meta);
6737 }
6738 
6739 static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
6740 			    struct bpf_kfunc_call_arg_meta *meta)
6741 {
6742 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6743 	const struct btf_type *t;
6744 	const struct btf_param *arg;
6745 	int spi, err, i, nr_slots;
6746 	u32 btf_id;
6747 
6748 	/* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
6749 	arg = &btf_params(meta->func_proto)[0];
6750 	t = btf_type_skip_modifiers(meta->btf, arg->type, NULL);	/* PTR */
6751 	t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id);	/* STRUCT */
6752 	nr_slots = t->size / BPF_REG_SIZE;
6753 
6754 	if (is_iter_new_kfunc(meta)) {
6755 		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
6756 		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
6757 			verbose(env, "expected uninitialized iter_%s as arg #%d\n",
6758 				iter_type_str(meta->btf, btf_id), regno);
6759 			return -EINVAL;
6760 		}
6761 
6762 		for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
6763 			err = check_mem_access(env, insn_idx, regno,
6764 					       i, BPF_DW, BPF_WRITE, -1, false);
6765 			if (err)
6766 				return err;
6767 		}
6768 
6769 		err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots);
6770 		if (err)
6771 			return err;
6772 	} else {
6773 		/* iter_next() or iter_destroy() expect initialized iter state*/
6774 		if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) {
6775 			verbose(env, "expected an initialized iter_%s as arg #%d\n",
6776 				iter_type_str(meta->btf, btf_id), regno);
6777 			return -EINVAL;
6778 		}
6779 
6780 		spi = iter_get_spi(env, reg, nr_slots);
6781 		if (spi < 0)
6782 			return spi;
6783 
6784 		err = mark_iter_read(env, reg, spi, nr_slots);
6785 		if (err)
6786 			return err;
6787 
6788 		/* remember meta->iter info for process_iter_next_call() */
6789 		meta->iter.spi = spi;
6790 		meta->iter.frameno = reg->frameno;
6791 		meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
6792 
6793 		if (is_iter_destroy_kfunc(meta)) {
6794 			err = unmark_stack_slots_iter(env, reg, nr_slots);
6795 			if (err)
6796 				return err;
6797 		}
6798 	}
6799 
6800 	return 0;
6801 }
6802 
6803 /* process_iter_next_call() is called when verifier gets to iterator's next
6804  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
6805  * to it as just "iter_next()" in comments below.
6806  *
6807  * BPF verifier relies on a crucial contract for any iter_next()
6808  * implementation: it should *eventually* return NULL, and once that happens
6809  * it should keep returning NULL. That is, once iterator exhausts elements to
6810  * iterate, it should never reset or spuriously return new elements.
6811  *
6812  * With the assumption of such contract, process_iter_next_call() simulates
6813  * a fork in the verifier state to validate loop logic correctness and safety
6814  * without having to simulate infinite amount of iterations.
6815  *
6816  * In current state, we first assume that iter_next() returned NULL and
6817  * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
6818  * conditions we should not form an infinite loop and should eventually reach
6819  * exit.
6820  *
6821  * Besides that, we also fork current state and enqueue it for later
6822  * verification. In a forked state we keep iterator state as ACTIVE
6823  * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
6824  * also bump iteration depth to prevent erroneous infinite loop detection
6825  * later on (see iter_active_depths_differ() comment for details). In this
6826  * state we assume that we'll eventually loop back to another iter_next()
6827  * calls (it could be in exactly same location or in some other instruction,
6828  * it doesn't matter, we don't make any unnecessary assumptions about this,
6829  * everything revolves around iterator state in a stack slot, not which
6830  * instruction is calling iter_next()). When that happens, we either will come
6831  * to iter_next() with equivalent state and can conclude that next iteration
6832  * will proceed in exactly the same way as we just verified, so it's safe to
6833  * assume that loop converges. If not, we'll go on another iteration
6834  * simulation with a different input state, until all possible starting states
6835  * are validated or we reach maximum number of instructions limit.
6836  *
6837  * This way, we will either exhaustively discover all possible input states
6838  * that iterator loop can start with and eventually will converge, or we'll
6839  * effectively regress into bounded loop simulation logic and either reach
6840  * maximum number of instructions if loop is not provably convergent, or there
6841  * is some statically known limit on number of iterations (e.g., if there is
6842  * an explicit `if n > 100 then break;` statement somewhere in the loop).
6843  *
6844  * One very subtle but very important aspect is that we *always* simulate NULL
6845  * condition first (as the current state) before we simulate non-NULL case.
6846  * This has to do with intricacies of scalar precision tracking. By simulating
6847  * "exit condition" of iter_next() returning NULL first, we make sure all the
6848  * relevant precision marks *that will be set **after** we exit iterator loop*
6849  * are propagated backwards to common parent state of NULL and non-NULL
6850  * branches. Thanks to that, state equivalence checks done later in forked
6851  * state, when reaching iter_next() for ACTIVE iterator, can assume that
6852  * precision marks are finalized and won't change. Because simulating another
6853  * ACTIVE iterator iteration won't change them (because given same input
6854  * states we'll end up with exactly same output states which we are currently
6855  * comparing; and verification after the loop already propagated back what
6856  * needs to be **additionally** tracked as precise). It's subtle, grok
6857  * precision tracking for more intuitive understanding.
6858  */
6859 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
6860 				  struct bpf_kfunc_call_arg_meta *meta)
6861 {
6862 	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st;
6863 	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
6864 	struct bpf_reg_state *cur_iter, *queued_iter;
6865 	int iter_frameno = meta->iter.frameno;
6866 	int iter_spi = meta->iter.spi;
6867 
6868 	BTF_TYPE_EMIT(struct bpf_iter);
6869 
6870 	cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
6871 
6872 	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
6873 	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
6874 		verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n",
6875 			cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
6876 		return -EFAULT;
6877 	}
6878 
6879 	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
6880 		/* branch out active iter state */
6881 		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
6882 		if (!queued_st)
6883 			return -ENOMEM;
6884 
6885 		queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
6886 		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
6887 		queued_iter->iter.depth++;
6888 
6889 		queued_fr = queued_st->frame[queued_st->curframe];
6890 		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
6891 	}
6892 
6893 	/* switch to DRAINED state, but keep the depth unchanged */
6894 	/* mark current iter state as drained and assume returned NULL */
6895 	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
6896 	__mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]);
6897 
6898 	return 0;
6899 }
6900 
6901 static bool arg_type_is_mem_size(enum bpf_arg_type type)
6902 {
6903 	return type == ARG_CONST_SIZE ||
6904 	       type == ARG_CONST_SIZE_OR_ZERO;
6905 }
6906 
6907 static bool arg_type_is_release(enum bpf_arg_type type)
6908 {
6909 	return type & OBJ_RELEASE;
6910 }
6911 
6912 static bool arg_type_is_dynptr(enum bpf_arg_type type)
6913 {
6914 	return base_type(type) == ARG_PTR_TO_DYNPTR;
6915 }
6916 
6917 static int int_ptr_type_to_size(enum bpf_arg_type type)
6918 {
6919 	if (type == ARG_PTR_TO_INT)
6920 		return sizeof(u32);
6921 	else if (type == ARG_PTR_TO_LONG)
6922 		return sizeof(u64);
6923 
6924 	return -EINVAL;
6925 }
6926 
6927 static int resolve_map_arg_type(struct bpf_verifier_env *env,
6928 				 const struct bpf_call_arg_meta *meta,
6929 				 enum bpf_arg_type *arg_type)
6930 {
6931 	if (!meta->map_ptr) {
6932 		/* kernel subsystem misconfigured verifier */
6933 		verbose(env, "invalid map_ptr to access map->type\n");
6934 		return -EACCES;
6935 	}
6936 
6937 	switch (meta->map_ptr->map_type) {
6938 	case BPF_MAP_TYPE_SOCKMAP:
6939 	case BPF_MAP_TYPE_SOCKHASH:
6940 		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
6941 			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
6942 		} else {
6943 			verbose(env, "invalid arg_type for sockmap/sockhash\n");
6944 			return -EINVAL;
6945 		}
6946 		break;
6947 	case BPF_MAP_TYPE_BLOOM_FILTER:
6948 		if (meta->func_id == BPF_FUNC_map_peek_elem)
6949 			*arg_type = ARG_PTR_TO_MAP_VALUE;
6950 		break;
6951 	default:
6952 		break;
6953 	}
6954 	return 0;
6955 }
6956 
6957 struct bpf_reg_types {
6958 	const enum bpf_reg_type types[10];
6959 	u32 *btf_id;
6960 };
6961 
6962 static const struct bpf_reg_types sock_types = {
6963 	.types = {
6964 		PTR_TO_SOCK_COMMON,
6965 		PTR_TO_SOCKET,
6966 		PTR_TO_TCP_SOCK,
6967 		PTR_TO_XDP_SOCK,
6968 	},
6969 };
6970 
6971 #ifdef CONFIG_NET
6972 static const struct bpf_reg_types btf_id_sock_common_types = {
6973 	.types = {
6974 		PTR_TO_SOCK_COMMON,
6975 		PTR_TO_SOCKET,
6976 		PTR_TO_TCP_SOCK,
6977 		PTR_TO_XDP_SOCK,
6978 		PTR_TO_BTF_ID,
6979 		PTR_TO_BTF_ID | PTR_TRUSTED,
6980 	},
6981 	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
6982 };
6983 #endif
6984 
6985 static const struct bpf_reg_types mem_types = {
6986 	.types = {
6987 		PTR_TO_STACK,
6988 		PTR_TO_PACKET,
6989 		PTR_TO_PACKET_META,
6990 		PTR_TO_MAP_KEY,
6991 		PTR_TO_MAP_VALUE,
6992 		PTR_TO_MEM,
6993 		PTR_TO_MEM | MEM_RINGBUF,
6994 		PTR_TO_BUF,
6995 		PTR_TO_BTF_ID | PTR_TRUSTED,
6996 	},
6997 };
6998 
6999 static const struct bpf_reg_types int_ptr_types = {
7000 	.types = {
7001 		PTR_TO_STACK,
7002 		PTR_TO_PACKET,
7003 		PTR_TO_PACKET_META,
7004 		PTR_TO_MAP_KEY,
7005 		PTR_TO_MAP_VALUE,
7006 	},
7007 };
7008 
7009 static const struct bpf_reg_types spin_lock_types = {
7010 	.types = {
7011 		PTR_TO_MAP_VALUE,
7012 		PTR_TO_BTF_ID | MEM_ALLOC,
7013 	}
7014 };
7015 
7016 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
7017 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
7018 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
7019 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
7020 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
7021 static const struct bpf_reg_types btf_ptr_types = {
7022 	.types = {
7023 		PTR_TO_BTF_ID,
7024 		PTR_TO_BTF_ID | PTR_TRUSTED,
7025 		PTR_TO_BTF_ID | MEM_RCU,
7026 	},
7027 };
7028 static const struct bpf_reg_types percpu_btf_ptr_types = {
7029 	.types = {
7030 		PTR_TO_BTF_ID | MEM_PERCPU,
7031 		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
7032 	}
7033 };
7034 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
7035 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
7036 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
7037 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
7038 static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
7039 static const struct bpf_reg_types dynptr_types = {
7040 	.types = {
7041 		PTR_TO_STACK,
7042 		CONST_PTR_TO_DYNPTR,
7043 	}
7044 };
7045 
7046 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
7047 	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
7048 	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
7049 	[ARG_CONST_SIZE]		= &scalar_types,
7050 	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
7051 	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
7052 	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
7053 	[ARG_PTR_TO_CTX]		= &context_types,
7054 	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
7055 #ifdef CONFIG_NET
7056 	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
7057 #endif
7058 	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
7059 	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
7060 	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
7061 	[ARG_PTR_TO_MEM]		= &mem_types,
7062 	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
7063 	[ARG_PTR_TO_INT]		= &int_ptr_types,
7064 	[ARG_PTR_TO_LONG]		= &int_ptr_types,
7065 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
7066 	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
7067 	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
7068 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
7069 	[ARG_PTR_TO_TIMER]		= &timer_types,
7070 	[ARG_PTR_TO_KPTR]		= &kptr_types,
7071 	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
7072 };
7073 
7074 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
7075 			  enum bpf_arg_type arg_type,
7076 			  const u32 *arg_btf_id,
7077 			  struct bpf_call_arg_meta *meta)
7078 {
7079 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7080 	enum bpf_reg_type expected, type = reg->type;
7081 	const struct bpf_reg_types *compatible;
7082 	int i, j;
7083 
7084 	compatible = compatible_reg_types[base_type(arg_type)];
7085 	if (!compatible) {
7086 		verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
7087 		return -EFAULT;
7088 	}
7089 
7090 	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
7091 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
7092 	 *
7093 	 * Same for MAYBE_NULL:
7094 	 *
7095 	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
7096 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
7097 	 *
7098 	 * Therefore we fold these flags depending on the arg_type before comparison.
7099 	 */
7100 	if (arg_type & MEM_RDONLY)
7101 		type &= ~MEM_RDONLY;
7102 	if (arg_type & PTR_MAYBE_NULL)
7103 		type &= ~PTR_MAYBE_NULL;
7104 
7105 	if (meta->func_id == BPF_FUNC_kptr_xchg && type & MEM_ALLOC)
7106 		type &= ~MEM_ALLOC;
7107 
7108 	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
7109 		expected = compatible->types[i];
7110 		if (expected == NOT_INIT)
7111 			break;
7112 
7113 		if (type == expected)
7114 			goto found;
7115 	}
7116 
7117 	verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
7118 	for (j = 0; j + 1 < i; j++)
7119 		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
7120 	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
7121 	return -EACCES;
7122 
7123 found:
7124 	if (base_type(reg->type) != PTR_TO_BTF_ID)
7125 		return 0;
7126 
7127 	if (compatible == &mem_types) {
7128 		if (!(arg_type & MEM_RDONLY)) {
7129 			verbose(env,
7130 				"%s() may write into memory pointed by R%d type=%s\n",
7131 				func_id_name(meta->func_id),
7132 				regno, reg_type_str(env, reg->type));
7133 			return -EACCES;
7134 		}
7135 		return 0;
7136 	}
7137 
7138 	switch ((int)reg->type) {
7139 	case PTR_TO_BTF_ID:
7140 	case PTR_TO_BTF_ID | PTR_TRUSTED:
7141 	case PTR_TO_BTF_ID | MEM_RCU:
7142 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
7143 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
7144 	{
7145 		/* For bpf_sk_release, it needs to match against first member
7146 		 * 'struct sock_common', hence make an exception for it. This
7147 		 * allows bpf_sk_release to work for multiple socket types.
7148 		 */
7149 		bool strict_type_match = arg_type_is_release(arg_type) &&
7150 					 meta->func_id != BPF_FUNC_sk_release;
7151 
7152 		if (type_may_be_null(reg->type) &&
7153 		    (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
7154 			verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno);
7155 			return -EACCES;
7156 		}
7157 
7158 		if (!arg_btf_id) {
7159 			if (!compatible->btf_id) {
7160 				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
7161 				return -EFAULT;
7162 			}
7163 			arg_btf_id = compatible->btf_id;
7164 		}
7165 
7166 		if (meta->func_id == BPF_FUNC_kptr_xchg) {
7167 			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
7168 				return -EACCES;
7169 		} else {
7170 			if (arg_btf_id == BPF_PTR_POISON) {
7171 				verbose(env, "verifier internal error:");
7172 				verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
7173 					regno);
7174 				return -EACCES;
7175 			}
7176 
7177 			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
7178 						  btf_vmlinux, *arg_btf_id,
7179 						  strict_type_match)) {
7180 				verbose(env, "R%d is of type %s but %s is expected\n",
7181 					regno, btf_type_name(reg->btf, reg->btf_id),
7182 					btf_type_name(btf_vmlinux, *arg_btf_id));
7183 				return -EACCES;
7184 			}
7185 		}
7186 		break;
7187 	}
7188 	case PTR_TO_BTF_ID | MEM_ALLOC:
7189 		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
7190 		    meta->func_id != BPF_FUNC_kptr_xchg) {
7191 			verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
7192 			return -EFAULT;
7193 		}
7194 		/* Handled by helper specific checks */
7195 		break;
7196 	case PTR_TO_BTF_ID | MEM_PERCPU:
7197 	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
7198 		/* Handled by helper specific checks */
7199 		break;
7200 	default:
7201 		verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
7202 		return -EFAULT;
7203 	}
7204 	return 0;
7205 }
7206 
7207 static struct btf_field *
7208 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
7209 {
7210 	struct btf_field *field;
7211 	struct btf_record *rec;
7212 
7213 	rec = reg_btf_record(reg);
7214 	if (!rec)
7215 		return NULL;
7216 
7217 	field = btf_record_find(rec, off, fields);
7218 	if (!field)
7219 		return NULL;
7220 
7221 	return field;
7222 }
7223 
7224 int check_func_arg_reg_off(struct bpf_verifier_env *env,
7225 			   const struct bpf_reg_state *reg, int regno,
7226 			   enum bpf_arg_type arg_type)
7227 {
7228 	u32 type = reg->type;
7229 
7230 	/* When referenced register is passed to release function, its fixed
7231 	 * offset must be 0.
7232 	 *
7233 	 * We will check arg_type_is_release reg has ref_obj_id when storing
7234 	 * meta->release_regno.
7235 	 */
7236 	if (arg_type_is_release(arg_type)) {
7237 		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
7238 		 * may not directly point to the object being released, but to
7239 		 * dynptr pointing to such object, which might be at some offset
7240 		 * on the stack. In that case, we simply to fallback to the
7241 		 * default handling.
7242 		 */
7243 		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
7244 			return 0;
7245 
7246 		if ((type_is_ptr_alloc_obj(type) || type_is_non_owning_ref(type)) && reg->off) {
7247 			if (reg_find_field_offset(reg, reg->off, BPF_GRAPH_NODE_OR_ROOT))
7248 				return __check_ptr_off_reg(env, reg, regno, true);
7249 
7250 			verbose(env, "R%d must have zero offset when passed to release func\n",
7251 				regno);
7252 			verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno,
7253 				btf_type_name(reg->btf, reg->btf_id), reg->off);
7254 			return -EINVAL;
7255 		}
7256 
7257 		/* Doing check_ptr_off_reg check for the offset will catch this
7258 		 * because fixed_off_ok is false, but checking here allows us
7259 		 * to give the user a better error message.
7260 		 */
7261 		if (reg->off) {
7262 			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
7263 				regno);
7264 			return -EINVAL;
7265 		}
7266 		return __check_ptr_off_reg(env, reg, regno, false);
7267 	}
7268 
7269 	switch (type) {
7270 	/* Pointer types where both fixed and variable offset is explicitly allowed: */
7271 	case PTR_TO_STACK:
7272 	case PTR_TO_PACKET:
7273 	case PTR_TO_PACKET_META:
7274 	case PTR_TO_MAP_KEY:
7275 	case PTR_TO_MAP_VALUE:
7276 	case PTR_TO_MEM:
7277 	case PTR_TO_MEM | MEM_RDONLY:
7278 	case PTR_TO_MEM | MEM_RINGBUF:
7279 	case PTR_TO_BUF:
7280 	case PTR_TO_BUF | MEM_RDONLY:
7281 	case SCALAR_VALUE:
7282 		return 0;
7283 	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
7284 	 * fixed offset.
7285 	 */
7286 	case PTR_TO_BTF_ID:
7287 	case PTR_TO_BTF_ID | MEM_ALLOC:
7288 	case PTR_TO_BTF_ID | PTR_TRUSTED:
7289 	case PTR_TO_BTF_ID | MEM_RCU:
7290 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
7291 		/* When referenced PTR_TO_BTF_ID is passed to release function,
7292 		 * its fixed offset must be 0. In the other cases, fixed offset
7293 		 * can be non-zero. This was already checked above. So pass
7294 		 * fixed_off_ok as true to allow fixed offset for all other
7295 		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
7296 		 * still need to do checks instead of returning.
7297 		 */
7298 		return __check_ptr_off_reg(env, reg, regno, true);
7299 	default:
7300 		return __check_ptr_off_reg(env, reg, regno, false);
7301 	}
7302 }
7303 
7304 static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
7305 						const struct bpf_func_proto *fn,
7306 						struct bpf_reg_state *regs)
7307 {
7308 	struct bpf_reg_state *state = NULL;
7309 	int i;
7310 
7311 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
7312 		if (arg_type_is_dynptr(fn->arg_type[i])) {
7313 			if (state) {
7314 				verbose(env, "verifier internal error: multiple dynptr args\n");
7315 				return NULL;
7316 			}
7317 			state = &regs[BPF_REG_1 + i];
7318 		}
7319 
7320 	if (!state)
7321 		verbose(env, "verifier internal error: no dynptr arg found\n");
7322 
7323 	return state;
7324 }
7325 
7326 static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
7327 {
7328 	struct bpf_func_state *state = func(env, reg);
7329 	int spi;
7330 
7331 	if (reg->type == CONST_PTR_TO_DYNPTR)
7332 		return reg->id;
7333 	spi = dynptr_get_spi(env, reg);
7334 	if (spi < 0)
7335 		return spi;
7336 	return state->stack[spi].spilled_ptr.id;
7337 }
7338 
7339 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
7340 {
7341 	struct bpf_func_state *state = func(env, reg);
7342 	int spi;
7343 
7344 	if (reg->type == CONST_PTR_TO_DYNPTR)
7345 		return reg->ref_obj_id;
7346 	spi = dynptr_get_spi(env, reg);
7347 	if (spi < 0)
7348 		return spi;
7349 	return state->stack[spi].spilled_ptr.ref_obj_id;
7350 }
7351 
7352 static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
7353 					    struct bpf_reg_state *reg)
7354 {
7355 	struct bpf_func_state *state = func(env, reg);
7356 	int spi;
7357 
7358 	if (reg->type == CONST_PTR_TO_DYNPTR)
7359 		return reg->dynptr.type;
7360 
7361 	spi = __get_spi(reg->off);
7362 	if (spi < 0) {
7363 		verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
7364 		return BPF_DYNPTR_TYPE_INVALID;
7365 	}
7366 
7367 	return state->stack[spi].spilled_ptr.dynptr.type;
7368 }
7369 
7370 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
7371 			  struct bpf_call_arg_meta *meta,
7372 			  const struct bpf_func_proto *fn,
7373 			  int insn_idx)
7374 {
7375 	u32 regno = BPF_REG_1 + arg;
7376 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7377 	enum bpf_arg_type arg_type = fn->arg_type[arg];
7378 	enum bpf_reg_type type = reg->type;
7379 	u32 *arg_btf_id = NULL;
7380 	int err = 0;
7381 
7382 	if (arg_type == ARG_DONTCARE)
7383 		return 0;
7384 
7385 	err = check_reg_arg(env, regno, SRC_OP);
7386 	if (err)
7387 		return err;
7388 
7389 	if (arg_type == ARG_ANYTHING) {
7390 		if (is_pointer_value(env, regno)) {
7391 			verbose(env, "R%d leaks addr into helper function\n",
7392 				regno);
7393 			return -EACCES;
7394 		}
7395 		return 0;
7396 	}
7397 
7398 	if (type_is_pkt_pointer(type) &&
7399 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
7400 		verbose(env, "helper access to the packet is not allowed\n");
7401 		return -EACCES;
7402 	}
7403 
7404 	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
7405 		err = resolve_map_arg_type(env, meta, &arg_type);
7406 		if (err)
7407 			return err;
7408 	}
7409 
7410 	if (register_is_null(reg) && type_may_be_null(arg_type))
7411 		/* A NULL register has a SCALAR_VALUE type, so skip
7412 		 * type checking.
7413 		 */
7414 		goto skip_type_check;
7415 
7416 	/* arg_btf_id and arg_size are in a union. */
7417 	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
7418 	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
7419 		arg_btf_id = fn->arg_btf_id[arg];
7420 
7421 	err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
7422 	if (err)
7423 		return err;
7424 
7425 	err = check_func_arg_reg_off(env, reg, regno, arg_type);
7426 	if (err)
7427 		return err;
7428 
7429 skip_type_check:
7430 	if (arg_type_is_release(arg_type)) {
7431 		if (arg_type_is_dynptr(arg_type)) {
7432 			struct bpf_func_state *state = func(env, reg);
7433 			int spi;
7434 
7435 			/* Only dynptr created on stack can be released, thus
7436 			 * the get_spi and stack state checks for spilled_ptr
7437 			 * should only be done before process_dynptr_func for
7438 			 * PTR_TO_STACK.
7439 			 */
7440 			if (reg->type == PTR_TO_STACK) {
7441 				spi = dynptr_get_spi(env, reg);
7442 				if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
7443 					verbose(env, "arg %d is an unacquired reference\n", regno);
7444 					return -EINVAL;
7445 				}
7446 			} else {
7447 				verbose(env, "cannot release unowned const bpf_dynptr\n");
7448 				return -EINVAL;
7449 			}
7450 		} else if (!reg->ref_obj_id && !register_is_null(reg)) {
7451 			verbose(env, "R%d must be referenced when passed to release function\n",
7452 				regno);
7453 			return -EINVAL;
7454 		}
7455 		if (meta->release_regno) {
7456 			verbose(env, "verifier internal error: more than one release argument\n");
7457 			return -EFAULT;
7458 		}
7459 		meta->release_regno = regno;
7460 	}
7461 
7462 	if (reg->ref_obj_id) {
7463 		if (meta->ref_obj_id) {
7464 			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
7465 				regno, reg->ref_obj_id,
7466 				meta->ref_obj_id);
7467 			return -EFAULT;
7468 		}
7469 		meta->ref_obj_id = reg->ref_obj_id;
7470 	}
7471 
7472 	switch (base_type(arg_type)) {
7473 	case ARG_CONST_MAP_PTR:
7474 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
7475 		if (meta->map_ptr) {
7476 			/* Use map_uid (which is unique id of inner map) to reject:
7477 			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
7478 			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
7479 			 * if (inner_map1 && inner_map2) {
7480 			 *     timer = bpf_map_lookup_elem(inner_map1);
7481 			 *     if (timer)
7482 			 *         // mismatch would have been allowed
7483 			 *         bpf_timer_init(timer, inner_map2);
7484 			 * }
7485 			 *
7486 			 * Comparing map_ptr is enough to distinguish normal and outer maps.
7487 			 */
7488 			if (meta->map_ptr != reg->map_ptr ||
7489 			    meta->map_uid != reg->map_uid) {
7490 				verbose(env,
7491 					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
7492 					meta->map_uid, reg->map_uid);
7493 				return -EINVAL;
7494 			}
7495 		}
7496 		meta->map_ptr = reg->map_ptr;
7497 		meta->map_uid = reg->map_uid;
7498 		break;
7499 	case ARG_PTR_TO_MAP_KEY:
7500 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
7501 		 * check that [key, key + map->key_size) are within
7502 		 * stack limits and initialized
7503 		 */
7504 		if (!meta->map_ptr) {
7505 			/* in function declaration map_ptr must come before
7506 			 * map_key, so that it's verified and known before
7507 			 * we have to check map_key here. Otherwise it means
7508 			 * that kernel subsystem misconfigured verifier
7509 			 */
7510 			verbose(env, "invalid map_ptr to access map->key\n");
7511 			return -EACCES;
7512 		}
7513 		err = check_helper_mem_access(env, regno,
7514 					      meta->map_ptr->key_size, false,
7515 					      NULL);
7516 		break;
7517 	case ARG_PTR_TO_MAP_VALUE:
7518 		if (type_may_be_null(arg_type) && register_is_null(reg))
7519 			return 0;
7520 
7521 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
7522 		 * check [value, value + map->value_size) validity
7523 		 */
7524 		if (!meta->map_ptr) {
7525 			/* kernel subsystem misconfigured verifier */
7526 			verbose(env, "invalid map_ptr to access map->value\n");
7527 			return -EACCES;
7528 		}
7529 		meta->raw_mode = arg_type & MEM_UNINIT;
7530 		err = check_helper_mem_access(env, regno,
7531 					      meta->map_ptr->value_size, false,
7532 					      meta);
7533 		break;
7534 	case ARG_PTR_TO_PERCPU_BTF_ID:
7535 		if (!reg->btf_id) {
7536 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
7537 			return -EACCES;
7538 		}
7539 		meta->ret_btf = reg->btf;
7540 		meta->ret_btf_id = reg->btf_id;
7541 		break;
7542 	case ARG_PTR_TO_SPIN_LOCK:
7543 		if (in_rbtree_lock_required_cb(env)) {
7544 			verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
7545 			return -EACCES;
7546 		}
7547 		if (meta->func_id == BPF_FUNC_spin_lock) {
7548 			err = process_spin_lock(env, regno, true);
7549 			if (err)
7550 				return err;
7551 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
7552 			err = process_spin_lock(env, regno, false);
7553 			if (err)
7554 				return err;
7555 		} else {
7556 			verbose(env, "verifier internal error\n");
7557 			return -EFAULT;
7558 		}
7559 		break;
7560 	case ARG_PTR_TO_TIMER:
7561 		err = process_timer_func(env, regno, meta);
7562 		if (err)
7563 			return err;
7564 		break;
7565 	case ARG_PTR_TO_FUNC:
7566 		meta->subprogno = reg->subprogno;
7567 		break;
7568 	case ARG_PTR_TO_MEM:
7569 		/* The access to this pointer is only checked when we hit the
7570 		 * next is_mem_size argument below.
7571 		 */
7572 		meta->raw_mode = arg_type & MEM_UNINIT;
7573 		if (arg_type & MEM_FIXED_SIZE) {
7574 			err = check_helper_mem_access(env, regno,
7575 						      fn->arg_size[arg], false,
7576 						      meta);
7577 		}
7578 		break;
7579 	case ARG_CONST_SIZE:
7580 		err = check_mem_size_reg(env, reg, regno, false, meta);
7581 		break;
7582 	case ARG_CONST_SIZE_OR_ZERO:
7583 		err = check_mem_size_reg(env, reg, regno, true, meta);
7584 		break;
7585 	case ARG_PTR_TO_DYNPTR:
7586 		err = process_dynptr_func(env, regno, insn_idx, arg_type);
7587 		if (err)
7588 			return err;
7589 		break;
7590 	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
7591 		if (!tnum_is_const(reg->var_off)) {
7592 			verbose(env, "R%d is not a known constant'\n",
7593 				regno);
7594 			return -EACCES;
7595 		}
7596 		meta->mem_size = reg->var_off.value;
7597 		err = mark_chain_precision(env, regno);
7598 		if (err)
7599 			return err;
7600 		break;
7601 	case ARG_PTR_TO_INT:
7602 	case ARG_PTR_TO_LONG:
7603 	{
7604 		int size = int_ptr_type_to_size(arg_type);
7605 
7606 		err = check_helper_mem_access(env, regno, size, false, meta);
7607 		if (err)
7608 			return err;
7609 		err = check_ptr_alignment(env, reg, 0, size, true);
7610 		break;
7611 	}
7612 	case ARG_PTR_TO_CONST_STR:
7613 	{
7614 		struct bpf_map *map = reg->map_ptr;
7615 		int map_off;
7616 		u64 map_addr;
7617 		char *str_ptr;
7618 
7619 		if (!bpf_map_is_rdonly(map)) {
7620 			verbose(env, "R%d does not point to a readonly map'\n", regno);
7621 			return -EACCES;
7622 		}
7623 
7624 		if (!tnum_is_const(reg->var_off)) {
7625 			verbose(env, "R%d is not a constant address'\n", regno);
7626 			return -EACCES;
7627 		}
7628 
7629 		if (!map->ops->map_direct_value_addr) {
7630 			verbose(env, "no direct value access support for this map type\n");
7631 			return -EACCES;
7632 		}
7633 
7634 		err = check_map_access(env, regno, reg->off,
7635 				       map->value_size - reg->off, false,
7636 				       ACCESS_HELPER);
7637 		if (err)
7638 			return err;
7639 
7640 		map_off = reg->off + reg->var_off.value;
7641 		err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
7642 		if (err) {
7643 			verbose(env, "direct value access on string failed\n");
7644 			return err;
7645 		}
7646 
7647 		str_ptr = (char *)(long)(map_addr);
7648 		if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
7649 			verbose(env, "string is not zero-terminated\n");
7650 			return -EINVAL;
7651 		}
7652 		break;
7653 	}
7654 	case ARG_PTR_TO_KPTR:
7655 		err = process_kptr_func(env, regno, meta);
7656 		if (err)
7657 			return err;
7658 		break;
7659 	}
7660 
7661 	return err;
7662 }
7663 
7664 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
7665 {
7666 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
7667 	enum bpf_prog_type type = resolve_prog_type(env->prog);
7668 
7669 	if (func_id != BPF_FUNC_map_update_elem)
7670 		return false;
7671 
7672 	/* It's not possible to get access to a locked struct sock in these
7673 	 * contexts, so updating is safe.
7674 	 */
7675 	switch (type) {
7676 	case BPF_PROG_TYPE_TRACING:
7677 		if (eatype == BPF_TRACE_ITER)
7678 			return true;
7679 		break;
7680 	case BPF_PROG_TYPE_SOCKET_FILTER:
7681 	case BPF_PROG_TYPE_SCHED_CLS:
7682 	case BPF_PROG_TYPE_SCHED_ACT:
7683 	case BPF_PROG_TYPE_XDP:
7684 	case BPF_PROG_TYPE_SK_REUSEPORT:
7685 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
7686 	case BPF_PROG_TYPE_SK_LOOKUP:
7687 		return true;
7688 	default:
7689 		break;
7690 	}
7691 
7692 	verbose(env, "cannot update sockmap in this context\n");
7693 	return false;
7694 }
7695 
7696 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
7697 {
7698 	return env->prog->jit_requested &&
7699 	       bpf_jit_supports_subprog_tailcalls();
7700 }
7701 
7702 static int check_map_func_compatibility(struct bpf_verifier_env *env,
7703 					struct bpf_map *map, int func_id)
7704 {
7705 	if (!map)
7706 		return 0;
7707 
7708 	/* We need a two way check, first is from map perspective ... */
7709 	switch (map->map_type) {
7710 	case BPF_MAP_TYPE_PROG_ARRAY:
7711 		if (func_id != BPF_FUNC_tail_call)
7712 			goto error;
7713 		break;
7714 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
7715 		if (func_id != BPF_FUNC_perf_event_read &&
7716 		    func_id != BPF_FUNC_perf_event_output &&
7717 		    func_id != BPF_FUNC_skb_output &&
7718 		    func_id != BPF_FUNC_perf_event_read_value &&
7719 		    func_id != BPF_FUNC_xdp_output)
7720 			goto error;
7721 		break;
7722 	case BPF_MAP_TYPE_RINGBUF:
7723 		if (func_id != BPF_FUNC_ringbuf_output &&
7724 		    func_id != BPF_FUNC_ringbuf_reserve &&
7725 		    func_id != BPF_FUNC_ringbuf_query &&
7726 		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
7727 		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
7728 		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
7729 			goto error;
7730 		break;
7731 	case BPF_MAP_TYPE_USER_RINGBUF:
7732 		if (func_id != BPF_FUNC_user_ringbuf_drain)
7733 			goto error;
7734 		break;
7735 	case BPF_MAP_TYPE_STACK_TRACE:
7736 		if (func_id != BPF_FUNC_get_stackid)
7737 			goto error;
7738 		break;
7739 	case BPF_MAP_TYPE_CGROUP_ARRAY:
7740 		if (func_id != BPF_FUNC_skb_under_cgroup &&
7741 		    func_id != BPF_FUNC_current_task_under_cgroup)
7742 			goto error;
7743 		break;
7744 	case BPF_MAP_TYPE_CGROUP_STORAGE:
7745 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
7746 		if (func_id != BPF_FUNC_get_local_storage)
7747 			goto error;
7748 		break;
7749 	case BPF_MAP_TYPE_DEVMAP:
7750 	case BPF_MAP_TYPE_DEVMAP_HASH:
7751 		if (func_id != BPF_FUNC_redirect_map &&
7752 		    func_id != BPF_FUNC_map_lookup_elem)
7753 			goto error;
7754 		break;
7755 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
7756 	 * appear.
7757 	 */
7758 	case BPF_MAP_TYPE_CPUMAP:
7759 		if (func_id != BPF_FUNC_redirect_map)
7760 			goto error;
7761 		break;
7762 	case BPF_MAP_TYPE_XSKMAP:
7763 		if (func_id != BPF_FUNC_redirect_map &&
7764 		    func_id != BPF_FUNC_map_lookup_elem)
7765 			goto error;
7766 		break;
7767 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
7768 	case BPF_MAP_TYPE_HASH_OF_MAPS:
7769 		if (func_id != BPF_FUNC_map_lookup_elem)
7770 			goto error;
7771 		break;
7772 	case BPF_MAP_TYPE_SOCKMAP:
7773 		if (func_id != BPF_FUNC_sk_redirect_map &&
7774 		    func_id != BPF_FUNC_sock_map_update &&
7775 		    func_id != BPF_FUNC_map_delete_elem &&
7776 		    func_id != BPF_FUNC_msg_redirect_map &&
7777 		    func_id != BPF_FUNC_sk_select_reuseport &&
7778 		    func_id != BPF_FUNC_map_lookup_elem &&
7779 		    !may_update_sockmap(env, func_id))
7780 			goto error;
7781 		break;
7782 	case BPF_MAP_TYPE_SOCKHASH:
7783 		if (func_id != BPF_FUNC_sk_redirect_hash &&
7784 		    func_id != BPF_FUNC_sock_hash_update &&
7785 		    func_id != BPF_FUNC_map_delete_elem &&
7786 		    func_id != BPF_FUNC_msg_redirect_hash &&
7787 		    func_id != BPF_FUNC_sk_select_reuseport &&
7788 		    func_id != BPF_FUNC_map_lookup_elem &&
7789 		    !may_update_sockmap(env, func_id))
7790 			goto error;
7791 		break;
7792 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
7793 		if (func_id != BPF_FUNC_sk_select_reuseport)
7794 			goto error;
7795 		break;
7796 	case BPF_MAP_TYPE_QUEUE:
7797 	case BPF_MAP_TYPE_STACK:
7798 		if (func_id != BPF_FUNC_map_peek_elem &&
7799 		    func_id != BPF_FUNC_map_pop_elem &&
7800 		    func_id != BPF_FUNC_map_push_elem)
7801 			goto error;
7802 		break;
7803 	case BPF_MAP_TYPE_SK_STORAGE:
7804 		if (func_id != BPF_FUNC_sk_storage_get &&
7805 		    func_id != BPF_FUNC_sk_storage_delete &&
7806 		    func_id != BPF_FUNC_kptr_xchg)
7807 			goto error;
7808 		break;
7809 	case BPF_MAP_TYPE_INODE_STORAGE:
7810 		if (func_id != BPF_FUNC_inode_storage_get &&
7811 		    func_id != BPF_FUNC_inode_storage_delete &&
7812 		    func_id != BPF_FUNC_kptr_xchg)
7813 			goto error;
7814 		break;
7815 	case BPF_MAP_TYPE_TASK_STORAGE:
7816 		if (func_id != BPF_FUNC_task_storage_get &&
7817 		    func_id != BPF_FUNC_task_storage_delete &&
7818 		    func_id != BPF_FUNC_kptr_xchg)
7819 			goto error;
7820 		break;
7821 	case BPF_MAP_TYPE_CGRP_STORAGE:
7822 		if (func_id != BPF_FUNC_cgrp_storage_get &&
7823 		    func_id != BPF_FUNC_cgrp_storage_delete &&
7824 		    func_id != BPF_FUNC_kptr_xchg)
7825 			goto error;
7826 		break;
7827 	case BPF_MAP_TYPE_BLOOM_FILTER:
7828 		if (func_id != BPF_FUNC_map_peek_elem &&
7829 		    func_id != BPF_FUNC_map_push_elem)
7830 			goto error;
7831 		break;
7832 	default:
7833 		break;
7834 	}
7835 
7836 	/* ... and second from the function itself. */
7837 	switch (func_id) {
7838 	case BPF_FUNC_tail_call:
7839 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
7840 			goto error;
7841 		if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
7842 			verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
7843 			return -EINVAL;
7844 		}
7845 		break;
7846 	case BPF_FUNC_perf_event_read:
7847 	case BPF_FUNC_perf_event_output:
7848 	case BPF_FUNC_perf_event_read_value:
7849 	case BPF_FUNC_skb_output:
7850 	case BPF_FUNC_xdp_output:
7851 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
7852 			goto error;
7853 		break;
7854 	case BPF_FUNC_ringbuf_output:
7855 	case BPF_FUNC_ringbuf_reserve:
7856 	case BPF_FUNC_ringbuf_query:
7857 	case BPF_FUNC_ringbuf_reserve_dynptr:
7858 	case BPF_FUNC_ringbuf_submit_dynptr:
7859 	case BPF_FUNC_ringbuf_discard_dynptr:
7860 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
7861 			goto error;
7862 		break;
7863 	case BPF_FUNC_user_ringbuf_drain:
7864 		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
7865 			goto error;
7866 		break;
7867 	case BPF_FUNC_get_stackid:
7868 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
7869 			goto error;
7870 		break;
7871 	case BPF_FUNC_current_task_under_cgroup:
7872 	case BPF_FUNC_skb_under_cgroup:
7873 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
7874 			goto error;
7875 		break;
7876 	case BPF_FUNC_redirect_map:
7877 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
7878 		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
7879 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
7880 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
7881 			goto error;
7882 		break;
7883 	case BPF_FUNC_sk_redirect_map:
7884 	case BPF_FUNC_msg_redirect_map:
7885 	case BPF_FUNC_sock_map_update:
7886 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
7887 			goto error;
7888 		break;
7889 	case BPF_FUNC_sk_redirect_hash:
7890 	case BPF_FUNC_msg_redirect_hash:
7891 	case BPF_FUNC_sock_hash_update:
7892 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
7893 			goto error;
7894 		break;
7895 	case BPF_FUNC_get_local_storage:
7896 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
7897 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
7898 			goto error;
7899 		break;
7900 	case BPF_FUNC_sk_select_reuseport:
7901 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
7902 		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
7903 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
7904 			goto error;
7905 		break;
7906 	case BPF_FUNC_map_pop_elem:
7907 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
7908 		    map->map_type != BPF_MAP_TYPE_STACK)
7909 			goto error;
7910 		break;
7911 	case BPF_FUNC_map_peek_elem:
7912 	case BPF_FUNC_map_push_elem:
7913 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
7914 		    map->map_type != BPF_MAP_TYPE_STACK &&
7915 		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
7916 			goto error;
7917 		break;
7918 	case BPF_FUNC_map_lookup_percpu_elem:
7919 		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
7920 		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
7921 		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
7922 			goto error;
7923 		break;
7924 	case BPF_FUNC_sk_storage_get:
7925 	case BPF_FUNC_sk_storage_delete:
7926 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
7927 			goto error;
7928 		break;
7929 	case BPF_FUNC_inode_storage_get:
7930 	case BPF_FUNC_inode_storage_delete:
7931 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
7932 			goto error;
7933 		break;
7934 	case BPF_FUNC_task_storage_get:
7935 	case BPF_FUNC_task_storage_delete:
7936 		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
7937 			goto error;
7938 		break;
7939 	case BPF_FUNC_cgrp_storage_get:
7940 	case BPF_FUNC_cgrp_storage_delete:
7941 		if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
7942 			goto error;
7943 		break;
7944 	default:
7945 		break;
7946 	}
7947 
7948 	return 0;
7949 error:
7950 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
7951 		map->map_type, func_id_name(func_id), func_id);
7952 	return -EINVAL;
7953 }
7954 
7955 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
7956 {
7957 	int count = 0;
7958 
7959 	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
7960 		count++;
7961 	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
7962 		count++;
7963 	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
7964 		count++;
7965 	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
7966 		count++;
7967 	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
7968 		count++;
7969 
7970 	/* We only support one arg being in raw mode at the moment,
7971 	 * which is sufficient for the helper functions we have
7972 	 * right now.
7973 	 */
7974 	return count <= 1;
7975 }
7976 
7977 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
7978 {
7979 	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
7980 	bool has_size = fn->arg_size[arg] != 0;
7981 	bool is_next_size = false;
7982 
7983 	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
7984 		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
7985 
7986 	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
7987 		return is_next_size;
7988 
7989 	return has_size == is_next_size || is_next_size == is_fixed;
7990 }
7991 
7992 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
7993 {
7994 	/* bpf_xxx(..., buf, len) call will access 'len'
7995 	 * bytes from memory 'buf'. Both arg types need
7996 	 * to be paired, so make sure there's no buggy
7997 	 * helper function specification.
7998 	 */
7999 	if (arg_type_is_mem_size(fn->arg1_type) ||
8000 	    check_args_pair_invalid(fn, 0) ||
8001 	    check_args_pair_invalid(fn, 1) ||
8002 	    check_args_pair_invalid(fn, 2) ||
8003 	    check_args_pair_invalid(fn, 3) ||
8004 	    check_args_pair_invalid(fn, 4))
8005 		return false;
8006 
8007 	return true;
8008 }
8009 
8010 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
8011 {
8012 	int i;
8013 
8014 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8015 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
8016 			return !!fn->arg_btf_id[i];
8017 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
8018 			return fn->arg_btf_id[i] == BPF_PTR_POISON;
8019 		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
8020 		    /* arg_btf_id and arg_size are in a union. */
8021 		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
8022 		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
8023 			return false;
8024 	}
8025 
8026 	return true;
8027 }
8028 
8029 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
8030 {
8031 	return check_raw_mode_ok(fn) &&
8032 	       check_arg_pair_ok(fn) &&
8033 	       check_btf_id_ok(fn) ? 0 : -EINVAL;
8034 }
8035 
8036 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
8037  * are now invalid, so turn them into unknown SCALAR_VALUE.
8038  *
8039  * This also applies to dynptr slices belonging to skb and xdp dynptrs,
8040  * since these slices point to packet data.
8041  */
8042 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
8043 {
8044 	struct bpf_func_state *state;
8045 	struct bpf_reg_state *reg;
8046 
8047 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
8048 		if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
8049 			mark_reg_invalid(env, reg);
8050 	}));
8051 }
8052 
8053 enum {
8054 	AT_PKT_END = -1,
8055 	BEYOND_PKT_END = -2,
8056 };
8057 
8058 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
8059 {
8060 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
8061 	struct bpf_reg_state *reg = &state->regs[regn];
8062 
8063 	if (reg->type != PTR_TO_PACKET)
8064 		/* PTR_TO_PACKET_META is not supported yet */
8065 		return;
8066 
8067 	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
8068 	 * How far beyond pkt_end it goes is unknown.
8069 	 * if (!range_open) it's the case of pkt >= pkt_end
8070 	 * if (range_open) it's the case of pkt > pkt_end
8071 	 * hence this pointer is at least 1 byte bigger than pkt_end
8072 	 */
8073 	if (range_open)
8074 		reg->range = BEYOND_PKT_END;
8075 	else
8076 		reg->range = AT_PKT_END;
8077 }
8078 
8079 /* The pointer with the specified id has released its reference to kernel
8080  * resources. Identify all copies of the same pointer and clear the reference.
8081  */
8082 static int release_reference(struct bpf_verifier_env *env,
8083 			     int ref_obj_id)
8084 {
8085 	struct bpf_func_state *state;
8086 	struct bpf_reg_state *reg;
8087 	int err;
8088 
8089 	err = release_reference_state(cur_func(env), ref_obj_id);
8090 	if (err)
8091 		return err;
8092 
8093 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
8094 		if (reg->ref_obj_id == ref_obj_id)
8095 			mark_reg_invalid(env, reg);
8096 	}));
8097 
8098 	return 0;
8099 }
8100 
8101 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
8102 {
8103 	struct bpf_func_state *unused;
8104 	struct bpf_reg_state *reg;
8105 
8106 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
8107 		if (type_is_non_owning_ref(reg->type))
8108 			mark_reg_invalid(env, reg);
8109 	}));
8110 }
8111 
8112 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
8113 				    struct bpf_reg_state *regs)
8114 {
8115 	int i;
8116 
8117 	/* after the call registers r0 - r5 were scratched */
8118 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
8119 		mark_reg_not_init(env, regs, caller_saved[i]);
8120 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
8121 	}
8122 }
8123 
8124 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
8125 				   struct bpf_func_state *caller,
8126 				   struct bpf_func_state *callee,
8127 				   int insn_idx);
8128 
8129 static int set_callee_state(struct bpf_verifier_env *env,
8130 			    struct bpf_func_state *caller,
8131 			    struct bpf_func_state *callee, int insn_idx);
8132 
8133 static bool is_callback_calling_kfunc(u32 btf_id);
8134 
8135 static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
8136 			     int *insn_idx, int subprog,
8137 			     set_callee_state_fn set_callee_state_cb)
8138 {
8139 	struct bpf_verifier_state *state = env->cur_state;
8140 	struct bpf_func_info_aux *func_info_aux;
8141 	struct bpf_func_state *caller, *callee;
8142 	int err;
8143 	bool is_global = false;
8144 
8145 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
8146 		verbose(env, "the call stack of %d frames is too deep\n",
8147 			state->curframe + 2);
8148 		return -E2BIG;
8149 	}
8150 
8151 	caller = state->frame[state->curframe];
8152 	if (state->frame[state->curframe + 1]) {
8153 		verbose(env, "verifier bug. Frame %d already allocated\n",
8154 			state->curframe + 1);
8155 		return -EFAULT;
8156 	}
8157 
8158 	func_info_aux = env->prog->aux->func_info_aux;
8159 	if (func_info_aux)
8160 		is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
8161 	err = btf_check_subprog_call(env, subprog, caller->regs);
8162 	if (err == -EFAULT)
8163 		return err;
8164 	if (is_global) {
8165 		if (err) {
8166 			verbose(env, "Caller passes invalid args into func#%d\n",
8167 				subprog);
8168 			return err;
8169 		} else {
8170 			if (env->log.level & BPF_LOG_LEVEL)
8171 				verbose(env,
8172 					"Func#%d is global and valid. Skipping.\n",
8173 					subprog);
8174 			clear_caller_saved_regs(env, caller->regs);
8175 
8176 			/* All global functions return a 64-bit SCALAR_VALUE */
8177 			mark_reg_unknown(env, caller->regs, BPF_REG_0);
8178 			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
8179 
8180 			/* continue with next insn after call */
8181 			return 0;
8182 		}
8183 	}
8184 
8185 	/* set_callee_state is used for direct subprog calls, but we are
8186 	 * interested in validating only BPF helpers that can call subprogs as
8187 	 * callbacks
8188 	 */
8189 	if (set_callee_state_cb != set_callee_state) {
8190 		if (bpf_pseudo_kfunc_call(insn) &&
8191 		    !is_callback_calling_kfunc(insn->imm)) {
8192 			verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
8193 				func_id_name(insn->imm), insn->imm);
8194 			return -EFAULT;
8195 		} else if (!bpf_pseudo_kfunc_call(insn) &&
8196 			   !is_callback_calling_function(insn->imm)) { /* helper */
8197 			verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
8198 				func_id_name(insn->imm), insn->imm);
8199 			return -EFAULT;
8200 		}
8201 	}
8202 
8203 	if (insn->code == (BPF_JMP | BPF_CALL) &&
8204 	    insn->src_reg == 0 &&
8205 	    insn->imm == BPF_FUNC_timer_set_callback) {
8206 		struct bpf_verifier_state *async_cb;
8207 
8208 		/* there is no real recursion here. timer callbacks are async */
8209 		env->subprog_info[subprog].is_async_cb = true;
8210 		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
8211 					 *insn_idx, subprog);
8212 		if (!async_cb)
8213 			return -EFAULT;
8214 		callee = async_cb->frame[0];
8215 		callee->async_entry_cnt = caller->async_entry_cnt + 1;
8216 
8217 		/* Convert bpf_timer_set_callback() args into timer callback args */
8218 		err = set_callee_state_cb(env, caller, callee, *insn_idx);
8219 		if (err)
8220 			return err;
8221 
8222 		clear_caller_saved_regs(env, caller->regs);
8223 		mark_reg_unknown(env, caller->regs, BPF_REG_0);
8224 		caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
8225 		/* continue with next insn after call */
8226 		return 0;
8227 	}
8228 
8229 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
8230 	if (!callee)
8231 		return -ENOMEM;
8232 	state->frame[state->curframe + 1] = callee;
8233 
8234 	/* callee cannot access r0, r6 - r9 for reading and has to write
8235 	 * into its own stack before reading from it.
8236 	 * callee can read/write into caller's stack
8237 	 */
8238 	init_func_state(env, callee,
8239 			/* remember the callsite, it will be used by bpf_exit */
8240 			*insn_idx /* callsite */,
8241 			state->curframe + 1 /* frameno within this callchain */,
8242 			subprog /* subprog number within this prog */);
8243 
8244 	/* Transfer references to the callee */
8245 	err = copy_reference_state(callee, caller);
8246 	if (err)
8247 		goto err_out;
8248 
8249 	err = set_callee_state_cb(env, caller, callee, *insn_idx);
8250 	if (err)
8251 		goto err_out;
8252 
8253 	clear_caller_saved_regs(env, caller->regs);
8254 
8255 	/* only increment it after check_reg_arg() finished */
8256 	state->curframe++;
8257 
8258 	/* and go analyze first insn of the callee */
8259 	*insn_idx = env->subprog_info[subprog].start - 1;
8260 
8261 	if (env->log.level & BPF_LOG_LEVEL) {
8262 		verbose(env, "caller:\n");
8263 		print_verifier_state(env, caller, true);
8264 		verbose(env, "callee:\n");
8265 		print_verifier_state(env, callee, true);
8266 	}
8267 	return 0;
8268 
8269 err_out:
8270 	free_func_state(callee);
8271 	state->frame[state->curframe + 1] = NULL;
8272 	return err;
8273 }
8274 
8275 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
8276 				   struct bpf_func_state *caller,
8277 				   struct bpf_func_state *callee)
8278 {
8279 	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
8280 	 *      void *callback_ctx, u64 flags);
8281 	 * callback_fn(struct bpf_map *map, void *key, void *value,
8282 	 *      void *callback_ctx);
8283 	 */
8284 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
8285 
8286 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
8287 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
8288 	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
8289 
8290 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
8291 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
8292 	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
8293 
8294 	/* pointer to stack or null */
8295 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
8296 
8297 	/* unused */
8298 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
8299 	return 0;
8300 }
8301 
8302 static int set_callee_state(struct bpf_verifier_env *env,
8303 			    struct bpf_func_state *caller,
8304 			    struct bpf_func_state *callee, int insn_idx)
8305 {
8306 	int i;
8307 
8308 	/* copy r1 - r5 args that callee can access.  The copy includes parent
8309 	 * pointers, which connects us up to the liveness chain
8310 	 */
8311 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
8312 		callee->regs[i] = caller->regs[i];
8313 	return 0;
8314 }
8315 
8316 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
8317 			   int *insn_idx)
8318 {
8319 	int subprog, target_insn;
8320 
8321 	target_insn = *insn_idx + insn->imm + 1;
8322 	subprog = find_subprog(env, target_insn);
8323 	if (subprog < 0) {
8324 		verbose(env, "verifier bug. No program starts at insn %d\n",
8325 			target_insn);
8326 		return -EFAULT;
8327 	}
8328 
8329 	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
8330 }
8331 
8332 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
8333 				       struct bpf_func_state *caller,
8334 				       struct bpf_func_state *callee,
8335 				       int insn_idx)
8336 {
8337 	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
8338 	struct bpf_map *map;
8339 	int err;
8340 
8341 	if (bpf_map_ptr_poisoned(insn_aux)) {
8342 		verbose(env, "tail_call abusing map_ptr\n");
8343 		return -EINVAL;
8344 	}
8345 
8346 	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
8347 	if (!map->ops->map_set_for_each_callback_args ||
8348 	    !map->ops->map_for_each_callback) {
8349 		verbose(env, "callback function not allowed for map\n");
8350 		return -ENOTSUPP;
8351 	}
8352 
8353 	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
8354 	if (err)
8355 		return err;
8356 
8357 	callee->in_callback_fn = true;
8358 	callee->callback_ret_range = tnum_range(0, 1);
8359 	return 0;
8360 }
8361 
8362 static int set_loop_callback_state(struct bpf_verifier_env *env,
8363 				   struct bpf_func_state *caller,
8364 				   struct bpf_func_state *callee,
8365 				   int insn_idx)
8366 {
8367 	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
8368 	 *	    u64 flags);
8369 	 * callback_fn(u32 index, void *callback_ctx);
8370 	 */
8371 	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
8372 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
8373 
8374 	/* unused */
8375 	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
8376 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
8377 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
8378 
8379 	callee->in_callback_fn = true;
8380 	callee->callback_ret_range = tnum_range(0, 1);
8381 	return 0;
8382 }
8383 
8384 static int set_timer_callback_state(struct bpf_verifier_env *env,
8385 				    struct bpf_func_state *caller,
8386 				    struct bpf_func_state *callee,
8387 				    int insn_idx)
8388 {
8389 	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
8390 
8391 	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
8392 	 * callback_fn(struct bpf_map *map, void *key, void *value);
8393 	 */
8394 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
8395 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
8396 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
8397 
8398 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
8399 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
8400 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
8401 
8402 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
8403 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
8404 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
8405 
8406 	/* unused */
8407 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
8408 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
8409 	callee->in_async_callback_fn = true;
8410 	callee->callback_ret_range = tnum_range(0, 1);
8411 	return 0;
8412 }
8413 
8414 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
8415 				       struct bpf_func_state *caller,
8416 				       struct bpf_func_state *callee,
8417 				       int insn_idx)
8418 {
8419 	/* bpf_find_vma(struct task_struct *task, u64 addr,
8420 	 *               void *callback_fn, void *callback_ctx, u64 flags)
8421 	 * (callback_fn)(struct task_struct *task,
8422 	 *               struct vm_area_struct *vma, void *callback_ctx);
8423 	 */
8424 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
8425 
8426 	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
8427 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
8428 	callee->regs[BPF_REG_2].btf =  btf_vmlinux;
8429 	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA],
8430 
8431 	/* pointer to stack or null */
8432 	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
8433 
8434 	/* unused */
8435 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
8436 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
8437 	callee->in_callback_fn = true;
8438 	callee->callback_ret_range = tnum_range(0, 1);
8439 	return 0;
8440 }
8441 
8442 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
8443 					   struct bpf_func_state *caller,
8444 					   struct bpf_func_state *callee,
8445 					   int insn_idx)
8446 {
8447 	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
8448 	 *			  callback_ctx, u64 flags);
8449 	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
8450 	 */
8451 	__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
8452 	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
8453 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
8454 
8455 	/* unused */
8456 	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
8457 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
8458 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
8459 
8460 	callee->in_callback_fn = true;
8461 	callee->callback_ret_range = tnum_range(0, 1);
8462 	return 0;
8463 }
8464 
8465 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
8466 					 struct bpf_func_state *caller,
8467 					 struct bpf_func_state *callee,
8468 					 int insn_idx)
8469 {
8470 	/* void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
8471 	 *                     bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
8472 	 *
8473 	 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add is the same PTR_TO_BTF_ID w/ offset
8474 	 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
8475 	 * by this point, so look at 'root'
8476 	 */
8477 	struct btf_field *field;
8478 
8479 	field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
8480 				      BPF_RB_ROOT);
8481 	if (!field || !field->graph_root.value_btf_id)
8482 		return -EFAULT;
8483 
8484 	mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
8485 	ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
8486 	mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
8487 	ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
8488 
8489 	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
8490 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
8491 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
8492 	callee->in_callback_fn = true;
8493 	callee->callback_ret_range = tnum_range(0, 1);
8494 	return 0;
8495 }
8496 
8497 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
8498 
8499 /* Are we currently verifying the callback for a rbtree helper that must
8500  * be called with lock held? If so, no need to complain about unreleased
8501  * lock
8502  */
8503 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
8504 {
8505 	struct bpf_verifier_state *state = env->cur_state;
8506 	struct bpf_insn *insn = env->prog->insnsi;
8507 	struct bpf_func_state *callee;
8508 	int kfunc_btf_id;
8509 
8510 	if (!state->curframe)
8511 		return false;
8512 
8513 	callee = state->frame[state->curframe];
8514 
8515 	if (!callee->in_callback_fn)
8516 		return false;
8517 
8518 	kfunc_btf_id = insn[callee->callsite].imm;
8519 	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
8520 }
8521 
8522 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
8523 {
8524 	struct bpf_verifier_state *state = env->cur_state;
8525 	struct bpf_func_state *caller, *callee;
8526 	struct bpf_reg_state *r0;
8527 	int err;
8528 
8529 	callee = state->frame[state->curframe];
8530 	r0 = &callee->regs[BPF_REG_0];
8531 	if (r0->type == PTR_TO_STACK) {
8532 		/* technically it's ok to return caller's stack pointer
8533 		 * (or caller's caller's pointer) back to the caller,
8534 		 * since these pointers are valid. Only current stack
8535 		 * pointer will be invalid as soon as function exits,
8536 		 * but let's be conservative
8537 		 */
8538 		verbose(env, "cannot return stack pointer to the caller\n");
8539 		return -EINVAL;
8540 	}
8541 
8542 	caller = state->frame[state->curframe - 1];
8543 	if (callee->in_callback_fn) {
8544 		/* enforce R0 return value range [0, 1]. */
8545 		struct tnum range = callee->callback_ret_range;
8546 
8547 		if (r0->type != SCALAR_VALUE) {
8548 			verbose(env, "R0 not a scalar value\n");
8549 			return -EACCES;
8550 		}
8551 		if (!tnum_in(range, r0->var_off)) {
8552 			verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
8553 			return -EINVAL;
8554 		}
8555 	} else {
8556 		/* return to the caller whatever r0 had in the callee */
8557 		caller->regs[BPF_REG_0] = *r0;
8558 	}
8559 
8560 	/* callback_fn frame should have released its own additions to parent's
8561 	 * reference state at this point, or check_reference_leak would
8562 	 * complain, hence it must be the same as the caller. There is no need
8563 	 * to copy it back.
8564 	 */
8565 	if (!callee->in_callback_fn) {
8566 		/* Transfer references to the caller */
8567 		err = copy_reference_state(caller, callee);
8568 		if (err)
8569 			return err;
8570 	}
8571 
8572 	*insn_idx = callee->callsite + 1;
8573 	if (env->log.level & BPF_LOG_LEVEL) {
8574 		verbose(env, "returning from callee:\n");
8575 		print_verifier_state(env, callee, true);
8576 		verbose(env, "to caller at %d:\n", *insn_idx);
8577 		print_verifier_state(env, caller, true);
8578 	}
8579 	/* clear everything in the callee */
8580 	free_func_state(callee);
8581 	state->frame[state->curframe--] = NULL;
8582 	return 0;
8583 }
8584 
8585 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
8586 				   int func_id,
8587 				   struct bpf_call_arg_meta *meta)
8588 {
8589 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
8590 
8591 	if (ret_type != RET_INTEGER ||
8592 	    (func_id != BPF_FUNC_get_stack &&
8593 	     func_id != BPF_FUNC_get_task_stack &&
8594 	     func_id != BPF_FUNC_probe_read_str &&
8595 	     func_id != BPF_FUNC_probe_read_kernel_str &&
8596 	     func_id != BPF_FUNC_probe_read_user_str))
8597 		return;
8598 
8599 	ret_reg->smax_value = meta->msize_max_value;
8600 	ret_reg->s32_max_value = meta->msize_max_value;
8601 	ret_reg->smin_value = -MAX_ERRNO;
8602 	ret_reg->s32_min_value = -MAX_ERRNO;
8603 	reg_bounds_sync(ret_reg);
8604 }
8605 
8606 static int
8607 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
8608 		int func_id, int insn_idx)
8609 {
8610 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
8611 	struct bpf_map *map = meta->map_ptr;
8612 
8613 	if (func_id != BPF_FUNC_tail_call &&
8614 	    func_id != BPF_FUNC_map_lookup_elem &&
8615 	    func_id != BPF_FUNC_map_update_elem &&
8616 	    func_id != BPF_FUNC_map_delete_elem &&
8617 	    func_id != BPF_FUNC_map_push_elem &&
8618 	    func_id != BPF_FUNC_map_pop_elem &&
8619 	    func_id != BPF_FUNC_map_peek_elem &&
8620 	    func_id != BPF_FUNC_for_each_map_elem &&
8621 	    func_id != BPF_FUNC_redirect_map &&
8622 	    func_id != BPF_FUNC_map_lookup_percpu_elem)
8623 		return 0;
8624 
8625 	if (map == NULL) {
8626 		verbose(env, "kernel subsystem misconfigured verifier\n");
8627 		return -EINVAL;
8628 	}
8629 
8630 	/* In case of read-only, some additional restrictions
8631 	 * need to be applied in order to prevent altering the
8632 	 * state of the map from program side.
8633 	 */
8634 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
8635 	    (func_id == BPF_FUNC_map_delete_elem ||
8636 	     func_id == BPF_FUNC_map_update_elem ||
8637 	     func_id == BPF_FUNC_map_push_elem ||
8638 	     func_id == BPF_FUNC_map_pop_elem)) {
8639 		verbose(env, "write into map forbidden\n");
8640 		return -EACCES;
8641 	}
8642 
8643 	if (!BPF_MAP_PTR(aux->map_ptr_state))
8644 		bpf_map_ptr_store(aux, meta->map_ptr,
8645 				  !meta->map_ptr->bypass_spec_v1);
8646 	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
8647 		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
8648 				  !meta->map_ptr->bypass_spec_v1);
8649 	return 0;
8650 }
8651 
8652 static int
8653 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
8654 		int func_id, int insn_idx)
8655 {
8656 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
8657 	struct bpf_reg_state *regs = cur_regs(env), *reg;
8658 	struct bpf_map *map = meta->map_ptr;
8659 	u64 val, max;
8660 	int err;
8661 
8662 	if (func_id != BPF_FUNC_tail_call)
8663 		return 0;
8664 	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
8665 		verbose(env, "kernel subsystem misconfigured verifier\n");
8666 		return -EINVAL;
8667 	}
8668 
8669 	reg = &regs[BPF_REG_3];
8670 	val = reg->var_off.value;
8671 	max = map->max_entries;
8672 
8673 	if (!(register_is_const(reg) && val < max)) {
8674 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
8675 		return 0;
8676 	}
8677 
8678 	err = mark_chain_precision(env, BPF_REG_3);
8679 	if (err)
8680 		return err;
8681 	if (bpf_map_key_unseen(aux))
8682 		bpf_map_key_store(aux, val);
8683 	else if (!bpf_map_key_poisoned(aux) &&
8684 		  bpf_map_key_immediate(aux) != val)
8685 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
8686 	return 0;
8687 }
8688 
8689 static int check_reference_leak(struct bpf_verifier_env *env)
8690 {
8691 	struct bpf_func_state *state = cur_func(env);
8692 	bool refs_lingering = false;
8693 	int i;
8694 
8695 	if (state->frameno && !state->in_callback_fn)
8696 		return 0;
8697 
8698 	for (i = 0; i < state->acquired_refs; i++) {
8699 		if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
8700 			continue;
8701 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
8702 			state->refs[i].id, state->refs[i].insn_idx);
8703 		refs_lingering = true;
8704 	}
8705 	return refs_lingering ? -EINVAL : 0;
8706 }
8707 
8708 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
8709 				   struct bpf_reg_state *regs)
8710 {
8711 	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
8712 	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
8713 	struct bpf_map *fmt_map = fmt_reg->map_ptr;
8714 	struct bpf_bprintf_data data = {};
8715 	int err, fmt_map_off, num_args;
8716 	u64 fmt_addr;
8717 	char *fmt;
8718 
8719 	/* data must be an array of u64 */
8720 	if (data_len_reg->var_off.value % 8)
8721 		return -EINVAL;
8722 	num_args = data_len_reg->var_off.value / 8;
8723 
8724 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
8725 	 * and map_direct_value_addr is set.
8726 	 */
8727 	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
8728 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
8729 						  fmt_map_off);
8730 	if (err) {
8731 		verbose(env, "verifier bug\n");
8732 		return -EFAULT;
8733 	}
8734 	fmt = (char *)(long)fmt_addr + fmt_map_off;
8735 
8736 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
8737 	 * can focus on validating the format specifiers.
8738 	 */
8739 	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
8740 	if (err < 0)
8741 		verbose(env, "Invalid format string\n");
8742 
8743 	return err;
8744 }
8745 
8746 static int check_get_func_ip(struct bpf_verifier_env *env)
8747 {
8748 	enum bpf_prog_type type = resolve_prog_type(env->prog);
8749 	int func_id = BPF_FUNC_get_func_ip;
8750 
8751 	if (type == BPF_PROG_TYPE_TRACING) {
8752 		if (!bpf_prog_has_trampoline(env->prog)) {
8753 			verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
8754 				func_id_name(func_id), func_id);
8755 			return -ENOTSUPP;
8756 		}
8757 		return 0;
8758 	} else if (type == BPF_PROG_TYPE_KPROBE) {
8759 		return 0;
8760 	}
8761 
8762 	verbose(env, "func %s#%d not supported for program type %d\n",
8763 		func_id_name(func_id), func_id, type);
8764 	return -ENOTSUPP;
8765 }
8766 
8767 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
8768 {
8769 	return &env->insn_aux_data[env->insn_idx];
8770 }
8771 
8772 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
8773 {
8774 	struct bpf_reg_state *regs = cur_regs(env);
8775 	struct bpf_reg_state *reg = &regs[BPF_REG_4];
8776 	bool reg_is_null = register_is_null(reg);
8777 
8778 	if (reg_is_null)
8779 		mark_chain_precision(env, BPF_REG_4);
8780 
8781 	return reg_is_null;
8782 }
8783 
8784 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
8785 {
8786 	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
8787 
8788 	if (!state->initialized) {
8789 		state->initialized = 1;
8790 		state->fit_for_inline = loop_flag_is_zero(env);
8791 		state->callback_subprogno = subprogno;
8792 		return;
8793 	}
8794 
8795 	if (!state->fit_for_inline)
8796 		return;
8797 
8798 	state->fit_for_inline = (loop_flag_is_zero(env) &&
8799 				 state->callback_subprogno == subprogno);
8800 }
8801 
8802 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
8803 			     int *insn_idx_p)
8804 {
8805 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
8806 	const struct bpf_func_proto *fn = NULL;
8807 	enum bpf_return_type ret_type;
8808 	enum bpf_type_flag ret_flag;
8809 	struct bpf_reg_state *regs;
8810 	struct bpf_call_arg_meta meta;
8811 	int insn_idx = *insn_idx_p;
8812 	bool changes_data;
8813 	int i, err, func_id;
8814 
8815 	/* find function prototype */
8816 	func_id = insn->imm;
8817 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
8818 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
8819 			func_id);
8820 		return -EINVAL;
8821 	}
8822 
8823 	if (env->ops->get_func_proto)
8824 		fn = env->ops->get_func_proto(func_id, env->prog);
8825 	if (!fn) {
8826 		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
8827 			func_id);
8828 		return -EINVAL;
8829 	}
8830 
8831 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
8832 	if (!env->prog->gpl_compatible && fn->gpl_only) {
8833 		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
8834 		return -EINVAL;
8835 	}
8836 
8837 	if (fn->allowed && !fn->allowed(env->prog)) {
8838 		verbose(env, "helper call is not allowed in probe\n");
8839 		return -EINVAL;
8840 	}
8841 
8842 	if (!env->prog->aux->sleepable && fn->might_sleep) {
8843 		verbose(env, "helper call might sleep in a non-sleepable prog\n");
8844 		return -EINVAL;
8845 	}
8846 
8847 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
8848 	changes_data = bpf_helper_changes_pkt_data(fn->func);
8849 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
8850 		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
8851 			func_id_name(func_id), func_id);
8852 		return -EINVAL;
8853 	}
8854 
8855 	memset(&meta, 0, sizeof(meta));
8856 	meta.pkt_access = fn->pkt_access;
8857 
8858 	err = check_func_proto(fn, func_id);
8859 	if (err) {
8860 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
8861 			func_id_name(func_id), func_id);
8862 		return err;
8863 	}
8864 
8865 	if (env->cur_state->active_rcu_lock) {
8866 		if (fn->might_sleep) {
8867 			verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
8868 				func_id_name(func_id), func_id);
8869 			return -EINVAL;
8870 		}
8871 
8872 		if (env->prog->aux->sleepable && is_storage_get_function(func_id))
8873 			env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
8874 	}
8875 
8876 	meta.func_id = func_id;
8877 	/* check args */
8878 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
8879 		err = check_func_arg(env, i, &meta, fn, insn_idx);
8880 		if (err)
8881 			return err;
8882 	}
8883 
8884 	err = record_func_map(env, &meta, func_id, insn_idx);
8885 	if (err)
8886 		return err;
8887 
8888 	err = record_func_key(env, &meta, func_id, insn_idx);
8889 	if (err)
8890 		return err;
8891 
8892 	/* Mark slots with STACK_MISC in case of raw mode, stack offset
8893 	 * is inferred from register state.
8894 	 */
8895 	for (i = 0; i < meta.access_size; i++) {
8896 		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
8897 				       BPF_WRITE, -1, false);
8898 		if (err)
8899 			return err;
8900 	}
8901 
8902 	regs = cur_regs(env);
8903 
8904 	if (meta.release_regno) {
8905 		err = -EINVAL;
8906 		/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
8907 		 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
8908 		 * is safe to do directly.
8909 		 */
8910 		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
8911 			if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
8912 				verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
8913 				return -EFAULT;
8914 			}
8915 			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
8916 		} else if (meta.ref_obj_id) {
8917 			err = release_reference(env, meta.ref_obj_id);
8918 		} else if (register_is_null(&regs[meta.release_regno])) {
8919 			/* meta.ref_obj_id can only be 0 if register that is meant to be
8920 			 * released is NULL, which must be > R0.
8921 			 */
8922 			err = 0;
8923 		}
8924 		if (err) {
8925 			verbose(env, "func %s#%d reference has not been acquired before\n",
8926 				func_id_name(func_id), func_id);
8927 			return err;
8928 		}
8929 	}
8930 
8931 	switch (func_id) {
8932 	case BPF_FUNC_tail_call:
8933 		err = check_reference_leak(env);
8934 		if (err) {
8935 			verbose(env, "tail_call would lead to reference leak\n");
8936 			return err;
8937 		}
8938 		break;
8939 	case BPF_FUNC_get_local_storage:
8940 		/* check that flags argument in get_local_storage(map, flags) is 0,
8941 		 * this is required because get_local_storage() can't return an error.
8942 		 */
8943 		if (!register_is_null(&regs[BPF_REG_2])) {
8944 			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
8945 			return -EINVAL;
8946 		}
8947 		break;
8948 	case BPF_FUNC_for_each_map_elem:
8949 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8950 					set_map_elem_callback_state);
8951 		break;
8952 	case BPF_FUNC_timer_set_callback:
8953 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8954 					set_timer_callback_state);
8955 		break;
8956 	case BPF_FUNC_find_vma:
8957 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8958 					set_find_vma_callback_state);
8959 		break;
8960 	case BPF_FUNC_snprintf:
8961 		err = check_bpf_snprintf_call(env, regs);
8962 		break;
8963 	case BPF_FUNC_loop:
8964 		update_loop_inline_state(env, meta.subprogno);
8965 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8966 					set_loop_callback_state);
8967 		break;
8968 	case BPF_FUNC_dynptr_from_mem:
8969 		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
8970 			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
8971 				reg_type_str(env, regs[BPF_REG_1].type));
8972 			return -EACCES;
8973 		}
8974 		break;
8975 	case BPF_FUNC_set_retval:
8976 		if (prog_type == BPF_PROG_TYPE_LSM &&
8977 		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
8978 			if (!env->prog->aux->attach_func_proto->type) {
8979 				/* Make sure programs that attach to void
8980 				 * hooks don't try to modify return value.
8981 				 */
8982 				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
8983 				return -EINVAL;
8984 			}
8985 		}
8986 		break;
8987 	case BPF_FUNC_dynptr_data:
8988 	{
8989 		struct bpf_reg_state *reg;
8990 		int id, ref_obj_id;
8991 
8992 		reg = get_dynptr_arg_reg(env, fn, regs);
8993 		if (!reg)
8994 			return -EFAULT;
8995 
8996 
8997 		if (meta.dynptr_id) {
8998 			verbose(env, "verifier internal error: meta.dynptr_id already set\n");
8999 			return -EFAULT;
9000 		}
9001 		if (meta.ref_obj_id) {
9002 			verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
9003 			return -EFAULT;
9004 		}
9005 
9006 		id = dynptr_id(env, reg);
9007 		if (id < 0) {
9008 			verbose(env, "verifier internal error: failed to obtain dynptr id\n");
9009 			return id;
9010 		}
9011 
9012 		ref_obj_id = dynptr_ref_obj_id(env, reg);
9013 		if (ref_obj_id < 0) {
9014 			verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
9015 			return ref_obj_id;
9016 		}
9017 
9018 		meta.dynptr_id = id;
9019 		meta.ref_obj_id = ref_obj_id;
9020 
9021 		break;
9022 	}
9023 	case BPF_FUNC_dynptr_write:
9024 	{
9025 		enum bpf_dynptr_type dynptr_type;
9026 		struct bpf_reg_state *reg;
9027 
9028 		reg = get_dynptr_arg_reg(env, fn, regs);
9029 		if (!reg)
9030 			return -EFAULT;
9031 
9032 		dynptr_type = dynptr_get_type(env, reg);
9033 		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
9034 			return -EFAULT;
9035 
9036 		if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
9037 			/* this will trigger clear_all_pkt_pointers(), which will
9038 			 * invalidate all dynptr slices associated with the skb
9039 			 */
9040 			changes_data = true;
9041 
9042 		break;
9043 	}
9044 	case BPF_FUNC_user_ringbuf_drain:
9045 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
9046 					set_user_ringbuf_callback_state);
9047 		break;
9048 	}
9049 
9050 	if (err)
9051 		return err;
9052 
9053 	/* reset caller saved regs */
9054 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9055 		mark_reg_not_init(env, regs, caller_saved[i]);
9056 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
9057 	}
9058 
9059 	/* helper call returns 64-bit value. */
9060 	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9061 
9062 	/* update return register (already marked as written above) */
9063 	ret_type = fn->ret_type;
9064 	ret_flag = type_flag(ret_type);
9065 
9066 	switch (base_type(ret_type)) {
9067 	case RET_INTEGER:
9068 		/* sets type to SCALAR_VALUE */
9069 		mark_reg_unknown(env, regs, BPF_REG_0);
9070 		break;
9071 	case RET_VOID:
9072 		regs[BPF_REG_0].type = NOT_INIT;
9073 		break;
9074 	case RET_PTR_TO_MAP_VALUE:
9075 		/* There is no offset yet applied, variable or fixed */
9076 		mark_reg_known_zero(env, regs, BPF_REG_0);
9077 		/* remember map_ptr, so that check_map_access()
9078 		 * can check 'value_size' boundary of memory access
9079 		 * to map element returned from bpf_map_lookup_elem()
9080 		 */
9081 		if (meta.map_ptr == NULL) {
9082 			verbose(env,
9083 				"kernel subsystem misconfigured verifier\n");
9084 			return -EINVAL;
9085 		}
9086 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
9087 		regs[BPF_REG_0].map_uid = meta.map_uid;
9088 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
9089 		if (!type_may_be_null(ret_type) &&
9090 		    btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
9091 			regs[BPF_REG_0].id = ++env->id_gen;
9092 		}
9093 		break;
9094 	case RET_PTR_TO_SOCKET:
9095 		mark_reg_known_zero(env, regs, BPF_REG_0);
9096 		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
9097 		break;
9098 	case RET_PTR_TO_SOCK_COMMON:
9099 		mark_reg_known_zero(env, regs, BPF_REG_0);
9100 		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
9101 		break;
9102 	case RET_PTR_TO_TCP_SOCK:
9103 		mark_reg_known_zero(env, regs, BPF_REG_0);
9104 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
9105 		break;
9106 	case RET_PTR_TO_MEM:
9107 		mark_reg_known_zero(env, regs, BPF_REG_0);
9108 		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
9109 		regs[BPF_REG_0].mem_size = meta.mem_size;
9110 		break;
9111 	case RET_PTR_TO_MEM_OR_BTF_ID:
9112 	{
9113 		const struct btf_type *t;
9114 
9115 		mark_reg_known_zero(env, regs, BPF_REG_0);
9116 		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
9117 		if (!btf_type_is_struct(t)) {
9118 			u32 tsize;
9119 			const struct btf_type *ret;
9120 			const char *tname;
9121 
9122 			/* resolve the type size of ksym. */
9123 			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
9124 			if (IS_ERR(ret)) {
9125 				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
9126 				verbose(env, "unable to resolve the size of type '%s': %ld\n",
9127 					tname, PTR_ERR(ret));
9128 				return -EINVAL;
9129 			}
9130 			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
9131 			regs[BPF_REG_0].mem_size = tsize;
9132 		} else {
9133 			/* MEM_RDONLY may be carried from ret_flag, but it
9134 			 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
9135 			 * it will confuse the check of PTR_TO_BTF_ID in
9136 			 * check_mem_access().
9137 			 */
9138 			ret_flag &= ~MEM_RDONLY;
9139 
9140 			regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
9141 			regs[BPF_REG_0].btf = meta.ret_btf;
9142 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
9143 		}
9144 		break;
9145 	}
9146 	case RET_PTR_TO_BTF_ID:
9147 	{
9148 		struct btf *ret_btf;
9149 		int ret_btf_id;
9150 
9151 		mark_reg_known_zero(env, regs, BPF_REG_0);
9152 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
9153 		if (func_id == BPF_FUNC_kptr_xchg) {
9154 			ret_btf = meta.kptr_field->kptr.btf;
9155 			ret_btf_id = meta.kptr_field->kptr.btf_id;
9156 			if (!btf_is_kernel(ret_btf))
9157 				regs[BPF_REG_0].type |= MEM_ALLOC;
9158 		} else {
9159 			if (fn->ret_btf_id == BPF_PTR_POISON) {
9160 				verbose(env, "verifier internal error:");
9161 				verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
9162 					func_id_name(func_id));
9163 				return -EINVAL;
9164 			}
9165 			ret_btf = btf_vmlinux;
9166 			ret_btf_id = *fn->ret_btf_id;
9167 		}
9168 		if (ret_btf_id == 0) {
9169 			verbose(env, "invalid return type %u of func %s#%d\n",
9170 				base_type(ret_type), func_id_name(func_id),
9171 				func_id);
9172 			return -EINVAL;
9173 		}
9174 		regs[BPF_REG_0].btf = ret_btf;
9175 		regs[BPF_REG_0].btf_id = ret_btf_id;
9176 		break;
9177 	}
9178 	default:
9179 		verbose(env, "unknown return type %u of func %s#%d\n",
9180 			base_type(ret_type), func_id_name(func_id), func_id);
9181 		return -EINVAL;
9182 	}
9183 
9184 	if (type_may_be_null(regs[BPF_REG_0].type))
9185 		regs[BPF_REG_0].id = ++env->id_gen;
9186 
9187 	if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
9188 		verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
9189 			func_id_name(func_id), func_id);
9190 		return -EFAULT;
9191 	}
9192 
9193 	if (is_dynptr_ref_function(func_id))
9194 		regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
9195 
9196 	if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
9197 		/* For release_reference() */
9198 		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
9199 	} else if (is_acquire_function(func_id, meta.map_ptr)) {
9200 		int id = acquire_reference_state(env, insn_idx);
9201 
9202 		if (id < 0)
9203 			return id;
9204 		/* For mark_ptr_or_null_reg() */
9205 		regs[BPF_REG_0].id = id;
9206 		/* For release_reference() */
9207 		regs[BPF_REG_0].ref_obj_id = id;
9208 	}
9209 
9210 	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
9211 
9212 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
9213 	if (err)
9214 		return err;
9215 
9216 	if ((func_id == BPF_FUNC_get_stack ||
9217 	     func_id == BPF_FUNC_get_task_stack) &&
9218 	    !env->prog->has_callchain_buf) {
9219 		const char *err_str;
9220 
9221 #ifdef CONFIG_PERF_EVENTS
9222 		err = get_callchain_buffers(sysctl_perf_event_max_stack);
9223 		err_str = "cannot get callchain buffer for func %s#%d\n";
9224 #else
9225 		err = -ENOTSUPP;
9226 		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
9227 #endif
9228 		if (err) {
9229 			verbose(env, err_str, func_id_name(func_id), func_id);
9230 			return err;
9231 		}
9232 
9233 		env->prog->has_callchain_buf = true;
9234 	}
9235 
9236 	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
9237 		env->prog->call_get_stack = true;
9238 
9239 	if (func_id == BPF_FUNC_get_func_ip) {
9240 		if (check_get_func_ip(env))
9241 			return -ENOTSUPP;
9242 		env->prog->call_get_func_ip = true;
9243 	}
9244 
9245 	if (changes_data)
9246 		clear_all_pkt_pointers(env);
9247 	return 0;
9248 }
9249 
9250 /* mark_btf_func_reg_size() is used when the reg size is determined by
9251  * the BTF func_proto's return value size and argument.
9252  */
9253 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
9254 				   size_t reg_size)
9255 {
9256 	struct bpf_reg_state *reg = &cur_regs(env)[regno];
9257 
9258 	if (regno == BPF_REG_0) {
9259 		/* Function return value */
9260 		reg->live |= REG_LIVE_WRITTEN;
9261 		reg->subreg_def = reg_size == sizeof(u64) ?
9262 			DEF_NOT_SUBREG : env->insn_idx + 1;
9263 	} else {
9264 		/* Function argument */
9265 		if (reg_size == sizeof(u64)) {
9266 			mark_insn_zext(env, reg);
9267 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
9268 		} else {
9269 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
9270 		}
9271 	}
9272 }
9273 
9274 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
9275 {
9276 	return meta->kfunc_flags & KF_ACQUIRE;
9277 }
9278 
9279 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
9280 {
9281 	return meta->kfunc_flags & KF_RET_NULL;
9282 }
9283 
9284 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
9285 {
9286 	return meta->kfunc_flags & KF_RELEASE;
9287 }
9288 
9289 static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
9290 {
9291 	return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
9292 }
9293 
9294 static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
9295 {
9296 	return meta->kfunc_flags & KF_SLEEPABLE;
9297 }
9298 
9299 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
9300 {
9301 	return meta->kfunc_flags & KF_DESTRUCTIVE;
9302 }
9303 
9304 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
9305 {
9306 	return meta->kfunc_flags & KF_RCU;
9307 }
9308 
9309 static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
9310 {
9311 	return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
9312 }
9313 
9314 static bool __kfunc_param_match_suffix(const struct btf *btf,
9315 				       const struct btf_param *arg,
9316 				       const char *suffix)
9317 {
9318 	int suffix_len = strlen(suffix), len;
9319 	const char *param_name;
9320 
9321 	/* In the future, this can be ported to use BTF tagging */
9322 	param_name = btf_name_by_offset(btf, arg->name_off);
9323 	if (str_is_empty(param_name))
9324 		return false;
9325 	len = strlen(param_name);
9326 	if (len < suffix_len)
9327 		return false;
9328 	param_name += len - suffix_len;
9329 	return !strncmp(param_name, suffix, suffix_len);
9330 }
9331 
9332 static bool is_kfunc_arg_mem_size(const struct btf *btf,
9333 				  const struct btf_param *arg,
9334 				  const struct bpf_reg_state *reg)
9335 {
9336 	const struct btf_type *t;
9337 
9338 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
9339 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
9340 		return false;
9341 
9342 	return __kfunc_param_match_suffix(btf, arg, "__sz");
9343 }
9344 
9345 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
9346 					const struct btf_param *arg,
9347 					const struct bpf_reg_state *reg)
9348 {
9349 	const struct btf_type *t;
9350 
9351 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
9352 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
9353 		return false;
9354 
9355 	return __kfunc_param_match_suffix(btf, arg, "__szk");
9356 }
9357 
9358 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
9359 {
9360 	return __kfunc_param_match_suffix(btf, arg, "__k");
9361 }
9362 
9363 static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
9364 {
9365 	return __kfunc_param_match_suffix(btf, arg, "__ign");
9366 }
9367 
9368 static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
9369 {
9370 	return __kfunc_param_match_suffix(btf, arg, "__alloc");
9371 }
9372 
9373 static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
9374 {
9375 	return __kfunc_param_match_suffix(btf, arg, "__uninit");
9376 }
9377 
9378 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
9379 					  const struct btf_param *arg,
9380 					  const char *name)
9381 {
9382 	int len, target_len = strlen(name);
9383 	const char *param_name;
9384 
9385 	param_name = btf_name_by_offset(btf, arg->name_off);
9386 	if (str_is_empty(param_name))
9387 		return false;
9388 	len = strlen(param_name);
9389 	if (len != target_len)
9390 		return false;
9391 	if (strcmp(param_name, name))
9392 		return false;
9393 
9394 	return true;
9395 }
9396 
9397 enum {
9398 	KF_ARG_DYNPTR_ID,
9399 	KF_ARG_LIST_HEAD_ID,
9400 	KF_ARG_LIST_NODE_ID,
9401 	KF_ARG_RB_ROOT_ID,
9402 	KF_ARG_RB_NODE_ID,
9403 };
9404 
9405 BTF_ID_LIST(kf_arg_btf_ids)
9406 BTF_ID(struct, bpf_dynptr_kern)
9407 BTF_ID(struct, bpf_list_head)
9408 BTF_ID(struct, bpf_list_node)
9409 BTF_ID(struct, bpf_rb_root)
9410 BTF_ID(struct, bpf_rb_node)
9411 
9412 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
9413 				    const struct btf_param *arg, int type)
9414 {
9415 	const struct btf_type *t;
9416 	u32 res_id;
9417 
9418 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
9419 	if (!t)
9420 		return false;
9421 	if (!btf_type_is_ptr(t))
9422 		return false;
9423 	t = btf_type_skip_modifiers(btf, t->type, &res_id);
9424 	if (!t)
9425 		return false;
9426 	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
9427 }
9428 
9429 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
9430 {
9431 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
9432 }
9433 
9434 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
9435 {
9436 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
9437 }
9438 
9439 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
9440 {
9441 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
9442 }
9443 
9444 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
9445 {
9446 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
9447 }
9448 
9449 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
9450 {
9451 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
9452 }
9453 
9454 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
9455 				  const struct btf_param *arg)
9456 {
9457 	const struct btf_type *t;
9458 
9459 	t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
9460 	if (!t)
9461 		return false;
9462 
9463 	return true;
9464 }
9465 
9466 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
9467 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
9468 					const struct btf *btf,
9469 					const struct btf_type *t, int rec)
9470 {
9471 	const struct btf_type *member_type;
9472 	const struct btf_member *member;
9473 	u32 i;
9474 
9475 	if (!btf_type_is_struct(t))
9476 		return false;
9477 
9478 	for_each_member(i, t, member) {
9479 		const struct btf_array *array;
9480 
9481 		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
9482 		if (btf_type_is_struct(member_type)) {
9483 			if (rec >= 3) {
9484 				verbose(env, "max struct nesting depth exceeded\n");
9485 				return false;
9486 			}
9487 			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
9488 				return false;
9489 			continue;
9490 		}
9491 		if (btf_type_is_array(member_type)) {
9492 			array = btf_array(member_type);
9493 			if (!array->nelems)
9494 				return false;
9495 			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
9496 			if (!btf_type_is_scalar(member_type))
9497 				return false;
9498 			continue;
9499 		}
9500 		if (!btf_type_is_scalar(member_type))
9501 			return false;
9502 	}
9503 	return true;
9504 }
9505 
9506 
9507 static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
9508 #ifdef CONFIG_NET
9509 	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
9510 	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
9511 	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
9512 #endif
9513 };
9514 
9515 enum kfunc_ptr_arg_type {
9516 	KF_ARG_PTR_TO_CTX,
9517 	KF_ARG_PTR_TO_ALLOC_BTF_ID,  /* Allocated object */
9518 	KF_ARG_PTR_TO_KPTR,	     /* PTR_TO_KPTR but type specific */
9519 	KF_ARG_PTR_TO_DYNPTR,
9520 	KF_ARG_PTR_TO_ITER,
9521 	KF_ARG_PTR_TO_LIST_HEAD,
9522 	KF_ARG_PTR_TO_LIST_NODE,
9523 	KF_ARG_PTR_TO_BTF_ID,	     /* Also covers reg2btf_ids conversions */
9524 	KF_ARG_PTR_TO_MEM,
9525 	KF_ARG_PTR_TO_MEM_SIZE,	     /* Size derived from next argument, skip it */
9526 	KF_ARG_PTR_TO_CALLBACK,
9527 	KF_ARG_PTR_TO_RB_ROOT,
9528 	KF_ARG_PTR_TO_RB_NODE,
9529 };
9530 
9531 enum special_kfunc_type {
9532 	KF_bpf_obj_new_impl,
9533 	KF_bpf_obj_drop_impl,
9534 	KF_bpf_list_push_front,
9535 	KF_bpf_list_push_back,
9536 	KF_bpf_list_pop_front,
9537 	KF_bpf_list_pop_back,
9538 	KF_bpf_cast_to_kern_ctx,
9539 	KF_bpf_rdonly_cast,
9540 	KF_bpf_rcu_read_lock,
9541 	KF_bpf_rcu_read_unlock,
9542 	KF_bpf_rbtree_remove,
9543 	KF_bpf_rbtree_add,
9544 	KF_bpf_rbtree_first,
9545 	KF_bpf_dynptr_from_skb,
9546 	KF_bpf_dynptr_from_xdp,
9547 	KF_bpf_dynptr_slice,
9548 	KF_bpf_dynptr_slice_rdwr,
9549 };
9550 
9551 BTF_SET_START(special_kfunc_set)
9552 BTF_ID(func, bpf_obj_new_impl)
9553 BTF_ID(func, bpf_obj_drop_impl)
9554 BTF_ID(func, bpf_list_push_front)
9555 BTF_ID(func, bpf_list_push_back)
9556 BTF_ID(func, bpf_list_pop_front)
9557 BTF_ID(func, bpf_list_pop_back)
9558 BTF_ID(func, bpf_cast_to_kern_ctx)
9559 BTF_ID(func, bpf_rdonly_cast)
9560 BTF_ID(func, bpf_rbtree_remove)
9561 BTF_ID(func, bpf_rbtree_add)
9562 BTF_ID(func, bpf_rbtree_first)
9563 BTF_ID(func, bpf_dynptr_from_skb)
9564 BTF_ID(func, bpf_dynptr_from_xdp)
9565 BTF_ID(func, bpf_dynptr_slice)
9566 BTF_ID(func, bpf_dynptr_slice_rdwr)
9567 BTF_SET_END(special_kfunc_set)
9568 
9569 BTF_ID_LIST(special_kfunc_list)
9570 BTF_ID(func, bpf_obj_new_impl)
9571 BTF_ID(func, bpf_obj_drop_impl)
9572 BTF_ID(func, bpf_list_push_front)
9573 BTF_ID(func, bpf_list_push_back)
9574 BTF_ID(func, bpf_list_pop_front)
9575 BTF_ID(func, bpf_list_pop_back)
9576 BTF_ID(func, bpf_cast_to_kern_ctx)
9577 BTF_ID(func, bpf_rdonly_cast)
9578 BTF_ID(func, bpf_rcu_read_lock)
9579 BTF_ID(func, bpf_rcu_read_unlock)
9580 BTF_ID(func, bpf_rbtree_remove)
9581 BTF_ID(func, bpf_rbtree_add)
9582 BTF_ID(func, bpf_rbtree_first)
9583 BTF_ID(func, bpf_dynptr_from_skb)
9584 BTF_ID(func, bpf_dynptr_from_xdp)
9585 BTF_ID(func, bpf_dynptr_slice)
9586 BTF_ID(func, bpf_dynptr_slice_rdwr)
9587 
9588 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
9589 {
9590 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
9591 }
9592 
9593 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
9594 {
9595 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
9596 }
9597 
9598 static enum kfunc_ptr_arg_type
9599 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
9600 		       struct bpf_kfunc_call_arg_meta *meta,
9601 		       const struct btf_type *t, const struct btf_type *ref_t,
9602 		       const char *ref_tname, const struct btf_param *args,
9603 		       int argno, int nargs)
9604 {
9605 	u32 regno = argno + 1;
9606 	struct bpf_reg_state *regs = cur_regs(env);
9607 	struct bpf_reg_state *reg = &regs[regno];
9608 	bool arg_mem_size = false;
9609 
9610 	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
9611 		return KF_ARG_PTR_TO_CTX;
9612 
9613 	/* In this function, we verify the kfunc's BTF as per the argument type,
9614 	 * leaving the rest of the verification with respect to the register
9615 	 * type to our caller. When a set of conditions hold in the BTF type of
9616 	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
9617 	 */
9618 	if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
9619 		return KF_ARG_PTR_TO_CTX;
9620 
9621 	if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
9622 		return KF_ARG_PTR_TO_ALLOC_BTF_ID;
9623 
9624 	if (is_kfunc_arg_kptr_get(meta, argno)) {
9625 		if (!btf_type_is_ptr(ref_t)) {
9626 			verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n");
9627 			return -EINVAL;
9628 		}
9629 		ref_t = btf_type_by_id(meta->btf, ref_t->type);
9630 		ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off);
9631 		if (!btf_type_is_struct(ref_t)) {
9632 			verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n",
9633 				meta->func_name, btf_type_str(ref_t), ref_tname);
9634 			return -EINVAL;
9635 		}
9636 		return KF_ARG_PTR_TO_KPTR;
9637 	}
9638 
9639 	if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
9640 		return KF_ARG_PTR_TO_DYNPTR;
9641 
9642 	if (is_kfunc_arg_iter(meta, argno))
9643 		return KF_ARG_PTR_TO_ITER;
9644 
9645 	if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
9646 		return KF_ARG_PTR_TO_LIST_HEAD;
9647 
9648 	if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
9649 		return KF_ARG_PTR_TO_LIST_NODE;
9650 
9651 	if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
9652 		return KF_ARG_PTR_TO_RB_ROOT;
9653 
9654 	if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
9655 		return KF_ARG_PTR_TO_RB_NODE;
9656 
9657 	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
9658 		if (!btf_type_is_struct(ref_t)) {
9659 			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
9660 				meta->func_name, argno, btf_type_str(ref_t), ref_tname);
9661 			return -EINVAL;
9662 		}
9663 		return KF_ARG_PTR_TO_BTF_ID;
9664 	}
9665 
9666 	if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
9667 		return KF_ARG_PTR_TO_CALLBACK;
9668 
9669 
9670 	if (argno + 1 < nargs &&
9671 	    (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
9672 	     is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
9673 		arg_mem_size = true;
9674 
9675 	/* This is the catch all argument type of register types supported by
9676 	 * check_helper_mem_access. However, we only allow when argument type is
9677 	 * pointer to scalar, or struct composed (recursively) of scalars. When
9678 	 * arg_mem_size is true, the pointer can be void *.
9679 	 */
9680 	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
9681 	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
9682 		verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
9683 			argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
9684 		return -EINVAL;
9685 	}
9686 	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
9687 }
9688 
9689 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
9690 					struct bpf_reg_state *reg,
9691 					const struct btf_type *ref_t,
9692 					const char *ref_tname, u32 ref_id,
9693 					struct bpf_kfunc_call_arg_meta *meta,
9694 					int argno)
9695 {
9696 	const struct btf_type *reg_ref_t;
9697 	bool strict_type_match = false;
9698 	const struct btf *reg_btf;
9699 	const char *reg_ref_tname;
9700 	u32 reg_ref_id;
9701 
9702 	if (base_type(reg->type) == PTR_TO_BTF_ID) {
9703 		reg_btf = reg->btf;
9704 		reg_ref_id = reg->btf_id;
9705 	} else {
9706 		reg_btf = btf_vmlinux;
9707 		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
9708 	}
9709 
9710 	/* Enforce strict type matching for calls to kfuncs that are acquiring
9711 	 * or releasing a reference, or are no-cast aliases. We do _not_
9712 	 * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
9713 	 * as we want to enable BPF programs to pass types that are bitwise
9714 	 * equivalent without forcing them to explicitly cast with something
9715 	 * like bpf_cast_to_kern_ctx().
9716 	 *
9717 	 * For example, say we had a type like the following:
9718 	 *
9719 	 * struct bpf_cpumask {
9720 	 *	cpumask_t cpumask;
9721 	 *	refcount_t usage;
9722 	 * };
9723 	 *
9724 	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
9725 	 * to a struct cpumask, so it would be safe to pass a struct
9726 	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
9727 	 *
9728 	 * The philosophy here is similar to how we allow scalars of different
9729 	 * types to be passed to kfuncs as long as the size is the same. The
9730 	 * only difference here is that we're simply allowing
9731 	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
9732 	 * resolve types.
9733 	 */
9734 	if (is_kfunc_acquire(meta) ||
9735 	    (is_kfunc_release(meta) && reg->ref_obj_id) ||
9736 	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
9737 		strict_type_match = true;
9738 
9739 	WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);
9740 
9741 	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
9742 	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
9743 	if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
9744 		verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
9745 			meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
9746 			btf_type_str(reg_ref_t), reg_ref_tname);
9747 		return -EINVAL;
9748 	}
9749 	return 0;
9750 }
9751 
9752 static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
9753 				      struct bpf_reg_state *reg,
9754 				      const struct btf_type *ref_t,
9755 				      const char *ref_tname,
9756 				      struct bpf_kfunc_call_arg_meta *meta,
9757 				      int argno)
9758 {
9759 	struct btf_field *kptr_field;
9760 
9761 	/* check_func_arg_reg_off allows var_off for
9762 	 * PTR_TO_MAP_VALUE, but we need fixed offset to find
9763 	 * off_desc.
9764 	 */
9765 	if (!tnum_is_const(reg->var_off)) {
9766 		verbose(env, "arg#0 must have constant offset\n");
9767 		return -EINVAL;
9768 	}
9769 
9770 	kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR);
9771 	if (!kptr_field || kptr_field->type != BPF_KPTR_REF) {
9772 		verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n",
9773 			reg->off + reg->var_off.value);
9774 		return -EINVAL;
9775 	}
9776 
9777 	if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf,
9778 				  kptr_field->kptr.btf_id, true)) {
9779 		verbose(env, "kernel function %s args#%d expected pointer to %s %s\n",
9780 			meta->func_name, argno, btf_type_str(ref_t), ref_tname);
9781 		return -EINVAL;
9782 	}
9783 	return 0;
9784 }
9785 
9786 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9787 {
9788 	struct bpf_verifier_state *state = env->cur_state;
9789 
9790 	if (!state->active_lock.ptr) {
9791 		verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
9792 		return -EFAULT;
9793 	}
9794 
9795 	if (type_flag(reg->type) & NON_OWN_REF) {
9796 		verbose(env, "verifier internal error: NON_OWN_REF already set\n");
9797 		return -EFAULT;
9798 	}
9799 
9800 	reg->type |= NON_OWN_REF;
9801 	return 0;
9802 }
9803 
9804 static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
9805 {
9806 	struct bpf_func_state *state, *unused;
9807 	struct bpf_reg_state *reg;
9808 	int i;
9809 
9810 	state = cur_func(env);
9811 
9812 	if (!ref_obj_id) {
9813 		verbose(env, "verifier internal error: ref_obj_id is zero for "
9814 			     "owning -> non-owning conversion\n");
9815 		return -EFAULT;
9816 	}
9817 
9818 	for (i = 0; i < state->acquired_refs; i++) {
9819 		if (state->refs[i].id != ref_obj_id)
9820 			continue;
9821 
9822 		/* Clear ref_obj_id here so release_reference doesn't clobber
9823 		 * the whole reg
9824 		 */
9825 		bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
9826 			if (reg->ref_obj_id == ref_obj_id) {
9827 				reg->ref_obj_id = 0;
9828 				ref_set_non_owning(env, reg);
9829 			}
9830 		}));
9831 		return 0;
9832 	}
9833 
9834 	verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
9835 	return -EFAULT;
9836 }
9837 
9838 /* Implementation details:
9839  *
9840  * Each register points to some region of memory, which we define as an
9841  * allocation. Each allocation may embed a bpf_spin_lock which protects any
9842  * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
9843  * allocation. The lock and the data it protects are colocated in the same
9844  * memory region.
9845  *
9846  * Hence, everytime a register holds a pointer value pointing to such
9847  * allocation, the verifier preserves a unique reg->id for it.
9848  *
9849  * The verifier remembers the lock 'ptr' and the lock 'id' whenever
9850  * bpf_spin_lock is called.
9851  *
9852  * To enable this, lock state in the verifier captures two values:
9853  *	active_lock.ptr = Register's type specific pointer
9854  *	active_lock.id  = A unique ID for each register pointer value
9855  *
9856  * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
9857  * supported register types.
9858  *
9859  * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
9860  * allocated objects is the reg->btf pointer.
9861  *
9862  * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
9863  * can establish the provenance of the map value statically for each distinct
9864  * lookup into such maps. They always contain a single map value hence unique
9865  * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
9866  *
9867  * So, in case of global variables, they use array maps with max_entries = 1,
9868  * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
9869  * into the same map value as max_entries is 1, as described above).
9870  *
9871  * In case of inner map lookups, the inner map pointer has same map_ptr as the
9872  * outer map pointer (in verifier context), but each lookup into an inner map
9873  * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
9874  * maps from the same outer map share the same map_ptr as active_lock.ptr, they
9875  * will get different reg->id assigned to each lookup, hence different
9876  * active_lock.id.
9877  *
9878  * In case of allocated objects, active_lock.ptr is the reg->btf, and the
9879  * reg->id is a unique ID preserved after the NULL pointer check on the pointer
9880  * returned from bpf_obj_new. Each allocation receives a new reg->id.
9881  */
9882 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9883 {
9884 	void *ptr;
9885 	u32 id;
9886 
9887 	switch ((int)reg->type) {
9888 	case PTR_TO_MAP_VALUE:
9889 		ptr = reg->map_ptr;
9890 		break;
9891 	case PTR_TO_BTF_ID | MEM_ALLOC:
9892 		ptr = reg->btf;
9893 		break;
9894 	default:
9895 		verbose(env, "verifier internal error: unknown reg type for lock check\n");
9896 		return -EFAULT;
9897 	}
9898 	id = reg->id;
9899 
9900 	if (!env->cur_state->active_lock.ptr)
9901 		return -EINVAL;
9902 	if (env->cur_state->active_lock.ptr != ptr ||
9903 	    env->cur_state->active_lock.id != id) {
9904 		verbose(env, "held lock and object are not in the same allocation\n");
9905 		return -EINVAL;
9906 	}
9907 	return 0;
9908 }
9909 
9910 static bool is_bpf_list_api_kfunc(u32 btf_id)
9911 {
9912 	return btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
9913 	       btf_id == special_kfunc_list[KF_bpf_list_push_back] ||
9914 	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
9915 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back];
9916 }
9917 
9918 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
9919 {
9920 	return btf_id == special_kfunc_list[KF_bpf_rbtree_add] ||
9921 	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
9922 	       btf_id == special_kfunc_list[KF_bpf_rbtree_first];
9923 }
9924 
9925 static bool is_bpf_graph_api_kfunc(u32 btf_id)
9926 {
9927 	return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id);
9928 }
9929 
9930 static bool is_callback_calling_kfunc(u32 btf_id)
9931 {
9932 	return btf_id == special_kfunc_list[KF_bpf_rbtree_add];
9933 }
9934 
9935 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
9936 {
9937 	return is_bpf_rbtree_api_kfunc(btf_id);
9938 }
9939 
9940 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
9941 					  enum btf_field_type head_field_type,
9942 					  u32 kfunc_btf_id)
9943 {
9944 	bool ret;
9945 
9946 	switch (head_field_type) {
9947 	case BPF_LIST_HEAD:
9948 		ret = is_bpf_list_api_kfunc(kfunc_btf_id);
9949 		break;
9950 	case BPF_RB_ROOT:
9951 		ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
9952 		break;
9953 	default:
9954 		verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
9955 			btf_field_type_name(head_field_type));
9956 		return false;
9957 	}
9958 
9959 	if (!ret)
9960 		verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
9961 			btf_field_type_name(head_field_type));
9962 	return ret;
9963 }
9964 
9965 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
9966 					  enum btf_field_type node_field_type,
9967 					  u32 kfunc_btf_id)
9968 {
9969 	bool ret;
9970 
9971 	switch (node_field_type) {
9972 	case BPF_LIST_NODE:
9973 		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
9974 		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back]);
9975 		break;
9976 	case BPF_RB_NODE:
9977 		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
9978 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add]);
9979 		break;
9980 	default:
9981 		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
9982 			btf_field_type_name(node_field_type));
9983 		return false;
9984 	}
9985 
9986 	if (!ret)
9987 		verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
9988 			btf_field_type_name(node_field_type));
9989 	return ret;
9990 }
9991 
9992 static int
9993 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
9994 				   struct bpf_reg_state *reg, u32 regno,
9995 				   struct bpf_kfunc_call_arg_meta *meta,
9996 				   enum btf_field_type head_field_type,
9997 				   struct btf_field **head_field)
9998 {
9999 	const char *head_type_name;
10000 	struct btf_field *field;
10001 	struct btf_record *rec;
10002 	u32 head_off;
10003 
10004 	if (meta->btf != btf_vmlinux) {
10005 		verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
10006 		return -EFAULT;
10007 	}
10008 
10009 	if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
10010 		return -EFAULT;
10011 
10012 	head_type_name = btf_field_type_name(head_field_type);
10013 	if (!tnum_is_const(reg->var_off)) {
10014 		verbose(env,
10015 			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
10016 			regno, head_type_name);
10017 		return -EINVAL;
10018 	}
10019 
10020 	rec = reg_btf_record(reg);
10021 	head_off = reg->off + reg->var_off.value;
10022 	field = btf_record_find(rec, head_off, head_field_type);
10023 	if (!field) {
10024 		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
10025 		return -EINVAL;
10026 	}
10027 
10028 	/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
10029 	if (check_reg_allocation_locked(env, reg)) {
10030 		verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
10031 			rec->spin_lock_off, head_type_name);
10032 		return -EINVAL;
10033 	}
10034 
10035 	if (*head_field) {
10036 		verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
10037 		return -EFAULT;
10038 	}
10039 	*head_field = field;
10040 	return 0;
10041 }
10042 
10043 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
10044 					   struct bpf_reg_state *reg, u32 regno,
10045 					   struct bpf_kfunc_call_arg_meta *meta)
10046 {
10047 	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
10048 							  &meta->arg_list_head.field);
10049 }
10050 
10051 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
10052 					     struct bpf_reg_state *reg, u32 regno,
10053 					     struct bpf_kfunc_call_arg_meta *meta)
10054 {
10055 	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
10056 							  &meta->arg_rbtree_root.field);
10057 }
10058 
10059 static int
10060 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
10061 				   struct bpf_reg_state *reg, u32 regno,
10062 				   struct bpf_kfunc_call_arg_meta *meta,
10063 				   enum btf_field_type head_field_type,
10064 				   enum btf_field_type node_field_type,
10065 				   struct btf_field **node_field)
10066 {
10067 	const char *node_type_name;
10068 	const struct btf_type *et, *t;
10069 	struct btf_field *field;
10070 	u32 node_off;
10071 
10072 	if (meta->btf != btf_vmlinux) {
10073 		verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
10074 		return -EFAULT;
10075 	}
10076 
10077 	if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
10078 		return -EFAULT;
10079 
10080 	node_type_name = btf_field_type_name(node_field_type);
10081 	if (!tnum_is_const(reg->var_off)) {
10082 		verbose(env,
10083 			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
10084 			regno, node_type_name);
10085 		return -EINVAL;
10086 	}
10087 
10088 	node_off = reg->off + reg->var_off.value;
10089 	field = reg_find_field_offset(reg, node_off, node_field_type);
10090 	if (!field || field->offset != node_off) {
10091 		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
10092 		return -EINVAL;
10093 	}
10094 
10095 	field = *node_field;
10096 
10097 	et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
10098 	t = btf_type_by_id(reg->btf, reg->btf_id);
10099 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
10100 				  field->graph_root.value_btf_id, true)) {
10101 		verbose(env, "operation on %s expects arg#1 %s at offset=%d "
10102 			"in struct %s, but arg is at offset=%d in struct %s\n",
10103 			btf_field_type_name(head_field_type),
10104 			btf_field_type_name(node_field_type),
10105 			field->graph_root.node_offset,
10106 			btf_name_by_offset(field->graph_root.btf, et->name_off),
10107 			node_off, btf_name_by_offset(reg->btf, t->name_off));
10108 		return -EINVAL;
10109 	}
10110 
10111 	if (node_off != field->graph_root.node_offset) {
10112 		verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
10113 			node_off, btf_field_type_name(node_field_type),
10114 			field->graph_root.node_offset,
10115 			btf_name_by_offset(field->graph_root.btf, et->name_off));
10116 		return -EINVAL;
10117 	}
10118 
10119 	return 0;
10120 }
10121 
10122 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
10123 					   struct bpf_reg_state *reg, u32 regno,
10124 					   struct bpf_kfunc_call_arg_meta *meta)
10125 {
10126 	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
10127 						  BPF_LIST_HEAD, BPF_LIST_NODE,
10128 						  &meta->arg_list_head.field);
10129 }
10130 
10131 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
10132 					     struct bpf_reg_state *reg, u32 regno,
10133 					     struct bpf_kfunc_call_arg_meta *meta)
10134 {
10135 	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
10136 						  BPF_RB_ROOT, BPF_RB_NODE,
10137 						  &meta->arg_rbtree_root.field);
10138 }
10139 
10140 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
10141 			    int insn_idx)
10142 {
10143 	const char *func_name = meta->func_name, *ref_tname;
10144 	const struct btf *btf = meta->btf;
10145 	const struct btf_param *args;
10146 	u32 i, nargs;
10147 	int ret;
10148 
10149 	args = (const struct btf_param *)(meta->func_proto + 1);
10150 	nargs = btf_type_vlen(meta->func_proto);
10151 	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
10152 		verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
10153 			MAX_BPF_FUNC_REG_ARGS);
10154 		return -EINVAL;
10155 	}
10156 
10157 	/* Check that BTF function arguments match actual types that the
10158 	 * verifier sees.
10159 	 */
10160 	for (i = 0; i < nargs; i++) {
10161 		struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
10162 		const struct btf_type *t, *ref_t, *resolve_ret;
10163 		enum bpf_arg_type arg_type = ARG_DONTCARE;
10164 		u32 regno = i + 1, ref_id, type_size;
10165 		bool is_ret_buf_sz = false;
10166 		int kf_arg_type;
10167 
10168 		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
10169 
10170 		if (is_kfunc_arg_ignore(btf, &args[i]))
10171 			continue;
10172 
10173 		if (btf_type_is_scalar(t)) {
10174 			if (reg->type != SCALAR_VALUE) {
10175 				verbose(env, "R%d is not a scalar\n", regno);
10176 				return -EINVAL;
10177 			}
10178 
10179 			if (is_kfunc_arg_constant(meta->btf, &args[i])) {
10180 				if (meta->arg_constant.found) {
10181 					verbose(env, "verifier internal error: only one constant argument permitted\n");
10182 					return -EFAULT;
10183 				}
10184 				if (!tnum_is_const(reg->var_off)) {
10185 					verbose(env, "R%d must be a known constant\n", regno);
10186 					return -EINVAL;
10187 				}
10188 				ret = mark_chain_precision(env, regno);
10189 				if (ret < 0)
10190 					return ret;
10191 				meta->arg_constant.found = true;
10192 				meta->arg_constant.value = reg->var_off.value;
10193 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
10194 				meta->r0_rdonly = true;
10195 				is_ret_buf_sz = true;
10196 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
10197 				is_ret_buf_sz = true;
10198 			}
10199 
10200 			if (is_ret_buf_sz) {
10201 				if (meta->r0_size) {
10202 					verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
10203 					return -EINVAL;
10204 				}
10205 
10206 				if (!tnum_is_const(reg->var_off)) {
10207 					verbose(env, "R%d is not a const\n", regno);
10208 					return -EINVAL;
10209 				}
10210 
10211 				meta->r0_size = reg->var_off.value;
10212 				ret = mark_chain_precision(env, regno);
10213 				if (ret)
10214 					return ret;
10215 			}
10216 			continue;
10217 		}
10218 
10219 		if (!btf_type_is_ptr(t)) {
10220 			verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
10221 			return -EINVAL;
10222 		}
10223 
10224 		if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
10225 		    (register_is_null(reg) || type_may_be_null(reg->type))) {
10226 			verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
10227 			return -EACCES;
10228 		}
10229 
10230 		if (reg->ref_obj_id) {
10231 			if (is_kfunc_release(meta) && meta->ref_obj_id) {
10232 				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
10233 					regno, reg->ref_obj_id,
10234 					meta->ref_obj_id);
10235 				return -EFAULT;
10236 			}
10237 			meta->ref_obj_id = reg->ref_obj_id;
10238 			if (is_kfunc_release(meta))
10239 				meta->release_regno = regno;
10240 		}
10241 
10242 		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
10243 		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
10244 
10245 		kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
10246 		if (kf_arg_type < 0)
10247 			return kf_arg_type;
10248 
10249 		switch (kf_arg_type) {
10250 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
10251 		case KF_ARG_PTR_TO_BTF_ID:
10252 			if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
10253 				break;
10254 
10255 			if (!is_trusted_reg(reg)) {
10256 				if (!is_kfunc_rcu(meta)) {
10257 					verbose(env, "R%d must be referenced or trusted\n", regno);
10258 					return -EINVAL;
10259 				}
10260 				if (!is_rcu_reg(reg)) {
10261 					verbose(env, "R%d must be a rcu pointer\n", regno);
10262 					return -EINVAL;
10263 				}
10264 			}
10265 
10266 			fallthrough;
10267 		case KF_ARG_PTR_TO_CTX:
10268 			/* Trusted arguments have the same offset checks as release arguments */
10269 			arg_type |= OBJ_RELEASE;
10270 			break;
10271 		case KF_ARG_PTR_TO_KPTR:
10272 		case KF_ARG_PTR_TO_DYNPTR:
10273 		case KF_ARG_PTR_TO_ITER:
10274 		case KF_ARG_PTR_TO_LIST_HEAD:
10275 		case KF_ARG_PTR_TO_LIST_NODE:
10276 		case KF_ARG_PTR_TO_RB_ROOT:
10277 		case KF_ARG_PTR_TO_RB_NODE:
10278 		case KF_ARG_PTR_TO_MEM:
10279 		case KF_ARG_PTR_TO_MEM_SIZE:
10280 		case KF_ARG_PTR_TO_CALLBACK:
10281 			/* Trusted by default */
10282 			break;
10283 		default:
10284 			WARN_ON_ONCE(1);
10285 			return -EFAULT;
10286 		}
10287 
10288 		if (is_kfunc_release(meta) && reg->ref_obj_id)
10289 			arg_type |= OBJ_RELEASE;
10290 		ret = check_func_arg_reg_off(env, reg, regno, arg_type);
10291 		if (ret < 0)
10292 			return ret;
10293 
10294 		switch (kf_arg_type) {
10295 		case KF_ARG_PTR_TO_CTX:
10296 			if (reg->type != PTR_TO_CTX) {
10297 				verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
10298 				return -EINVAL;
10299 			}
10300 
10301 			if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
10302 				ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
10303 				if (ret < 0)
10304 					return -EINVAL;
10305 				meta->ret_btf_id  = ret;
10306 			}
10307 			break;
10308 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
10309 			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
10310 				verbose(env, "arg#%d expected pointer to allocated object\n", i);
10311 				return -EINVAL;
10312 			}
10313 			if (!reg->ref_obj_id) {
10314 				verbose(env, "allocated object must be referenced\n");
10315 				return -EINVAL;
10316 			}
10317 			if (meta->btf == btf_vmlinux &&
10318 			    meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
10319 				meta->arg_obj_drop.btf = reg->btf;
10320 				meta->arg_obj_drop.btf_id = reg->btf_id;
10321 			}
10322 			break;
10323 		case KF_ARG_PTR_TO_KPTR:
10324 			if (reg->type != PTR_TO_MAP_VALUE) {
10325 				verbose(env, "arg#0 expected pointer to map value\n");
10326 				return -EINVAL;
10327 			}
10328 			ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i);
10329 			if (ret < 0)
10330 				return ret;
10331 			break;
10332 		case KF_ARG_PTR_TO_DYNPTR:
10333 		{
10334 			enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
10335 
10336 			if (reg->type != PTR_TO_STACK &&
10337 			    reg->type != CONST_PTR_TO_DYNPTR) {
10338 				verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
10339 				return -EINVAL;
10340 			}
10341 
10342 			if (reg->type == CONST_PTR_TO_DYNPTR)
10343 				dynptr_arg_type |= MEM_RDONLY;
10344 
10345 			if (is_kfunc_arg_uninit(btf, &args[i]))
10346 				dynptr_arg_type |= MEM_UNINIT;
10347 
10348 			if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb])
10349 				dynptr_arg_type |= DYNPTR_TYPE_SKB;
10350 			else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp])
10351 				dynptr_arg_type |= DYNPTR_TYPE_XDP;
10352 
10353 			ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
10354 			if (ret < 0)
10355 				return ret;
10356 
10357 			if (!(dynptr_arg_type & MEM_UNINIT)) {
10358 				int id = dynptr_id(env, reg);
10359 
10360 				if (id < 0) {
10361 					verbose(env, "verifier internal error: failed to obtain dynptr id\n");
10362 					return id;
10363 				}
10364 				meta->initialized_dynptr.id = id;
10365 				meta->initialized_dynptr.type = dynptr_get_type(env, reg);
10366 			}
10367 
10368 			break;
10369 		}
10370 		case KF_ARG_PTR_TO_ITER:
10371 			ret = process_iter_arg(env, regno, insn_idx, meta);
10372 			if (ret < 0)
10373 				return ret;
10374 			break;
10375 		case KF_ARG_PTR_TO_LIST_HEAD:
10376 			if (reg->type != PTR_TO_MAP_VALUE &&
10377 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
10378 				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
10379 				return -EINVAL;
10380 			}
10381 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
10382 				verbose(env, "allocated object must be referenced\n");
10383 				return -EINVAL;
10384 			}
10385 			ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
10386 			if (ret < 0)
10387 				return ret;
10388 			break;
10389 		case KF_ARG_PTR_TO_RB_ROOT:
10390 			if (reg->type != PTR_TO_MAP_VALUE &&
10391 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
10392 				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
10393 				return -EINVAL;
10394 			}
10395 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
10396 				verbose(env, "allocated object must be referenced\n");
10397 				return -EINVAL;
10398 			}
10399 			ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
10400 			if (ret < 0)
10401 				return ret;
10402 			break;
10403 		case KF_ARG_PTR_TO_LIST_NODE:
10404 			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
10405 				verbose(env, "arg#%d expected pointer to allocated object\n", i);
10406 				return -EINVAL;
10407 			}
10408 			if (!reg->ref_obj_id) {
10409 				verbose(env, "allocated object must be referenced\n");
10410 				return -EINVAL;
10411 			}
10412 			ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
10413 			if (ret < 0)
10414 				return ret;
10415 			break;
10416 		case KF_ARG_PTR_TO_RB_NODE:
10417 			if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
10418 				if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
10419 					verbose(env, "rbtree_remove node input must be non-owning ref\n");
10420 					return -EINVAL;
10421 				}
10422 				if (in_rbtree_lock_required_cb(env)) {
10423 					verbose(env, "rbtree_remove not allowed in rbtree cb\n");
10424 					return -EINVAL;
10425 				}
10426 			} else {
10427 				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
10428 					verbose(env, "arg#%d expected pointer to allocated object\n", i);
10429 					return -EINVAL;
10430 				}
10431 				if (!reg->ref_obj_id) {
10432 					verbose(env, "allocated object must be referenced\n");
10433 					return -EINVAL;
10434 				}
10435 			}
10436 
10437 			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
10438 			if (ret < 0)
10439 				return ret;
10440 			break;
10441 		case KF_ARG_PTR_TO_BTF_ID:
10442 			/* Only base_type is checked, further checks are done here */
10443 			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
10444 			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
10445 			    !reg2btf_ids[base_type(reg->type)]) {
10446 				verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
10447 				verbose(env, "expected %s or socket\n",
10448 					reg_type_str(env, base_type(reg->type) |
10449 							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
10450 				return -EINVAL;
10451 			}
10452 			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
10453 			if (ret < 0)
10454 				return ret;
10455 			break;
10456 		case KF_ARG_PTR_TO_MEM:
10457 			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
10458 			if (IS_ERR(resolve_ret)) {
10459 				verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
10460 					i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
10461 				return -EINVAL;
10462 			}
10463 			ret = check_mem_reg(env, reg, regno, type_size);
10464 			if (ret < 0)
10465 				return ret;
10466 			break;
10467 		case KF_ARG_PTR_TO_MEM_SIZE:
10468 		{
10469 			struct bpf_reg_state *size_reg = &regs[regno + 1];
10470 			const struct btf_param *size_arg = &args[i + 1];
10471 
10472 			ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
10473 			if (ret < 0) {
10474 				verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
10475 				return ret;
10476 			}
10477 
10478 			if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
10479 				if (meta->arg_constant.found) {
10480 					verbose(env, "verifier internal error: only one constant argument permitted\n");
10481 					return -EFAULT;
10482 				}
10483 				if (!tnum_is_const(size_reg->var_off)) {
10484 					verbose(env, "R%d must be a known constant\n", regno + 1);
10485 					return -EINVAL;
10486 				}
10487 				meta->arg_constant.found = true;
10488 				meta->arg_constant.value = size_reg->var_off.value;
10489 			}
10490 
10491 			/* Skip next '__sz' or '__szk' argument */
10492 			i++;
10493 			break;
10494 		}
10495 		case KF_ARG_PTR_TO_CALLBACK:
10496 			meta->subprogno = reg->subprogno;
10497 			break;
10498 		}
10499 	}
10500 
10501 	if (is_kfunc_release(meta) && !meta->release_regno) {
10502 		verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
10503 			func_name);
10504 		return -EINVAL;
10505 	}
10506 
10507 	return 0;
10508 }
10509 
10510 static int fetch_kfunc_meta(struct bpf_verifier_env *env,
10511 			    struct bpf_insn *insn,
10512 			    struct bpf_kfunc_call_arg_meta *meta,
10513 			    const char **kfunc_name)
10514 {
10515 	const struct btf_type *func, *func_proto;
10516 	u32 func_id, *kfunc_flags;
10517 	const char *func_name;
10518 	struct btf *desc_btf;
10519 
10520 	if (kfunc_name)
10521 		*kfunc_name = NULL;
10522 
10523 	if (!insn->imm)
10524 		return -EINVAL;
10525 
10526 	desc_btf = find_kfunc_desc_btf(env, insn->off);
10527 	if (IS_ERR(desc_btf))
10528 		return PTR_ERR(desc_btf);
10529 
10530 	func_id = insn->imm;
10531 	func = btf_type_by_id(desc_btf, func_id);
10532 	func_name = btf_name_by_offset(desc_btf, func->name_off);
10533 	if (kfunc_name)
10534 		*kfunc_name = func_name;
10535 	func_proto = btf_type_by_id(desc_btf, func->type);
10536 
10537 	kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
10538 	if (!kfunc_flags) {
10539 		return -EACCES;
10540 	}
10541 
10542 	memset(meta, 0, sizeof(*meta));
10543 	meta->btf = desc_btf;
10544 	meta->func_id = func_id;
10545 	meta->kfunc_flags = *kfunc_flags;
10546 	meta->func_proto = func_proto;
10547 	meta->func_name = func_name;
10548 
10549 	return 0;
10550 }
10551 
10552 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10553 			    int *insn_idx_p)
10554 {
10555 	const struct btf_type *t, *ptr_type;
10556 	u32 i, nargs, ptr_type_id, release_ref_obj_id;
10557 	struct bpf_reg_state *regs = cur_regs(env);
10558 	const char *func_name, *ptr_type_name;
10559 	bool sleepable, rcu_lock, rcu_unlock;
10560 	struct bpf_kfunc_call_arg_meta meta;
10561 	struct bpf_insn_aux_data *insn_aux;
10562 	int err, insn_idx = *insn_idx_p;
10563 	const struct btf_param *args;
10564 	const struct btf_type *ret_t;
10565 	struct btf *desc_btf;
10566 
10567 	/* skip for now, but return error when we find this in fixup_kfunc_call */
10568 	if (!insn->imm)
10569 		return 0;
10570 
10571 	err = fetch_kfunc_meta(env, insn, &meta, &func_name);
10572 	if (err == -EACCES && func_name)
10573 		verbose(env, "calling kernel function %s is not allowed\n", func_name);
10574 	if (err)
10575 		return err;
10576 	desc_btf = meta.btf;
10577 	insn_aux = &env->insn_aux_data[insn_idx];
10578 
10579 	insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
10580 
10581 	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
10582 		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
10583 		return -EACCES;
10584 	}
10585 
10586 	sleepable = is_kfunc_sleepable(&meta);
10587 	if (sleepable && !env->prog->aux->sleepable) {
10588 		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
10589 		return -EACCES;
10590 	}
10591 
10592 	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
10593 	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
10594 
10595 	if (env->cur_state->active_rcu_lock) {
10596 		struct bpf_func_state *state;
10597 		struct bpf_reg_state *reg;
10598 
10599 		if (rcu_lock) {
10600 			verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
10601 			return -EINVAL;
10602 		} else if (rcu_unlock) {
10603 			bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
10604 				if (reg->type & MEM_RCU) {
10605 					reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
10606 					reg->type |= PTR_UNTRUSTED;
10607 				}
10608 			}));
10609 			env->cur_state->active_rcu_lock = false;
10610 		} else if (sleepable) {
10611 			verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
10612 			return -EACCES;
10613 		}
10614 	} else if (rcu_lock) {
10615 		env->cur_state->active_rcu_lock = true;
10616 	} else if (rcu_unlock) {
10617 		verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
10618 		return -EINVAL;
10619 	}
10620 
10621 	/* Check the arguments */
10622 	err = check_kfunc_args(env, &meta, insn_idx);
10623 	if (err < 0)
10624 		return err;
10625 	/* In case of release function, we get register number of refcounted
10626 	 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
10627 	 */
10628 	if (meta.release_regno) {
10629 		err = release_reference(env, regs[meta.release_regno].ref_obj_id);
10630 		if (err) {
10631 			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
10632 				func_name, meta.func_id);
10633 			return err;
10634 		}
10635 	}
10636 
10637 	if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] ||
10638 	    meta.func_id == special_kfunc_list[KF_bpf_list_push_back] ||
10639 	    meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
10640 		release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
10641 		err = ref_convert_owning_non_owning(env, release_ref_obj_id);
10642 		if (err) {
10643 			verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
10644 				func_name, meta.func_id);
10645 			return err;
10646 		}
10647 
10648 		err = release_reference(env, release_ref_obj_id);
10649 		if (err) {
10650 			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
10651 				func_name, meta.func_id);
10652 			return err;
10653 		}
10654 	}
10655 
10656 	if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
10657 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
10658 					set_rbtree_add_callback_state);
10659 		if (err) {
10660 			verbose(env, "kfunc %s#%d failed callback verification\n",
10661 				func_name, meta.func_id);
10662 			return err;
10663 		}
10664 	}
10665 
10666 	for (i = 0; i < CALLER_SAVED_REGS; i++)
10667 		mark_reg_not_init(env, regs, caller_saved[i]);
10668 
10669 	/* Check return type */
10670 	t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
10671 
10672 	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
10673 		/* Only exception is bpf_obj_new_impl */
10674 		if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) {
10675 			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
10676 			return -EINVAL;
10677 		}
10678 	}
10679 
10680 	if (btf_type_is_scalar(t)) {
10681 		mark_reg_unknown(env, regs, BPF_REG_0);
10682 		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
10683 	} else if (btf_type_is_ptr(t)) {
10684 		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
10685 
10686 		if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
10687 			if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
10688 				struct btf *ret_btf;
10689 				u32 ret_btf_id;
10690 
10691 				if (unlikely(!bpf_global_ma_set))
10692 					return -ENOMEM;
10693 
10694 				if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
10695 					verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
10696 					return -EINVAL;
10697 				}
10698 
10699 				ret_btf = env->prog->aux->btf;
10700 				ret_btf_id = meta.arg_constant.value;
10701 
10702 				/* This may be NULL due to user not supplying a BTF */
10703 				if (!ret_btf) {
10704 					verbose(env, "bpf_obj_new requires prog BTF\n");
10705 					return -EINVAL;
10706 				}
10707 
10708 				ret_t = btf_type_by_id(ret_btf, ret_btf_id);
10709 				if (!ret_t || !__btf_type_is_struct(ret_t)) {
10710 					verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
10711 					return -EINVAL;
10712 				}
10713 
10714 				mark_reg_known_zero(env, regs, BPF_REG_0);
10715 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
10716 				regs[BPF_REG_0].btf = ret_btf;
10717 				regs[BPF_REG_0].btf_id = ret_btf_id;
10718 
10719 				insn_aux->obj_new_size = ret_t->size;
10720 				insn_aux->kptr_struct_meta =
10721 					btf_find_struct_meta(ret_btf, ret_btf_id);
10722 			} else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
10723 				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
10724 				struct btf_field *field = meta.arg_list_head.field;
10725 
10726 				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
10727 			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
10728 				   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
10729 				struct btf_field *field = meta.arg_rbtree_root.field;
10730 
10731 				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
10732 			} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
10733 				mark_reg_known_zero(env, regs, BPF_REG_0);
10734 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
10735 				regs[BPF_REG_0].btf = desc_btf;
10736 				regs[BPF_REG_0].btf_id = meta.ret_btf_id;
10737 			} else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
10738 				ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
10739 				if (!ret_t || !btf_type_is_struct(ret_t)) {
10740 					verbose(env,
10741 						"kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
10742 					return -EINVAL;
10743 				}
10744 
10745 				mark_reg_known_zero(env, regs, BPF_REG_0);
10746 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
10747 				regs[BPF_REG_0].btf = desc_btf;
10748 				regs[BPF_REG_0].btf_id = meta.arg_constant.value;
10749 			} else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
10750 				   meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
10751 				enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
10752 
10753 				mark_reg_known_zero(env, regs, BPF_REG_0);
10754 
10755 				if (!meta.arg_constant.found) {
10756 					verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
10757 					return -EFAULT;
10758 				}
10759 
10760 				regs[BPF_REG_0].mem_size = meta.arg_constant.value;
10761 
10762 				/* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
10763 				regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
10764 
10765 				if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
10766 					regs[BPF_REG_0].type |= MEM_RDONLY;
10767 				} else {
10768 					/* this will set env->seen_direct_write to true */
10769 					if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
10770 						verbose(env, "the prog does not allow writes to packet data\n");
10771 						return -EINVAL;
10772 					}
10773 				}
10774 
10775 				if (!meta.initialized_dynptr.id) {
10776 					verbose(env, "verifier internal error: no dynptr id\n");
10777 					return -EFAULT;
10778 				}
10779 				regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
10780 
10781 				/* we don't need to set BPF_REG_0's ref obj id
10782 				 * because packet slices are not refcounted (see
10783 				 * dynptr_type_refcounted)
10784 				 */
10785 			} else {
10786 				verbose(env, "kernel function %s unhandled dynamic return type\n",
10787 					meta.func_name);
10788 				return -EFAULT;
10789 			}
10790 		} else if (!__btf_type_is_struct(ptr_type)) {
10791 			if (!meta.r0_size) {
10792 				__u32 sz;
10793 
10794 				if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
10795 					meta.r0_size = sz;
10796 					meta.r0_rdonly = true;
10797 				}
10798 			}
10799 			if (!meta.r0_size) {
10800 				ptr_type_name = btf_name_by_offset(desc_btf,
10801 								   ptr_type->name_off);
10802 				verbose(env,
10803 					"kernel function %s returns pointer type %s %s is not supported\n",
10804 					func_name,
10805 					btf_type_str(ptr_type),
10806 					ptr_type_name);
10807 				return -EINVAL;
10808 			}
10809 
10810 			mark_reg_known_zero(env, regs, BPF_REG_0);
10811 			regs[BPF_REG_0].type = PTR_TO_MEM;
10812 			regs[BPF_REG_0].mem_size = meta.r0_size;
10813 
10814 			if (meta.r0_rdonly)
10815 				regs[BPF_REG_0].type |= MEM_RDONLY;
10816 
10817 			/* Ensures we don't access the memory after a release_reference() */
10818 			if (meta.ref_obj_id)
10819 				regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
10820 		} else {
10821 			mark_reg_known_zero(env, regs, BPF_REG_0);
10822 			regs[BPF_REG_0].btf = desc_btf;
10823 			regs[BPF_REG_0].type = PTR_TO_BTF_ID;
10824 			regs[BPF_REG_0].btf_id = ptr_type_id;
10825 		}
10826 
10827 		if (is_kfunc_ret_null(&meta)) {
10828 			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
10829 			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
10830 			regs[BPF_REG_0].id = ++env->id_gen;
10831 		}
10832 		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
10833 		if (is_kfunc_acquire(&meta)) {
10834 			int id = acquire_reference_state(env, insn_idx);
10835 
10836 			if (id < 0)
10837 				return id;
10838 			if (is_kfunc_ret_null(&meta))
10839 				regs[BPF_REG_0].id = id;
10840 			regs[BPF_REG_0].ref_obj_id = id;
10841 		} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
10842 			ref_set_non_owning(env, &regs[BPF_REG_0]);
10843 		}
10844 
10845 		if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove])
10846 			invalidate_non_owning_refs(env);
10847 
10848 		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
10849 			regs[BPF_REG_0].id = ++env->id_gen;
10850 	} else if (btf_type_is_void(t)) {
10851 		if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
10852 			if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
10853 				insn_aux->kptr_struct_meta =
10854 					btf_find_struct_meta(meta.arg_obj_drop.btf,
10855 							     meta.arg_obj_drop.btf_id);
10856 			}
10857 		}
10858 	}
10859 
10860 	nargs = btf_type_vlen(meta.func_proto);
10861 	args = (const struct btf_param *)(meta.func_proto + 1);
10862 	for (i = 0; i < nargs; i++) {
10863 		u32 regno = i + 1;
10864 
10865 		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
10866 		if (btf_type_is_ptr(t))
10867 			mark_btf_func_reg_size(env, regno, sizeof(void *));
10868 		else
10869 			/* scalar. ensured by btf_check_kfunc_arg_match() */
10870 			mark_btf_func_reg_size(env, regno, t->size);
10871 	}
10872 
10873 	if (is_iter_next_kfunc(&meta)) {
10874 		err = process_iter_next_call(env, insn_idx, &meta);
10875 		if (err)
10876 			return err;
10877 	}
10878 
10879 	return 0;
10880 }
10881 
10882 static bool signed_add_overflows(s64 a, s64 b)
10883 {
10884 	/* Do the add in u64, where overflow is well-defined */
10885 	s64 res = (s64)((u64)a + (u64)b);
10886 
10887 	if (b < 0)
10888 		return res > a;
10889 	return res < a;
10890 }
10891 
10892 static bool signed_add32_overflows(s32 a, s32 b)
10893 {
10894 	/* Do the add in u32, where overflow is well-defined */
10895 	s32 res = (s32)((u32)a + (u32)b);
10896 
10897 	if (b < 0)
10898 		return res > a;
10899 	return res < a;
10900 }
10901 
10902 static bool signed_sub_overflows(s64 a, s64 b)
10903 {
10904 	/* Do the sub in u64, where overflow is well-defined */
10905 	s64 res = (s64)((u64)a - (u64)b);
10906 
10907 	if (b < 0)
10908 		return res < a;
10909 	return res > a;
10910 }
10911 
10912 static bool signed_sub32_overflows(s32 a, s32 b)
10913 {
10914 	/* Do the sub in u32, where overflow is well-defined */
10915 	s32 res = (s32)((u32)a - (u32)b);
10916 
10917 	if (b < 0)
10918 		return res < a;
10919 	return res > a;
10920 }
10921 
10922 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
10923 				  const struct bpf_reg_state *reg,
10924 				  enum bpf_reg_type type)
10925 {
10926 	bool known = tnum_is_const(reg->var_off);
10927 	s64 val = reg->var_off.value;
10928 	s64 smin = reg->smin_value;
10929 
10930 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
10931 		verbose(env, "math between %s pointer and %lld is not allowed\n",
10932 			reg_type_str(env, type), val);
10933 		return false;
10934 	}
10935 
10936 	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
10937 		verbose(env, "%s pointer offset %d is not allowed\n",
10938 			reg_type_str(env, type), reg->off);
10939 		return false;
10940 	}
10941 
10942 	if (smin == S64_MIN) {
10943 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
10944 			reg_type_str(env, type));
10945 		return false;
10946 	}
10947 
10948 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
10949 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
10950 			smin, reg_type_str(env, type));
10951 		return false;
10952 	}
10953 
10954 	return true;
10955 }
10956 
10957 enum {
10958 	REASON_BOUNDS	= -1,
10959 	REASON_TYPE	= -2,
10960 	REASON_PATHS	= -3,
10961 	REASON_LIMIT	= -4,
10962 	REASON_STACK	= -5,
10963 };
10964 
10965 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
10966 			      u32 *alu_limit, bool mask_to_left)
10967 {
10968 	u32 max = 0, ptr_limit = 0;
10969 
10970 	switch (ptr_reg->type) {
10971 	case PTR_TO_STACK:
10972 		/* Offset 0 is out-of-bounds, but acceptable start for the
10973 		 * left direction, see BPF_REG_FP. Also, unknown scalar
10974 		 * offset where we would need to deal with min/max bounds is
10975 		 * currently prohibited for unprivileged.
10976 		 */
10977 		max = MAX_BPF_STACK + mask_to_left;
10978 		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
10979 		break;
10980 	case PTR_TO_MAP_VALUE:
10981 		max = ptr_reg->map_ptr->value_size;
10982 		ptr_limit = (mask_to_left ?
10983 			     ptr_reg->smin_value :
10984 			     ptr_reg->umax_value) + ptr_reg->off;
10985 		break;
10986 	default:
10987 		return REASON_TYPE;
10988 	}
10989 
10990 	if (ptr_limit >= max)
10991 		return REASON_LIMIT;
10992 	*alu_limit = ptr_limit;
10993 	return 0;
10994 }
10995 
10996 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
10997 				    const struct bpf_insn *insn)
10998 {
10999 	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
11000 }
11001 
11002 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
11003 				       u32 alu_state, u32 alu_limit)
11004 {
11005 	/* If we arrived here from different branches with different
11006 	 * state or limits to sanitize, then this won't work.
11007 	 */
11008 	if (aux->alu_state &&
11009 	    (aux->alu_state != alu_state ||
11010 	     aux->alu_limit != alu_limit))
11011 		return REASON_PATHS;
11012 
11013 	/* Corresponding fixup done in do_misc_fixups(). */
11014 	aux->alu_state = alu_state;
11015 	aux->alu_limit = alu_limit;
11016 	return 0;
11017 }
11018 
11019 static int sanitize_val_alu(struct bpf_verifier_env *env,
11020 			    struct bpf_insn *insn)
11021 {
11022 	struct bpf_insn_aux_data *aux = cur_aux(env);
11023 
11024 	if (can_skip_alu_sanitation(env, insn))
11025 		return 0;
11026 
11027 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
11028 }
11029 
11030 static bool sanitize_needed(u8 opcode)
11031 {
11032 	return opcode == BPF_ADD || opcode == BPF_SUB;
11033 }
11034 
11035 struct bpf_sanitize_info {
11036 	struct bpf_insn_aux_data aux;
11037 	bool mask_to_left;
11038 };
11039 
11040 static struct bpf_verifier_state *
11041 sanitize_speculative_path(struct bpf_verifier_env *env,
11042 			  const struct bpf_insn *insn,
11043 			  u32 next_idx, u32 curr_idx)
11044 {
11045 	struct bpf_verifier_state *branch;
11046 	struct bpf_reg_state *regs;
11047 
11048 	branch = push_stack(env, next_idx, curr_idx, true);
11049 	if (branch && insn) {
11050 		regs = branch->frame[branch->curframe]->regs;
11051 		if (BPF_SRC(insn->code) == BPF_K) {
11052 			mark_reg_unknown(env, regs, insn->dst_reg);
11053 		} else if (BPF_SRC(insn->code) == BPF_X) {
11054 			mark_reg_unknown(env, regs, insn->dst_reg);
11055 			mark_reg_unknown(env, regs, insn->src_reg);
11056 		}
11057 	}
11058 	return branch;
11059 }
11060 
11061 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
11062 			    struct bpf_insn *insn,
11063 			    const struct bpf_reg_state *ptr_reg,
11064 			    const struct bpf_reg_state *off_reg,
11065 			    struct bpf_reg_state *dst_reg,
11066 			    struct bpf_sanitize_info *info,
11067 			    const bool commit_window)
11068 {
11069 	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
11070 	struct bpf_verifier_state *vstate = env->cur_state;
11071 	bool off_is_imm = tnum_is_const(off_reg->var_off);
11072 	bool off_is_neg = off_reg->smin_value < 0;
11073 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
11074 	u8 opcode = BPF_OP(insn->code);
11075 	u32 alu_state, alu_limit;
11076 	struct bpf_reg_state tmp;
11077 	bool ret;
11078 	int err;
11079 
11080 	if (can_skip_alu_sanitation(env, insn))
11081 		return 0;
11082 
11083 	/* We already marked aux for masking from non-speculative
11084 	 * paths, thus we got here in the first place. We only care
11085 	 * to explore bad access from here.
11086 	 */
11087 	if (vstate->speculative)
11088 		goto do_sim;
11089 
11090 	if (!commit_window) {
11091 		if (!tnum_is_const(off_reg->var_off) &&
11092 		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
11093 			return REASON_BOUNDS;
11094 
11095 		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
11096 				     (opcode == BPF_SUB && !off_is_neg);
11097 	}
11098 
11099 	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
11100 	if (err < 0)
11101 		return err;
11102 
11103 	if (commit_window) {
11104 		/* In commit phase we narrow the masking window based on
11105 		 * the observed pointer move after the simulated operation.
11106 		 */
11107 		alu_state = info->aux.alu_state;
11108 		alu_limit = abs(info->aux.alu_limit - alu_limit);
11109 	} else {
11110 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
11111 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
11112 		alu_state |= ptr_is_dst_reg ?
11113 			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
11114 
11115 		/* Limit pruning on unknown scalars to enable deep search for
11116 		 * potential masking differences from other program paths.
11117 		 */
11118 		if (!off_is_imm)
11119 			env->explore_alu_limits = true;
11120 	}
11121 
11122 	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
11123 	if (err < 0)
11124 		return err;
11125 do_sim:
11126 	/* If we're in commit phase, we're done here given we already
11127 	 * pushed the truncated dst_reg into the speculative verification
11128 	 * stack.
11129 	 *
11130 	 * Also, when register is a known constant, we rewrite register-based
11131 	 * operation to immediate-based, and thus do not need masking (and as
11132 	 * a consequence, do not need to simulate the zero-truncation either).
11133 	 */
11134 	if (commit_window || off_is_imm)
11135 		return 0;
11136 
11137 	/* Simulate and find potential out-of-bounds access under
11138 	 * speculative execution from truncation as a result of
11139 	 * masking when off was not within expected range. If off
11140 	 * sits in dst, then we temporarily need to move ptr there
11141 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
11142 	 * for cases where we use K-based arithmetic in one direction
11143 	 * and truncated reg-based in the other in order to explore
11144 	 * bad access.
11145 	 */
11146 	if (!ptr_is_dst_reg) {
11147 		tmp = *dst_reg;
11148 		copy_register_state(dst_reg, ptr_reg);
11149 	}
11150 	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
11151 					env->insn_idx);
11152 	if (!ptr_is_dst_reg && ret)
11153 		*dst_reg = tmp;
11154 	return !ret ? REASON_STACK : 0;
11155 }
11156 
11157 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
11158 {
11159 	struct bpf_verifier_state *vstate = env->cur_state;
11160 
11161 	/* If we simulate paths under speculation, we don't update the
11162 	 * insn as 'seen' such that when we verify unreachable paths in
11163 	 * the non-speculative domain, sanitize_dead_code() can still
11164 	 * rewrite/sanitize them.
11165 	 */
11166 	if (!vstate->speculative)
11167 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
11168 }
11169 
11170 static int sanitize_err(struct bpf_verifier_env *env,
11171 			const struct bpf_insn *insn, int reason,
11172 			const struct bpf_reg_state *off_reg,
11173 			const struct bpf_reg_state *dst_reg)
11174 {
11175 	static const char *err = "pointer arithmetic with it prohibited for !root";
11176 	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
11177 	u32 dst = insn->dst_reg, src = insn->src_reg;
11178 
11179 	switch (reason) {
11180 	case REASON_BOUNDS:
11181 		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
11182 			off_reg == dst_reg ? dst : src, err);
11183 		break;
11184 	case REASON_TYPE:
11185 		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
11186 			off_reg == dst_reg ? src : dst, err);
11187 		break;
11188 	case REASON_PATHS:
11189 		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
11190 			dst, op, err);
11191 		break;
11192 	case REASON_LIMIT:
11193 		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
11194 			dst, op, err);
11195 		break;
11196 	case REASON_STACK:
11197 		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
11198 			dst, err);
11199 		break;
11200 	default:
11201 		verbose(env, "verifier internal error: unknown reason (%d)\n",
11202 			reason);
11203 		break;
11204 	}
11205 
11206 	return -EACCES;
11207 }
11208 
11209 /* check that stack access falls within stack limits and that 'reg' doesn't
11210  * have a variable offset.
11211  *
11212  * Variable offset is prohibited for unprivileged mode for simplicity since it
11213  * requires corresponding support in Spectre masking for stack ALU.  See also
11214  * retrieve_ptr_limit().
11215  *
11216  *
11217  * 'off' includes 'reg->off'.
11218  */
11219 static int check_stack_access_for_ptr_arithmetic(
11220 				struct bpf_verifier_env *env,
11221 				int regno,
11222 				const struct bpf_reg_state *reg,
11223 				int off)
11224 {
11225 	if (!tnum_is_const(reg->var_off)) {
11226 		char tn_buf[48];
11227 
11228 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
11229 		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
11230 			regno, tn_buf, off);
11231 		return -EACCES;
11232 	}
11233 
11234 	if (off >= 0 || off < -MAX_BPF_STACK) {
11235 		verbose(env, "R%d stack pointer arithmetic goes out of range, "
11236 			"prohibited for !root; off=%d\n", regno, off);
11237 		return -EACCES;
11238 	}
11239 
11240 	return 0;
11241 }
11242 
11243 static int sanitize_check_bounds(struct bpf_verifier_env *env,
11244 				 const struct bpf_insn *insn,
11245 				 const struct bpf_reg_state *dst_reg)
11246 {
11247 	u32 dst = insn->dst_reg;
11248 
11249 	/* For unprivileged we require that resulting offset must be in bounds
11250 	 * in order to be able to sanitize access later on.
11251 	 */
11252 	if (env->bypass_spec_v1)
11253 		return 0;
11254 
11255 	switch (dst_reg->type) {
11256 	case PTR_TO_STACK:
11257 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
11258 					dst_reg->off + dst_reg->var_off.value))
11259 			return -EACCES;
11260 		break;
11261 	case PTR_TO_MAP_VALUE:
11262 		if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
11263 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
11264 				"prohibited for !root\n", dst);
11265 			return -EACCES;
11266 		}
11267 		break;
11268 	default:
11269 		break;
11270 	}
11271 
11272 	return 0;
11273 }
11274 
11275 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
11276  * Caller should also handle BPF_MOV case separately.
11277  * If we return -EACCES, caller may want to try again treating pointer as a
11278  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
11279  */
11280 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
11281 				   struct bpf_insn *insn,
11282 				   const struct bpf_reg_state *ptr_reg,
11283 				   const struct bpf_reg_state *off_reg)
11284 {
11285 	struct bpf_verifier_state *vstate = env->cur_state;
11286 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
11287 	struct bpf_reg_state *regs = state->regs, *dst_reg;
11288 	bool known = tnum_is_const(off_reg->var_off);
11289 	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
11290 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
11291 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
11292 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
11293 	struct bpf_sanitize_info info = {};
11294 	u8 opcode = BPF_OP(insn->code);
11295 	u32 dst = insn->dst_reg;
11296 	int ret;
11297 
11298 	dst_reg = &regs[dst];
11299 
11300 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
11301 	    smin_val > smax_val || umin_val > umax_val) {
11302 		/* Taint dst register if offset had invalid bounds derived from
11303 		 * e.g. dead branches.
11304 		 */
11305 		__mark_reg_unknown(env, dst_reg);
11306 		return 0;
11307 	}
11308 
11309 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
11310 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
11311 		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
11312 			__mark_reg_unknown(env, dst_reg);
11313 			return 0;
11314 		}
11315 
11316 		verbose(env,
11317 			"R%d 32-bit pointer arithmetic prohibited\n",
11318 			dst);
11319 		return -EACCES;
11320 	}
11321 
11322 	if (ptr_reg->type & PTR_MAYBE_NULL) {
11323 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
11324 			dst, reg_type_str(env, ptr_reg->type));
11325 		return -EACCES;
11326 	}
11327 
11328 	switch (base_type(ptr_reg->type)) {
11329 	case CONST_PTR_TO_MAP:
11330 		/* smin_val represents the known value */
11331 		if (known && smin_val == 0 && opcode == BPF_ADD)
11332 			break;
11333 		fallthrough;
11334 	case PTR_TO_PACKET_END:
11335 	case PTR_TO_SOCKET:
11336 	case PTR_TO_SOCK_COMMON:
11337 	case PTR_TO_TCP_SOCK:
11338 	case PTR_TO_XDP_SOCK:
11339 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
11340 			dst, reg_type_str(env, ptr_reg->type));
11341 		return -EACCES;
11342 	default:
11343 		break;
11344 	}
11345 
11346 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
11347 	 * The id may be overwritten later if we create a new variable offset.
11348 	 */
11349 	dst_reg->type = ptr_reg->type;
11350 	dst_reg->id = ptr_reg->id;
11351 
11352 	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
11353 	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
11354 		return -EINVAL;
11355 
11356 	/* pointer types do not carry 32-bit bounds at the moment. */
11357 	__mark_reg32_unbounded(dst_reg);
11358 
11359 	if (sanitize_needed(opcode)) {
11360 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
11361 				       &info, false);
11362 		if (ret < 0)
11363 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
11364 	}
11365 
11366 	switch (opcode) {
11367 	case BPF_ADD:
11368 		/* We can take a fixed offset as long as it doesn't overflow
11369 		 * the s32 'off' field
11370 		 */
11371 		if (known && (ptr_reg->off + smin_val ==
11372 			      (s64)(s32)(ptr_reg->off + smin_val))) {
11373 			/* pointer += K.  Accumulate it into fixed offset */
11374 			dst_reg->smin_value = smin_ptr;
11375 			dst_reg->smax_value = smax_ptr;
11376 			dst_reg->umin_value = umin_ptr;
11377 			dst_reg->umax_value = umax_ptr;
11378 			dst_reg->var_off = ptr_reg->var_off;
11379 			dst_reg->off = ptr_reg->off + smin_val;
11380 			dst_reg->raw = ptr_reg->raw;
11381 			break;
11382 		}
11383 		/* A new variable offset is created.  Note that off_reg->off
11384 		 * == 0, since it's a scalar.
11385 		 * dst_reg gets the pointer type and since some positive
11386 		 * integer value was added to the pointer, give it a new 'id'
11387 		 * if it's a PTR_TO_PACKET.
11388 		 * this creates a new 'base' pointer, off_reg (variable) gets
11389 		 * added into the variable offset, and we copy the fixed offset
11390 		 * from ptr_reg.
11391 		 */
11392 		if (signed_add_overflows(smin_ptr, smin_val) ||
11393 		    signed_add_overflows(smax_ptr, smax_val)) {
11394 			dst_reg->smin_value = S64_MIN;
11395 			dst_reg->smax_value = S64_MAX;
11396 		} else {
11397 			dst_reg->smin_value = smin_ptr + smin_val;
11398 			dst_reg->smax_value = smax_ptr + smax_val;
11399 		}
11400 		if (umin_ptr + umin_val < umin_ptr ||
11401 		    umax_ptr + umax_val < umax_ptr) {
11402 			dst_reg->umin_value = 0;
11403 			dst_reg->umax_value = U64_MAX;
11404 		} else {
11405 			dst_reg->umin_value = umin_ptr + umin_val;
11406 			dst_reg->umax_value = umax_ptr + umax_val;
11407 		}
11408 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
11409 		dst_reg->off = ptr_reg->off;
11410 		dst_reg->raw = ptr_reg->raw;
11411 		if (reg_is_pkt_pointer(ptr_reg)) {
11412 			dst_reg->id = ++env->id_gen;
11413 			/* something was added to pkt_ptr, set range to zero */
11414 			memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
11415 		}
11416 		break;
11417 	case BPF_SUB:
11418 		if (dst_reg == off_reg) {
11419 			/* scalar -= pointer.  Creates an unknown scalar */
11420 			verbose(env, "R%d tried to subtract pointer from scalar\n",
11421 				dst);
11422 			return -EACCES;
11423 		}
11424 		/* We don't allow subtraction from FP, because (according to
11425 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
11426 		 * be able to deal with it.
11427 		 */
11428 		if (ptr_reg->type == PTR_TO_STACK) {
11429 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
11430 				dst);
11431 			return -EACCES;
11432 		}
11433 		if (known && (ptr_reg->off - smin_val ==
11434 			      (s64)(s32)(ptr_reg->off - smin_val))) {
11435 			/* pointer -= K.  Subtract it from fixed offset */
11436 			dst_reg->smin_value = smin_ptr;
11437 			dst_reg->smax_value = smax_ptr;
11438 			dst_reg->umin_value = umin_ptr;
11439 			dst_reg->umax_value = umax_ptr;
11440 			dst_reg->var_off = ptr_reg->var_off;
11441 			dst_reg->id = ptr_reg->id;
11442 			dst_reg->off = ptr_reg->off - smin_val;
11443 			dst_reg->raw = ptr_reg->raw;
11444 			break;
11445 		}
11446 		/* A new variable offset is created.  If the subtrahend is known
11447 		 * nonnegative, then any reg->range we had before is still good.
11448 		 */
11449 		if (signed_sub_overflows(smin_ptr, smax_val) ||
11450 		    signed_sub_overflows(smax_ptr, smin_val)) {
11451 			/* Overflow possible, we know nothing */
11452 			dst_reg->smin_value = S64_MIN;
11453 			dst_reg->smax_value = S64_MAX;
11454 		} else {
11455 			dst_reg->smin_value = smin_ptr - smax_val;
11456 			dst_reg->smax_value = smax_ptr - smin_val;
11457 		}
11458 		if (umin_ptr < umax_val) {
11459 			/* Overflow possible, we know nothing */
11460 			dst_reg->umin_value = 0;
11461 			dst_reg->umax_value = U64_MAX;
11462 		} else {
11463 			/* Cannot overflow (as long as bounds are consistent) */
11464 			dst_reg->umin_value = umin_ptr - umax_val;
11465 			dst_reg->umax_value = umax_ptr - umin_val;
11466 		}
11467 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
11468 		dst_reg->off = ptr_reg->off;
11469 		dst_reg->raw = ptr_reg->raw;
11470 		if (reg_is_pkt_pointer(ptr_reg)) {
11471 			dst_reg->id = ++env->id_gen;
11472 			/* something was added to pkt_ptr, set range to zero */
11473 			if (smin_val < 0)
11474 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
11475 		}
11476 		break;
11477 	case BPF_AND:
11478 	case BPF_OR:
11479 	case BPF_XOR:
11480 		/* bitwise ops on pointers are troublesome, prohibit. */
11481 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
11482 			dst, bpf_alu_string[opcode >> 4]);
11483 		return -EACCES;
11484 	default:
11485 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
11486 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
11487 			dst, bpf_alu_string[opcode >> 4]);
11488 		return -EACCES;
11489 	}
11490 
11491 	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
11492 		return -EINVAL;
11493 	reg_bounds_sync(dst_reg);
11494 	if (sanitize_check_bounds(env, insn, dst_reg) < 0)
11495 		return -EACCES;
11496 	if (sanitize_needed(opcode)) {
11497 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
11498 				       &info, true);
11499 		if (ret < 0)
11500 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
11501 	}
11502 
11503 	return 0;
11504 }
11505 
11506 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
11507 				 struct bpf_reg_state *src_reg)
11508 {
11509 	s32 smin_val = src_reg->s32_min_value;
11510 	s32 smax_val = src_reg->s32_max_value;
11511 	u32 umin_val = src_reg->u32_min_value;
11512 	u32 umax_val = src_reg->u32_max_value;
11513 
11514 	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
11515 	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
11516 		dst_reg->s32_min_value = S32_MIN;
11517 		dst_reg->s32_max_value = S32_MAX;
11518 	} else {
11519 		dst_reg->s32_min_value += smin_val;
11520 		dst_reg->s32_max_value += smax_val;
11521 	}
11522 	if (dst_reg->u32_min_value + umin_val < umin_val ||
11523 	    dst_reg->u32_max_value + umax_val < umax_val) {
11524 		dst_reg->u32_min_value = 0;
11525 		dst_reg->u32_max_value = U32_MAX;
11526 	} else {
11527 		dst_reg->u32_min_value += umin_val;
11528 		dst_reg->u32_max_value += umax_val;
11529 	}
11530 }
11531 
11532 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
11533 			       struct bpf_reg_state *src_reg)
11534 {
11535 	s64 smin_val = src_reg->smin_value;
11536 	s64 smax_val = src_reg->smax_value;
11537 	u64 umin_val = src_reg->umin_value;
11538 	u64 umax_val = src_reg->umax_value;
11539 
11540 	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
11541 	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
11542 		dst_reg->smin_value = S64_MIN;
11543 		dst_reg->smax_value = S64_MAX;
11544 	} else {
11545 		dst_reg->smin_value += smin_val;
11546 		dst_reg->smax_value += smax_val;
11547 	}
11548 	if (dst_reg->umin_value + umin_val < umin_val ||
11549 	    dst_reg->umax_value + umax_val < umax_val) {
11550 		dst_reg->umin_value = 0;
11551 		dst_reg->umax_value = U64_MAX;
11552 	} else {
11553 		dst_reg->umin_value += umin_val;
11554 		dst_reg->umax_value += umax_val;
11555 	}
11556 }
11557 
11558 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
11559 				 struct bpf_reg_state *src_reg)
11560 {
11561 	s32 smin_val = src_reg->s32_min_value;
11562 	s32 smax_val = src_reg->s32_max_value;
11563 	u32 umin_val = src_reg->u32_min_value;
11564 	u32 umax_val = src_reg->u32_max_value;
11565 
11566 	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
11567 	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
11568 		/* Overflow possible, we know nothing */
11569 		dst_reg->s32_min_value = S32_MIN;
11570 		dst_reg->s32_max_value = S32_MAX;
11571 	} else {
11572 		dst_reg->s32_min_value -= smax_val;
11573 		dst_reg->s32_max_value -= smin_val;
11574 	}
11575 	if (dst_reg->u32_min_value < umax_val) {
11576 		/* Overflow possible, we know nothing */
11577 		dst_reg->u32_min_value = 0;
11578 		dst_reg->u32_max_value = U32_MAX;
11579 	} else {
11580 		/* Cannot overflow (as long as bounds are consistent) */
11581 		dst_reg->u32_min_value -= umax_val;
11582 		dst_reg->u32_max_value -= umin_val;
11583 	}
11584 }
11585 
11586 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
11587 			       struct bpf_reg_state *src_reg)
11588 {
11589 	s64 smin_val = src_reg->smin_value;
11590 	s64 smax_val = src_reg->smax_value;
11591 	u64 umin_val = src_reg->umin_value;
11592 	u64 umax_val = src_reg->umax_value;
11593 
11594 	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
11595 	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
11596 		/* Overflow possible, we know nothing */
11597 		dst_reg->smin_value = S64_MIN;
11598 		dst_reg->smax_value = S64_MAX;
11599 	} else {
11600 		dst_reg->smin_value -= smax_val;
11601 		dst_reg->smax_value -= smin_val;
11602 	}
11603 	if (dst_reg->umin_value < umax_val) {
11604 		/* Overflow possible, we know nothing */
11605 		dst_reg->umin_value = 0;
11606 		dst_reg->umax_value = U64_MAX;
11607 	} else {
11608 		/* Cannot overflow (as long as bounds are consistent) */
11609 		dst_reg->umin_value -= umax_val;
11610 		dst_reg->umax_value -= umin_val;
11611 	}
11612 }
11613 
11614 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
11615 				 struct bpf_reg_state *src_reg)
11616 {
11617 	s32 smin_val = src_reg->s32_min_value;
11618 	u32 umin_val = src_reg->u32_min_value;
11619 	u32 umax_val = src_reg->u32_max_value;
11620 
11621 	if (smin_val < 0 || dst_reg->s32_min_value < 0) {
11622 		/* Ain't nobody got time to multiply that sign */
11623 		__mark_reg32_unbounded(dst_reg);
11624 		return;
11625 	}
11626 	/* Both values are positive, so we can work with unsigned and
11627 	 * copy the result to signed (unless it exceeds S32_MAX).
11628 	 */
11629 	if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
11630 		/* Potential overflow, we know nothing */
11631 		__mark_reg32_unbounded(dst_reg);
11632 		return;
11633 	}
11634 	dst_reg->u32_min_value *= umin_val;
11635 	dst_reg->u32_max_value *= umax_val;
11636 	if (dst_reg->u32_max_value > S32_MAX) {
11637 		/* Overflow possible, we know nothing */
11638 		dst_reg->s32_min_value = S32_MIN;
11639 		dst_reg->s32_max_value = S32_MAX;
11640 	} else {
11641 		dst_reg->s32_min_value = dst_reg->u32_min_value;
11642 		dst_reg->s32_max_value = dst_reg->u32_max_value;
11643 	}
11644 }
11645 
11646 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
11647 			       struct bpf_reg_state *src_reg)
11648 {
11649 	s64 smin_val = src_reg->smin_value;
11650 	u64 umin_val = src_reg->umin_value;
11651 	u64 umax_val = src_reg->umax_value;
11652 
11653 	if (smin_val < 0 || dst_reg->smin_value < 0) {
11654 		/* Ain't nobody got time to multiply that sign */
11655 		__mark_reg64_unbounded(dst_reg);
11656 		return;
11657 	}
11658 	/* Both values are positive, so we can work with unsigned and
11659 	 * copy the result to signed (unless it exceeds S64_MAX).
11660 	 */
11661 	if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
11662 		/* Potential overflow, we know nothing */
11663 		__mark_reg64_unbounded(dst_reg);
11664 		return;
11665 	}
11666 	dst_reg->umin_value *= umin_val;
11667 	dst_reg->umax_value *= umax_val;
11668 	if (dst_reg->umax_value > S64_MAX) {
11669 		/* Overflow possible, we know nothing */
11670 		dst_reg->smin_value = S64_MIN;
11671 		dst_reg->smax_value = S64_MAX;
11672 	} else {
11673 		dst_reg->smin_value = dst_reg->umin_value;
11674 		dst_reg->smax_value = dst_reg->umax_value;
11675 	}
11676 }
11677 
11678 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
11679 				 struct bpf_reg_state *src_reg)
11680 {
11681 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
11682 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
11683 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
11684 	s32 smin_val = src_reg->s32_min_value;
11685 	u32 umax_val = src_reg->u32_max_value;
11686 
11687 	if (src_known && dst_known) {
11688 		__mark_reg32_known(dst_reg, var32_off.value);
11689 		return;
11690 	}
11691 
11692 	/* We get our minimum from the var_off, since that's inherently
11693 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
11694 	 */
11695 	dst_reg->u32_min_value = var32_off.value;
11696 	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
11697 	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
11698 		/* Lose signed bounds when ANDing negative numbers,
11699 		 * ain't nobody got time for that.
11700 		 */
11701 		dst_reg->s32_min_value = S32_MIN;
11702 		dst_reg->s32_max_value = S32_MAX;
11703 	} else {
11704 		/* ANDing two positives gives a positive, so safe to
11705 		 * cast result into s64.
11706 		 */
11707 		dst_reg->s32_min_value = dst_reg->u32_min_value;
11708 		dst_reg->s32_max_value = dst_reg->u32_max_value;
11709 	}
11710 }
11711 
11712 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
11713 			       struct bpf_reg_state *src_reg)
11714 {
11715 	bool src_known = tnum_is_const(src_reg->var_off);
11716 	bool dst_known = tnum_is_const(dst_reg->var_off);
11717 	s64 smin_val = src_reg->smin_value;
11718 	u64 umax_val = src_reg->umax_value;
11719 
11720 	if (src_known && dst_known) {
11721 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
11722 		return;
11723 	}
11724 
11725 	/* We get our minimum from the var_off, since that's inherently
11726 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
11727 	 */
11728 	dst_reg->umin_value = dst_reg->var_off.value;
11729 	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
11730 	if (dst_reg->smin_value < 0 || smin_val < 0) {
11731 		/* Lose signed bounds when ANDing negative numbers,
11732 		 * ain't nobody got time for that.
11733 		 */
11734 		dst_reg->smin_value = S64_MIN;
11735 		dst_reg->smax_value = S64_MAX;
11736 	} else {
11737 		/* ANDing two positives gives a positive, so safe to
11738 		 * cast result into s64.
11739 		 */
11740 		dst_reg->smin_value = dst_reg->umin_value;
11741 		dst_reg->smax_value = dst_reg->umax_value;
11742 	}
11743 	/* We may learn something more from the var_off */
11744 	__update_reg_bounds(dst_reg);
11745 }
11746 
11747 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
11748 				struct bpf_reg_state *src_reg)
11749 {
11750 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
11751 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
11752 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
11753 	s32 smin_val = src_reg->s32_min_value;
11754 	u32 umin_val = src_reg->u32_min_value;
11755 
11756 	if (src_known && dst_known) {
11757 		__mark_reg32_known(dst_reg, var32_off.value);
11758 		return;
11759 	}
11760 
11761 	/* We get our maximum from the var_off, and our minimum is the
11762 	 * maximum of the operands' minima
11763 	 */
11764 	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
11765 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
11766 	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
11767 		/* Lose signed bounds when ORing negative numbers,
11768 		 * ain't nobody got time for that.
11769 		 */
11770 		dst_reg->s32_min_value = S32_MIN;
11771 		dst_reg->s32_max_value = S32_MAX;
11772 	} else {
11773 		/* ORing two positives gives a positive, so safe to
11774 		 * cast result into s64.
11775 		 */
11776 		dst_reg->s32_min_value = dst_reg->u32_min_value;
11777 		dst_reg->s32_max_value = dst_reg->u32_max_value;
11778 	}
11779 }
11780 
11781 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
11782 			      struct bpf_reg_state *src_reg)
11783 {
11784 	bool src_known = tnum_is_const(src_reg->var_off);
11785 	bool dst_known = tnum_is_const(dst_reg->var_off);
11786 	s64 smin_val = src_reg->smin_value;
11787 	u64 umin_val = src_reg->umin_value;
11788 
11789 	if (src_known && dst_known) {
11790 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
11791 		return;
11792 	}
11793 
11794 	/* We get our maximum from the var_off, and our minimum is the
11795 	 * maximum of the operands' minima
11796 	 */
11797 	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
11798 	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
11799 	if (dst_reg->smin_value < 0 || smin_val < 0) {
11800 		/* Lose signed bounds when ORing negative numbers,
11801 		 * ain't nobody got time for that.
11802 		 */
11803 		dst_reg->smin_value = S64_MIN;
11804 		dst_reg->smax_value = S64_MAX;
11805 	} else {
11806 		/* ORing two positives gives a positive, so safe to
11807 		 * cast result into s64.
11808 		 */
11809 		dst_reg->smin_value = dst_reg->umin_value;
11810 		dst_reg->smax_value = dst_reg->umax_value;
11811 	}
11812 	/* We may learn something more from the var_off */
11813 	__update_reg_bounds(dst_reg);
11814 }
11815 
11816 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
11817 				 struct bpf_reg_state *src_reg)
11818 {
11819 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
11820 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
11821 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
11822 	s32 smin_val = src_reg->s32_min_value;
11823 
11824 	if (src_known && dst_known) {
11825 		__mark_reg32_known(dst_reg, var32_off.value);
11826 		return;
11827 	}
11828 
11829 	/* We get both minimum and maximum from the var32_off. */
11830 	dst_reg->u32_min_value = var32_off.value;
11831 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
11832 
11833 	if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
11834 		/* XORing two positive sign numbers gives a positive,
11835 		 * so safe to cast u32 result into s32.
11836 		 */
11837 		dst_reg->s32_min_value = dst_reg->u32_min_value;
11838 		dst_reg->s32_max_value = dst_reg->u32_max_value;
11839 	} else {
11840 		dst_reg->s32_min_value = S32_MIN;
11841 		dst_reg->s32_max_value = S32_MAX;
11842 	}
11843 }
11844 
11845 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
11846 			       struct bpf_reg_state *src_reg)
11847 {
11848 	bool src_known = tnum_is_const(src_reg->var_off);
11849 	bool dst_known = tnum_is_const(dst_reg->var_off);
11850 	s64 smin_val = src_reg->smin_value;
11851 
11852 	if (src_known && dst_known) {
11853 		/* dst_reg->var_off.value has been updated earlier */
11854 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
11855 		return;
11856 	}
11857 
11858 	/* We get both minimum and maximum from the var_off. */
11859 	dst_reg->umin_value = dst_reg->var_off.value;
11860 	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
11861 
11862 	if (dst_reg->smin_value >= 0 && smin_val >= 0) {
11863 		/* XORing two positive sign numbers gives a positive,
11864 		 * so safe to cast u64 result into s64.
11865 		 */
11866 		dst_reg->smin_value = dst_reg->umin_value;
11867 		dst_reg->smax_value = dst_reg->umax_value;
11868 	} else {
11869 		dst_reg->smin_value = S64_MIN;
11870 		dst_reg->smax_value = S64_MAX;
11871 	}
11872 
11873 	__update_reg_bounds(dst_reg);
11874 }
11875 
11876 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
11877 				   u64 umin_val, u64 umax_val)
11878 {
11879 	/* We lose all sign bit information (except what we can pick
11880 	 * up from var_off)
11881 	 */
11882 	dst_reg->s32_min_value = S32_MIN;
11883 	dst_reg->s32_max_value = S32_MAX;
11884 	/* If we might shift our top bit out, then we know nothing */
11885 	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
11886 		dst_reg->u32_min_value = 0;
11887 		dst_reg->u32_max_value = U32_MAX;
11888 	} else {
11889 		dst_reg->u32_min_value <<= umin_val;
11890 		dst_reg->u32_max_value <<= umax_val;
11891 	}
11892 }
11893 
11894 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
11895 				 struct bpf_reg_state *src_reg)
11896 {
11897 	u32 umax_val = src_reg->u32_max_value;
11898 	u32 umin_val = src_reg->u32_min_value;
11899 	/* u32 alu operation will zext upper bits */
11900 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
11901 
11902 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
11903 	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
11904 	/* Not required but being careful mark reg64 bounds as unknown so
11905 	 * that we are forced to pick them up from tnum and zext later and
11906 	 * if some path skips this step we are still safe.
11907 	 */
11908 	__mark_reg64_unbounded(dst_reg);
11909 	__update_reg32_bounds(dst_reg);
11910 }
11911 
11912 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
11913 				   u64 umin_val, u64 umax_val)
11914 {
11915 	/* Special case <<32 because it is a common compiler pattern to sign
11916 	 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
11917 	 * positive we know this shift will also be positive so we can track
11918 	 * bounds correctly. Otherwise we lose all sign bit information except
11919 	 * what we can pick up from var_off. Perhaps we can generalize this
11920 	 * later to shifts of any length.
11921 	 */
11922 	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
11923 		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
11924 	else
11925 		dst_reg->smax_value = S64_MAX;
11926 
11927 	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
11928 		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
11929 	else
11930 		dst_reg->smin_value = S64_MIN;
11931 
11932 	/* If we might shift our top bit out, then we know nothing */
11933 	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
11934 		dst_reg->umin_value = 0;
11935 		dst_reg->umax_value = U64_MAX;
11936 	} else {
11937 		dst_reg->umin_value <<= umin_val;
11938 		dst_reg->umax_value <<= umax_val;
11939 	}
11940 }
11941 
11942 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
11943 			       struct bpf_reg_state *src_reg)
11944 {
11945 	u64 umax_val = src_reg->umax_value;
11946 	u64 umin_val = src_reg->umin_value;
11947 
11948 	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
11949 	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
11950 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
11951 
11952 	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
11953 	/* We may learn something more from the var_off */
11954 	__update_reg_bounds(dst_reg);
11955 }
11956 
11957 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
11958 				 struct bpf_reg_state *src_reg)
11959 {
11960 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
11961 	u32 umax_val = src_reg->u32_max_value;
11962 	u32 umin_val = src_reg->u32_min_value;
11963 
11964 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
11965 	 * be negative, then either:
11966 	 * 1) src_reg might be zero, so the sign bit of the result is
11967 	 *    unknown, so we lose our signed bounds
11968 	 * 2) it's known negative, thus the unsigned bounds capture the
11969 	 *    signed bounds
11970 	 * 3) the signed bounds cross zero, so they tell us nothing
11971 	 *    about the result
11972 	 * If the value in dst_reg is known nonnegative, then again the
11973 	 * unsigned bounds capture the signed bounds.
11974 	 * Thus, in all cases it suffices to blow away our signed bounds
11975 	 * and rely on inferring new ones from the unsigned bounds and
11976 	 * var_off of the result.
11977 	 */
11978 	dst_reg->s32_min_value = S32_MIN;
11979 	dst_reg->s32_max_value = S32_MAX;
11980 
11981 	dst_reg->var_off = tnum_rshift(subreg, umin_val);
11982 	dst_reg->u32_min_value >>= umax_val;
11983 	dst_reg->u32_max_value >>= umin_val;
11984 
11985 	__mark_reg64_unbounded(dst_reg);
11986 	__update_reg32_bounds(dst_reg);
11987 }
11988 
11989 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
11990 			       struct bpf_reg_state *src_reg)
11991 {
11992 	u64 umax_val = src_reg->umax_value;
11993 	u64 umin_val = src_reg->umin_value;
11994 
11995 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
11996 	 * be negative, then either:
11997 	 * 1) src_reg might be zero, so the sign bit of the result is
11998 	 *    unknown, so we lose our signed bounds
11999 	 * 2) it's known negative, thus the unsigned bounds capture the
12000 	 *    signed bounds
12001 	 * 3) the signed bounds cross zero, so they tell us nothing
12002 	 *    about the result
12003 	 * If the value in dst_reg is known nonnegative, then again the
12004 	 * unsigned bounds capture the signed bounds.
12005 	 * Thus, in all cases it suffices to blow away our signed bounds
12006 	 * and rely on inferring new ones from the unsigned bounds and
12007 	 * var_off of the result.
12008 	 */
12009 	dst_reg->smin_value = S64_MIN;
12010 	dst_reg->smax_value = S64_MAX;
12011 	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
12012 	dst_reg->umin_value >>= umax_val;
12013 	dst_reg->umax_value >>= umin_val;
12014 
12015 	/* Its not easy to operate on alu32 bounds here because it depends
12016 	 * on bits being shifted in. Take easy way out and mark unbounded
12017 	 * so we can recalculate later from tnum.
12018 	 */
12019 	__mark_reg32_unbounded(dst_reg);
12020 	__update_reg_bounds(dst_reg);
12021 }
12022 
12023 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
12024 				  struct bpf_reg_state *src_reg)
12025 {
12026 	u64 umin_val = src_reg->u32_min_value;
12027 
12028 	/* Upon reaching here, src_known is true and
12029 	 * umax_val is equal to umin_val.
12030 	 */
12031 	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
12032 	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
12033 
12034 	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
12035 
12036 	/* blow away the dst_reg umin_value/umax_value and rely on
12037 	 * dst_reg var_off to refine the result.
12038 	 */
12039 	dst_reg->u32_min_value = 0;
12040 	dst_reg->u32_max_value = U32_MAX;
12041 
12042 	__mark_reg64_unbounded(dst_reg);
12043 	__update_reg32_bounds(dst_reg);
12044 }
12045 
12046 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
12047 				struct bpf_reg_state *src_reg)
12048 {
12049 	u64 umin_val = src_reg->umin_value;
12050 
12051 	/* Upon reaching here, src_known is true and umax_val is equal
12052 	 * to umin_val.
12053 	 */
12054 	dst_reg->smin_value >>= umin_val;
12055 	dst_reg->smax_value >>= umin_val;
12056 
12057 	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
12058 
12059 	/* blow away the dst_reg umin_value/umax_value and rely on
12060 	 * dst_reg var_off to refine the result.
12061 	 */
12062 	dst_reg->umin_value = 0;
12063 	dst_reg->umax_value = U64_MAX;
12064 
12065 	/* Its not easy to operate on alu32 bounds here because it depends
12066 	 * on bits being shifted in from upper 32-bits. Take easy way out
12067 	 * and mark unbounded so we can recalculate later from tnum.
12068 	 */
12069 	__mark_reg32_unbounded(dst_reg);
12070 	__update_reg_bounds(dst_reg);
12071 }
12072 
12073 /* WARNING: This function does calculations on 64-bit values, but the actual
12074  * execution may occur on 32-bit values. Therefore, things like bitshifts
12075  * need extra checks in the 32-bit case.
12076  */
12077 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
12078 				      struct bpf_insn *insn,
12079 				      struct bpf_reg_state *dst_reg,
12080 				      struct bpf_reg_state src_reg)
12081 {
12082 	struct bpf_reg_state *regs = cur_regs(env);
12083 	u8 opcode = BPF_OP(insn->code);
12084 	bool src_known;
12085 	s64 smin_val, smax_val;
12086 	u64 umin_val, umax_val;
12087 	s32 s32_min_val, s32_max_val;
12088 	u32 u32_min_val, u32_max_val;
12089 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
12090 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
12091 	int ret;
12092 
12093 	smin_val = src_reg.smin_value;
12094 	smax_val = src_reg.smax_value;
12095 	umin_val = src_reg.umin_value;
12096 	umax_val = src_reg.umax_value;
12097 
12098 	s32_min_val = src_reg.s32_min_value;
12099 	s32_max_val = src_reg.s32_max_value;
12100 	u32_min_val = src_reg.u32_min_value;
12101 	u32_max_val = src_reg.u32_max_value;
12102 
12103 	if (alu32) {
12104 		src_known = tnum_subreg_is_const(src_reg.var_off);
12105 		if ((src_known &&
12106 		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
12107 		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
12108 			/* Taint dst register if offset had invalid bounds
12109 			 * derived from e.g. dead branches.
12110 			 */
12111 			__mark_reg_unknown(env, dst_reg);
12112 			return 0;
12113 		}
12114 	} else {
12115 		src_known = tnum_is_const(src_reg.var_off);
12116 		if ((src_known &&
12117 		     (smin_val != smax_val || umin_val != umax_val)) ||
12118 		    smin_val > smax_val || umin_val > umax_val) {
12119 			/* Taint dst register if offset had invalid bounds
12120 			 * derived from e.g. dead branches.
12121 			 */
12122 			__mark_reg_unknown(env, dst_reg);
12123 			return 0;
12124 		}
12125 	}
12126 
12127 	if (!src_known &&
12128 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
12129 		__mark_reg_unknown(env, dst_reg);
12130 		return 0;
12131 	}
12132 
12133 	if (sanitize_needed(opcode)) {
12134 		ret = sanitize_val_alu(env, insn);
12135 		if (ret < 0)
12136 			return sanitize_err(env, insn, ret, NULL, NULL);
12137 	}
12138 
12139 	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
12140 	 * There are two classes of instructions: The first class we track both
12141 	 * alu32 and alu64 sign/unsigned bounds independently this provides the
12142 	 * greatest amount of precision when alu operations are mixed with jmp32
12143 	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
12144 	 * and BPF_OR. This is possible because these ops have fairly easy to
12145 	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
12146 	 * See alu32 verifier tests for examples. The second class of
12147 	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
12148 	 * with regards to tracking sign/unsigned bounds because the bits may
12149 	 * cross subreg boundaries in the alu64 case. When this happens we mark
12150 	 * the reg unbounded in the subreg bound space and use the resulting
12151 	 * tnum to calculate an approximation of the sign/unsigned bounds.
12152 	 */
12153 	switch (opcode) {
12154 	case BPF_ADD:
12155 		scalar32_min_max_add(dst_reg, &src_reg);
12156 		scalar_min_max_add(dst_reg, &src_reg);
12157 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
12158 		break;
12159 	case BPF_SUB:
12160 		scalar32_min_max_sub(dst_reg, &src_reg);
12161 		scalar_min_max_sub(dst_reg, &src_reg);
12162 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
12163 		break;
12164 	case BPF_MUL:
12165 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
12166 		scalar32_min_max_mul(dst_reg, &src_reg);
12167 		scalar_min_max_mul(dst_reg, &src_reg);
12168 		break;
12169 	case BPF_AND:
12170 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
12171 		scalar32_min_max_and(dst_reg, &src_reg);
12172 		scalar_min_max_and(dst_reg, &src_reg);
12173 		break;
12174 	case BPF_OR:
12175 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
12176 		scalar32_min_max_or(dst_reg, &src_reg);
12177 		scalar_min_max_or(dst_reg, &src_reg);
12178 		break;
12179 	case BPF_XOR:
12180 		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
12181 		scalar32_min_max_xor(dst_reg, &src_reg);
12182 		scalar_min_max_xor(dst_reg, &src_reg);
12183 		break;
12184 	case BPF_LSH:
12185 		if (umax_val >= insn_bitness) {
12186 			/* Shifts greater than 31 or 63 are undefined.
12187 			 * This includes shifts by a negative number.
12188 			 */
12189 			mark_reg_unknown(env, regs, insn->dst_reg);
12190 			break;
12191 		}
12192 		if (alu32)
12193 			scalar32_min_max_lsh(dst_reg, &src_reg);
12194 		else
12195 			scalar_min_max_lsh(dst_reg, &src_reg);
12196 		break;
12197 	case BPF_RSH:
12198 		if (umax_val >= insn_bitness) {
12199 			/* Shifts greater than 31 or 63 are undefined.
12200 			 * This includes shifts by a negative number.
12201 			 */
12202 			mark_reg_unknown(env, regs, insn->dst_reg);
12203 			break;
12204 		}
12205 		if (alu32)
12206 			scalar32_min_max_rsh(dst_reg, &src_reg);
12207 		else
12208 			scalar_min_max_rsh(dst_reg, &src_reg);
12209 		break;
12210 	case BPF_ARSH:
12211 		if (umax_val >= insn_bitness) {
12212 			/* Shifts greater than 31 or 63 are undefined.
12213 			 * This includes shifts by a negative number.
12214 			 */
12215 			mark_reg_unknown(env, regs, insn->dst_reg);
12216 			break;
12217 		}
12218 		if (alu32)
12219 			scalar32_min_max_arsh(dst_reg, &src_reg);
12220 		else
12221 			scalar_min_max_arsh(dst_reg, &src_reg);
12222 		break;
12223 	default:
12224 		mark_reg_unknown(env, regs, insn->dst_reg);
12225 		break;
12226 	}
12227 
12228 	/* ALU32 ops are zero extended into 64bit register */
12229 	if (alu32)
12230 		zext_32_to_64(dst_reg);
12231 	reg_bounds_sync(dst_reg);
12232 	return 0;
12233 }
12234 
12235 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
12236  * and var_off.
12237  */
12238 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
12239 				   struct bpf_insn *insn)
12240 {
12241 	struct bpf_verifier_state *vstate = env->cur_state;
12242 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
12243 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
12244 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
12245 	u8 opcode = BPF_OP(insn->code);
12246 	int err;
12247 
12248 	dst_reg = &regs[insn->dst_reg];
12249 	src_reg = NULL;
12250 	if (dst_reg->type != SCALAR_VALUE)
12251 		ptr_reg = dst_reg;
12252 	else
12253 		/* Make sure ID is cleared otherwise dst_reg min/max could be
12254 		 * incorrectly propagated into other registers by find_equal_scalars()
12255 		 */
12256 		dst_reg->id = 0;
12257 	if (BPF_SRC(insn->code) == BPF_X) {
12258 		src_reg = &regs[insn->src_reg];
12259 		if (src_reg->type != SCALAR_VALUE) {
12260 			if (dst_reg->type != SCALAR_VALUE) {
12261 				/* Combining two pointers by any ALU op yields
12262 				 * an arbitrary scalar. Disallow all math except
12263 				 * pointer subtraction
12264 				 */
12265 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
12266 					mark_reg_unknown(env, regs, insn->dst_reg);
12267 					return 0;
12268 				}
12269 				verbose(env, "R%d pointer %s pointer prohibited\n",
12270 					insn->dst_reg,
12271 					bpf_alu_string[opcode >> 4]);
12272 				return -EACCES;
12273 			} else {
12274 				/* scalar += pointer
12275 				 * This is legal, but we have to reverse our
12276 				 * src/dest handling in computing the range
12277 				 */
12278 				err = mark_chain_precision(env, insn->dst_reg);
12279 				if (err)
12280 					return err;
12281 				return adjust_ptr_min_max_vals(env, insn,
12282 							       src_reg, dst_reg);
12283 			}
12284 		} else if (ptr_reg) {
12285 			/* pointer += scalar */
12286 			err = mark_chain_precision(env, insn->src_reg);
12287 			if (err)
12288 				return err;
12289 			return adjust_ptr_min_max_vals(env, insn,
12290 						       dst_reg, src_reg);
12291 		} else if (dst_reg->precise) {
12292 			/* if dst_reg is precise, src_reg should be precise as well */
12293 			err = mark_chain_precision(env, insn->src_reg);
12294 			if (err)
12295 				return err;
12296 		}
12297 	} else {
12298 		/* Pretend the src is a reg with a known value, since we only
12299 		 * need to be able to read from this state.
12300 		 */
12301 		off_reg.type = SCALAR_VALUE;
12302 		__mark_reg_known(&off_reg, insn->imm);
12303 		src_reg = &off_reg;
12304 		if (ptr_reg) /* pointer += K */
12305 			return adjust_ptr_min_max_vals(env, insn,
12306 						       ptr_reg, src_reg);
12307 	}
12308 
12309 	/* Got here implies adding two SCALAR_VALUEs */
12310 	if (WARN_ON_ONCE(ptr_reg)) {
12311 		print_verifier_state(env, state, true);
12312 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
12313 		return -EINVAL;
12314 	}
12315 	if (WARN_ON(!src_reg)) {
12316 		print_verifier_state(env, state, true);
12317 		verbose(env, "verifier internal error: no src_reg\n");
12318 		return -EINVAL;
12319 	}
12320 	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
12321 }
12322 
12323 /* check validity of 32-bit and 64-bit arithmetic operations */
12324 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
12325 {
12326 	struct bpf_reg_state *regs = cur_regs(env);
12327 	u8 opcode = BPF_OP(insn->code);
12328 	int err;
12329 
12330 	if (opcode == BPF_END || opcode == BPF_NEG) {
12331 		if (opcode == BPF_NEG) {
12332 			if (BPF_SRC(insn->code) != BPF_K ||
12333 			    insn->src_reg != BPF_REG_0 ||
12334 			    insn->off != 0 || insn->imm != 0) {
12335 				verbose(env, "BPF_NEG uses reserved fields\n");
12336 				return -EINVAL;
12337 			}
12338 		} else {
12339 			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
12340 			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
12341 			    BPF_CLASS(insn->code) == BPF_ALU64) {
12342 				verbose(env, "BPF_END uses reserved fields\n");
12343 				return -EINVAL;
12344 			}
12345 		}
12346 
12347 		/* check src operand */
12348 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
12349 		if (err)
12350 			return err;
12351 
12352 		if (is_pointer_value(env, insn->dst_reg)) {
12353 			verbose(env, "R%d pointer arithmetic prohibited\n",
12354 				insn->dst_reg);
12355 			return -EACCES;
12356 		}
12357 
12358 		/* check dest operand */
12359 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
12360 		if (err)
12361 			return err;
12362 
12363 	} else if (opcode == BPF_MOV) {
12364 
12365 		if (BPF_SRC(insn->code) == BPF_X) {
12366 			if (insn->imm != 0 || insn->off != 0) {
12367 				verbose(env, "BPF_MOV uses reserved fields\n");
12368 				return -EINVAL;
12369 			}
12370 
12371 			/* check src operand */
12372 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
12373 			if (err)
12374 				return err;
12375 		} else {
12376 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
12377 				verbose(env, "BPF_MOV uses reserved fields\n");
12378 				return -EINVAL;
12379 			}
12380 		}
12381 
12382 		/* check dest operand, mark as required later */
12383 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
12384 		if (err)
12385 			return err;
12386 
12387 		if (BPF_SRC(insn->code) == BPF_X) {
12388 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
12389 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
12390 
12391 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
12392 				/* case: R1 = R2
12393 				 * copy register state to dest reg
12394 				 */
12395 				if (src_reg->type == SCALAR_VALUE && !src_reg->id)
12396 					/* Assign src and dst registers the same ID
12397 					 * that will be used by find_equal_scalars()
12398 					 * to propagate min/max range.
12399 					 */
12400 					src_reg->id = ++env->id_gen;
12401 				copy_register_state(dst_reg, src_reg);
12402 				dst_reg->live |= REG_LIVE_WRITTEN;
12403 				dst_reg->subreg_def = DEF_NOT_SUBREG;
12404 			} else {
12405 				/* R1 = (u32) R2 */
12406 				if (is_pointer_value(env, insn->src_reg)) {
12407 					verbose(env,
12408 						"R%d partial copy of pointer\n",
12409 						insn->src_reg);
12410 					return -EACCES;
12411 				} else if (src_reg->type == SCALAR_VALUE) {
12412 					copy_register_state(dst_reg, src_reg);
12413 					/* Make sure ID is cleared otherwise
12414 					 * dst_reg min/max could be incorrectly
12415 					 * propagated into src_reg by find_equal_scalars()
12416 					 */
12417 					dst_reg->id = 0;
12418 					dst_reg->live |= REG_LIVE_WRITTEN;
12419 					dst_reg->subreg_def = env->insn_idx + 1;
12420 				} else {
12421 					mark_reg_unknown(env, regs,
12422 							 insn->dst_reg);
12423 				}
12424 				zext_32_to_64(dst_reg);
12425 				reg_bounds_sync(dst_reg);
12426 			}
12427 		} else {
12428 			/* case: R = imm
12429 			 * remember the value we stored into this reg
12430 			 */
12431 			/* clear any state __mark_reg_known doesn't set */
12432 			mark_reg_unknown(env, regs, insn->dst_reg);
12433 			regs[insn->dst_reg].type = SCALAR_VALUE;
12434 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
12435 				__mark_reg_known(regs + insn->dst_reg,
12436 						 insn->imm);
12437 			} else {
12438 				__mark_reg_known(regs + insn->dst_reg,
12439 						 (u32)insn->imm);
12440 			}
12441 		}
12442 
12443 	} else if (opcode > BPF_END) {
12444 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
12445 		return -EINVAL;
12446 
12447 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
12448 
12449 		if (BPF_SRC(insn->code) == BPF_X) {
12450 			if (insn->imm != 0 || insn->off != 0) {
12451 				verbose(env, "BPF_ALU uses reserved fields\n");
12452 				return -EINVAL;
12453 			}
12454 			/* check src1 operand */
12455 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
12456 			if (err)
12457 				return err;
12458 		} else {
12459 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
12460 				verbose(env, "BPF_ALU uses reserved fields\n");
12461 				return -EINVAL;
12462 			}
12463 		}
12464 
12465 		/* check src2 operand */
12466 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
12467 		if (err)
12468 			return err;
12469 
12470 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
12471 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
12472 			verbose(env, "div by zero\n");
12473 			return -EINVAL;
12474 		}
12475 
12476 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
12477 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
12478 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
12479 
12480 			if (insn->imm < 0 || insn->imm >= size) {
12481 				verbose(env, "invalid shift %d\n", insn->imm);
12482 				return -EINVAL;
12483 			}
12484 		}
12485 
12486 		/* check dest operand */
12487 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
12488 		if (err)
12489 			return err;
12490 
12491 		return adjust_reg_min_max_vals(env, insn);
12492 	}
12493 
12494 	return 0;
12495 }
12496 
12497 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
12498 				   struct bpf_reg_state *dst_reg,
12499 				   enum bpf_reg_type type,
12500 				   bool range_right_open)
12501 {
12502 	struct bpf_func_state *state;
12503 	struct bpf_reg_state *reg;
12504 	int new_range;
12505 
12506 	if (dst_reg->off < 0 ||
12507 	    (dst_reg->off == 0 && range_right_open))
12508 		/* This doesn't give us any range */
12509 		return;
12510 
12511 	if (dst_reg->umax_value > MAX_PACKET_OFF ||
12512 	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
12513 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
12514 		 * than pkt_end, but that's because it's also less than pkt.
12515 		 */
12516 		return;
12517 
12518 	new_range = dst_reg->off;
12519 	if (range_right_open)
12520 		new_range++;
12521 
12522 	/* Examples for register markings:
12523 	 *
12524 	 * pkt_data in dst register:
12525 	 *
12526 	 *   r2 = r3;
12527 	 *   r2 += 8;
12528 	 *   if (r2 > pkt_end) goto <handle exception>
12529 	 *   <access okay>
12530 	 *
12531 	 *   r2 = r3;
12532 	 *   r2 += 8;
12533 	 *   if (r2 < pkt_end) goto <access okay>
12534 	 *   <handle exception>
12535 	 *
12536 	 *   Where:
12537 	 *     r2 == dst_reg, pkt_end == src_reg
12538 	 *     r2=pkt(id=n,off=8,r=0)
12539 	 *     r3=pkt(id=n,off=0,r=0)
12540 	 *
12541 	 * pkt_data in src register:
12542 	 *
12543 	 *   r2 = r3;
12544 	 *   r2 += 8;
12545 	 *   if (pkt_end >= r2) goto <access okay>
12546 	 *   <handle exception>
12547 	 *
12548 	 *   r2 = r3;
12549 	 *   r2 += 8;
12550 	 *   if (pkt_end <= r2) goto <handle exception>
12551 	 *   <access okay>
12552 	 *
12553 	 *   Where:
12554 	 *     pkt_end == dst_reg, r2 == src_reg
12555 	 *     r2=pkt(id=n,off=8,r=0)
12556 	 *     r3=pkt(id=n,off=0,r=0)
12557 	 *
12558 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
12559 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
12560 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
12561 	 * the check.
12562 	 */
12563 
12564 	/* If our ids match, then we must have the same max_value.  And we
12565 	 * don't care about the other reg's fixed offset, since if it's too big
12566 	 * the range won't allow anything.
12567 	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
12568 	 */
12569 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
12570 		if (reg->type == type && reg->id == dst_reg->id)
12571 			/* keep the maximum range already checked */
12572 			reg->range = max(reg->range, new_range);
12573 	}));
12574 }
12575 
12576 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
12577 {
12578 	struct tnum subreg = tnum_subreg(reg->var_off);
12579 	s32 sval = (s32)val;
12580 
12581 	switch (opcode) {
12582 	case BPF_JEQ:
12583 		if (tnum_is_const(subreg))
12584 			return !!tnum_equals_const(subreg, val);
12585 		else if (val < reg->u32_min_value || val > reg->u32_max_value)
12586 			return 0;
12587 		break;
12588 	case BPF_JNE:
12589 		if (tnum_is_const(subreg))
12590 			return !tnum_equals_const(subreg, val);
12591 		else if (val < reg->u32_min_value || val > reg->u32_max_value)
12592 			return 1;
12593 		break;
12594 	case BPF_JSET:
12595 		if ((~subreg.mask & subreg.value) & val)
12596 			return 1;
12597 		if (!((subreg.mask | subreg.value) & val))
12598 			return 0;
12599 		break;
12600 	case BPF_JGT:
12601 		if (reg->u32_min_value > val)
12602 			return 1;
12603 		else if (reg->u32_max_value <= val)
12604 			return 0;
12605 		break;
12606 	case BPF_JSGT:
12607 		if (reg->s32_min_value > sval)
12608 			return 1;
12609 		else if (reg->s32_max_value <= sval)
12610 			return 0;
12611 		break;
12612 	case BPF_JLT:
12613 		if (reg->u32_max_value < val)
12614 			return 1;
12615 		else if (reg->u32_min_value >= val)
12616 			return 0;
12617 		break;
12618 	case BPF_JSLT:
12619 		if (reg->s32_max_value < sval)
12620 			return 1;
12621 		else if (reg->s32_min_value >= sval)
12622 			return 0;
12623 		break;
12624 	case BPF_JGE:
12625 		if (reg->u32_min_value >= val)
12626 			return 1;
12627 		else if (reg->u32_max_value < val)
12628 			return 0;
12629 		break;
12630 	case BPF_JSGE:
12631 		if (reg->s32_min_value >= sval)
12632 			return 1;
12633 		else if (reg->s32_max_value < sval)
12634 			return 0;
12635 		break;
12636 	case BPF_JLE:
12637 		if (reg->u32_max_value <= val)
12638 			return 1;
12639 		else if (reg->u32_min_value > val)
12640 			return 0;
12641 		break;
12642 	case BPF_JSLE:
12643 		if (reg->s32_max_value <= sval)
12644 			return 1;
12645 		else if (reg->s32_min_value > sval)
12646 			return 0;
12647 		break;
12648 	}
12649 
12650 	return -1;
12651 }
12652 
12653 
12654 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
12655 {
12656 	s64 sval = (s64)val;
12657 
12658 	switch (opcode) {
12659 	case BPF_JEQ:
12660 		if (tnum_is_const(reg->var_off))
12661 			return !!tnum_equals_const(reg->var_off, val);
12662 		else if (val < reg->umin_value || val > reg->umax_value)
12663 			return 0;
12664 		break;
12665 	case BPF_JNE:
12666 		if (tnum_is_const(reg->var_off))
12667 			return !tnum_equals_const(reg->var_off, val);
12668 		else if (val < reg->umin_value || val > reg->umax_value)
12669 			return 1;
12670 		break;
12671 	case BPF_JSET:
12672 		if ((~reg->var_off.mask & reg->var_off.value) & val)
12673 			return 1;
12674 		if (!((reg->var_off.mask | reg->var_off.value) & val))
12675 			return 0;
12676 		break;
12677 	case BPF_JGT:
12678 		if (reg->umin_value > val)
12679 			return 1;
12680 		else if (reg->umax_value <= val)
12681 			return 0;
12682 		break;
12683 	case BPF_JSGT:
12684 		if (reg->smin_value > sval)
12685 			return 1;
12686 		else if (reg->smax_value <= sval)
12687 			return 0;
12688 		break;
12689 	case BPF_JLT:
12690 		if (reg->umax_value < val)
12691 			return 1;
12692 		else if (reg->umin_value >= val)
12693 			return 0;
12694 		break;
12695 	case BPF_JSLT:
12696 		if (reg->smax_value < sval)
12697 			return 1;
12698 		else if (reg->smin_value >= sval)
12699 			return 0;
12700 		break;
12701 	case BPF_JGE:
12702 		if (reg->umin_value >= val)
12703 			return 1;
12704 		else if (reg->umax_value < val)
12705 			return 0;
12706 		break;
12707 	case BPF_JSGE:
12708 		if (reg->smin_value >= sval)
12709 			return 1;
12710 		else if (reg->smax_value < sval)
12711 			return 0;
12712 		break;
12713 	case BPF_JLE:
12714 		if (reg->umax_value <= val)
12715 			return 1;
12716 		else if (reg->umin_value > val)
12717 			return 0;
12718 		break;
12719 	case BPF_JSLE:
12720 		if (reg->smax_value <= sval)
12721 			return 1;
12722 		else if (reg->smin_value > sval)
12723 			return 0;
12724 		break;
12725 	}
12726 
12727 	return -1;
12728 }
12729 
12730 /* compute branch direction of the expression "if (reg opcode val) goto target;"
12731  * and return:
12732  *  1 - branch will be taken and "goto target" will be executed
12733  *  0 - branch will not be taken and fall-through to next insn
12734  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
12735  *      range [0,10]
12736  */
12737 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
12738 			   bool is_jmp32)
12739 {
12740 	if (__is_pointer_value(false, reg)) {
12741 		if (!reg_type_not_null(reg->type))
12742 			return -1;
12743 
12744 		/* If pointer is valid tests against zero will fail so we can
12745 		 * use this to direct branch taken.
12746 		 */
12747 		if (val != 0)
12748 			return -1;
12749 
12750 		switch (opcode) {
12751 		case BPF_JEQ:
12752 			return 0;
12753 		case BPF_JNE:
12754 			return 1;
12755 		default:
12756 			return -1;
12757 		}
12758 	}
12759 
12760 	if (is_jmp32)
12761 		return is_branch32_taken(reg, val, opcode);
12762 	return is_branch64_taken(reg, val, opcode);
12763 }
12764 
12765 static int flip_opcode(u32 opcode)
12766 {
12767 	/* How can we transform "a <op> b" into "b <op> a"? */
12768 	static const u8 opcode_flip[16] = {
12769 		/* these stay the same */
12770 		[BPF_JEQ  >> 4] = BPF_JEQ,
12771 		[BPF_JNE  >> 4] = BPF_JNE,
12772 		[BPF_JSET >> 4] = BPF_JSET,
12773 		/* these swap "lesser" and "greater" (L and G in the opcodes) */
12774 		[BPF_JGE  >> 4] = BPF_JLE,
12775 		[BPF_JGT  >> 4] = BPF_JLT,
12776 		[BPF_JLE  >> 4] = BPF_JGE,
12777 		[BPF_JLT  >> 4] = BPF_JGT,
12778 		[BPF_JSGE >> 4] = BPF_JSLE,
12779 		[BPF_JSGT >> 4] = BPF_JSLT,
12780 		[BPF_JSLE >> 4] = BPF_JSGE,
12781 		[BPF_JSLT >> 4] = BPF_JSGT
12782 	};
12783 	return opcode_flip[opcode >> 4];
12784 }
12785 
12786 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
12787 				   struct bpf_reg_state *src_reg,
12788 				   u8 opcode)
12789 {
12790 	struct bpf_reg_state *pkt;
12791 
12792 	if (src_reg->type == PTR_TO_PACKET_END) {
12793 		pkt = dst_reg;
12794 	} else if (dst_reg->type == PTR_TO_PACKET_END) {
12795 		pkt = src_reg;
12796 		opcode = flip_opcode(opcode);
12797 	} else {
12798 		return -1;
12799 	}
12800 
12801 	if (pkt->range >= 0)
12802 		return -1;
12803 
12804 	switch (opcode) {
12805 	case BPF_JLE:
12806 		/* pkt <= pkt_end */
12807 		fallthrough;
12808 	case BPF_JGT:
12809 		/* pkt > pkt_end */
12810 		if (pkt->range == BEYOND_PKT_END)
12811 			/* pkt has at last one extra byte beyond pkt_end */
12812 			return opcode == BPF_JGT;
12813 		break;
12814 	case BPF_JLT:
12815 		/* pkt < pkt_end */
12816 		fallthrough;
12817 	case BPF_JGE:
12818 		/* pkt >= pkt_end */
12819 		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
12820 			return opcode == BPF_JGE;
12821 		break;
12822 	}
12823 	return -1;
12824 }
12825 
12826 /* Adjusts the register min/max values in the case that the dst_reg is the
12827  * variable register that we are working on, and src_reg is a constant or we're
12828  * simply doing a BPF_K check.
12829  * In JEQ/JNE cases we also adjust the var_off values.
12830  */
12831 static void reg_set_min_max(struct bpf_reg_state *true_reg,
12832 			    struct bpf_reg_state *false_reg,
12833 			    u64 val, u32 val32,
12834 			    u8 opcode, bool is_jmp32)
12835 {
12836 	struct tnum false_32off = tnum_subreg(false_reg->var_off);
12837 	struct tnum false_64off = false_reg->var_off;
12838 	struct tnum true_32off = tnum_subreg(true_reg->var_off);
12839 	struct tnum true_64off = true_reg->var_off;
12840 	s64 sval = (s64)val;
12841 	s32 sval32 = (s32)val32;
12842 
12843 	/* If the dst_reg is a pointer, we can't learn anything about its
12844 	 * variable offset from the compare (unless src_reg were a pointer into
12845 	 * the same object, but we don't bother with that.
12846 	 * Since false_reg and true_reg have the same type by construction, we
12847 	 * only need to check one of them for pointerness.
12848 	 */
12849 	if (__is_pointer_value(false, false_reg))
12850 		return;
12851 
12852 	switch (opcode) {
12853 	/* JEQ/JNE comparison doesn't change the register equivalence.
12854 	 *
12855 	 * r1 = r2;
12856 	 * if (r1 == 42) goto label;
12857 	 * ...
12858 	 * label: // here both r1 and r2 are known to be 42.
12859 	 *
12860 	 * Hence when marking register as known preserve it's ID.
12861 	 */
12862 	case BPF_JEQ:
12863 		if (is_jmp32) {
12864 			__mark_reg32_known(true_reg, val32);
12865 			true_32off = tnum_subreg(true_reg->var_off);
12866 		} else {
12867 			___mark_reg_known(true_reg, val);
12868 			true_64off = true_reg->var_off;
12869 		}
12870 		break;
12871 	case BPF_JNE:
12872 		if (is_jmp32) {
12873 			__mark_reg32_known(false_reg, val32);
12874 			false_32off = tnum_subreg(false_reg->var_off);
12875 		} else {
12876 			___mark_reg_known(false_reg, val);
12877 			false_64off = false_reg->var_off;
12878 		}
12879 		break;
12880 	case BPF_JSET:
12881 		if (is_jmp32) {
12882 			false_32off = tnum_and(false_32off, tnum_const(~val32));
12883 			if (is_power_of_2(val32))
12884 				true_32off = tnum_or(true_32off,
12885 						     tnum_const(val32));
12886 		} else {
12887 			false_64off = tnum_and(false_64off, tnum_const(~val));
12888 			if (is_power_of_2(val))
12889 				true_64off = tnum_or(true_64off,
12890 						     tnum_const(val));
12891 		}
12892 		break;
12893 	case BPF_JGE:
12894 	case BPF_JGT:
12895 	{
12896 		if (is_jmp32) {
12897 			u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
12898 			u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
12899 
12900 			false_reg->u32_max_value = min(false_reg->u32_max_value,
12901 						       false_umax);
12902 			true_reg->u32_min_value = max(true_reg->u32_min_value,
12903 						      true_umin);
12904 		} else {
12905 			u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
12906 			u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
12907 
12908 			false_reg->umax_value = min(false_reg->umax_value, false_umax);
12909 			true_reg->umin_value = max(true_reg->umin_value, true_umin);
12910 		}
12911 		break;
12912 	}
12913 	case BPF_JSGE:
12914 	case BPF_JSGT:
12915 	{
12916 		if (is_jmp32) {
12917 			s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
12918 			s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
12919 
12920 			false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
12921 			true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
12922 		} else {
12923 			s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
12924 			s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
12925 
12926 			false_reg->smax_value = min(false_reg->smax_value, false_smax);
12927 			true_reg->smin_value = max(true_reg->smin_value, true_smin);
12928 		}
12929 		break;
12930 	}
12931 	case BPF_JLE:
12932 	case BPF_JLT:
12933 	{
12934 		if (is_jmp32) {
12935 			u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
12936 			u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
12937 
12938 			false_reg->u32_min_value = max(false_reg->u32_min_value,
12939 						       false_umin);
12940 			true_reg->u32_max_value = min(true_reg->u32_max_value,
12941 						      true_umax);
12942 		} else {
12943 			u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
12944 			u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
12945 
12946 			false_reg->umin_value = max(false_reg->umin_value, false_umin);
12947 			true_reg->umax_value = min(true_reg->umax_value, true_umax);
12948 		}
12949 		break;
12950 	}
12951 	case BPF_JSLE:
12952 	case BPF_JSLT:
12953 	{
12954 		if (is_jmp32) {
12955 			s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
12956 			s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
12957 
12958 			false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
12959 			true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
12960 		} else {
12961 			s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
12962 			s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
12963 
12964 			false_reg->smin_value = max(false_reg->smin_value, false_smin);
12965 			true_reg->smax_value = min(true_reg->smax_value, true_smax);
12966 		}
12967 		break;
12968 	}
12969 	default:
12970 		return;
12971 	}
12972 
12973 	if (is_jmp32) {
12974 		false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
12975 					     tnum_subreg(false_32off));
12976 		true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
12977 					    tnum_subreg(true_32off));
12978 		__reg_combine_32_into_64(false_reg);
12979 		__reg_combine_32_into_64(true_reg);
12980 	} else {
12981 		false_reg->var_off = false_64off;
12982 		true_reg->var_off = true_64off;
12983 		__reg_combine_64_into_32(false_reg);
12984 		__reg_combine_64_into_32(true_reg);
12985 	}
12986 }
12987 
12988 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
12989  * the variable reg.
12990  */
12991 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
12992 				struct bpf_reg_state *false_reg,
12993 				u64 val, u32 val32,
12994 				u8 opcode, bool is_jmp32)
12995 {
12996 	opcode = flip_opcode(opcode);
12997 	/* This uses zero as "not present in table"; luckily the zero opcode,
12998 	 * BPF_JA, can't get here.
12999 	 */
13000 	if (opcode)
13001 		reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
13002 }
13003 
13004 /* Regs are known to be equal, so intersect their min/max/var_off */
13005 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
13006 				  struct bpf_reg_state *dst_reg)
13007 {
13008 	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
13009 							dst_reg->umin_value);
13010 	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
13011 							dst_reg->umax_value);
13012 	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
13013 							dst_reg->smin_value);
13014 	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
13015 							dst_reg->smax_value);
13016 	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
13017 							     dst_reg->var_off);
13018 	reg_bounds_sync(src_reg);
13019 	reg_bounds_sync(dst_reg);
13020 }
13021 
13022 static void reg_combine_min_max(struct bpf_reg_state *true_src,
13023 				struct bpf_reg_state *true_dst,
13024 				struct bpf_reg_state *false_src,
13025 				struct bpf_reg_state *false_dst,
13026 				u8 opcode)
13027 {
13028 	switch (opcode) {
13029 	case BPF_JEQ:
13030 		__reg_combine_min_max(true_src, true_dst);
13031 		break;
13032 	case BPF_JNE:
13033 		__reg_combine_min_max(false_src, false_dst);
13034 		break;
13035 	}
13036 }
13037 
13038 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
13039 				 struct bpf_reg_state *reg, u32 id,
13040 				 bool is_null)
13041 {
13042 	if (type_may_be_null(reg->type) && reg->id == id &&
13043 	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
13044 		/* Old offset (both fixed and variable parts) should have been
13045 		 * known-zero, because we don't allow pointer arithmetic on
13046 		 * pointers that might be NULL. If we see this happening, don't
13047 		 * convert the register.
13048 		 *
13049 		 * But in some cases, some helpers that return local kptrs
13050 		 * advance offset for the returned pointer. In those cases, it
13051 		 * is fine to expect to see reg->off.
13052 		 */
13053 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
13054 			return;
13055 		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
13056 		    WARN_ON_ONCE(reg->off))
13057 			return;
13058 
13059 		if (is_null) {
13060 			reg->type = SCALAR_VALUE;
13061 			/* We don't need id and ref_obj_id from this point
13062 			 * onwards anymore, thus we should better reset it,
13063 			 * so that state pruning has chances to take effect.
13064 			 */
13065 			reg->id = 0;
13066 			reg->ref_obj_id = 0;
13067 
13068 			return;
13069 		}
13070 
13071 		mark_ptr_not_null_reg(reg);
13072 
13073 		if (!reg_may_point_to_spin_lock(reg)) {
13074 			/* For not-NULL ptr, reg->ref_obj_id will be reset
13075 			 * in release_reference().
13076 			 *
13077 			 * reg->id is still used by spin_lock ptr. Other
13078 			 * than spin_lock ptr type, reg->id can be reset.
13079 			 */
13080 			reg->id = 0;
13081 		}
13082 	}
13083 }
13084 
13085 /* The logic is similar to find_good_pkt_pointers(), both could eventually
13086  * be folded together at some point.
13087  */
13088 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
13089 				  bool is_null)
13090 {
13091 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
13092 	struct bpf_reg_state *regs = state->regs, *reg;
13093 	u32 ref_obj_id = regs[regno].ref_obj_id;
13094 	u32 id = regs[regno].id;
13095 
13096 	if (ref_obj_id && ref_obj_id == id && is_null)
13097 		/* regs[regno] is in the " == NULL" branch.
13098 		 * No one could have freed the reference state before
13099 		 * doing the NULL check.
13100 		 */
13101 		WARN_ON_ONCE(release_reference_state(state, id));
13102 
13103 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
13104 		mark_ptr_or_null_reg(state, reg, id, is_null);
13105 	}));
13106 }
13107 
13108 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
13109 				   struct bpf_reg_state *dst_reg,
13110 				   struct bpf_reg_state *src_reg,
13111 				   struct bpf_verifier_state *this_branch,
13112 				   struct bpf_verifier_state *other_branch)
13113 {
13114 	if (BPF_SRC(insn->code) != BPF_X)
13115 		return false;
13116 
13117 	/* Pointers are always 64-bit. */
13118 	if (BPF_CLASS(insn->code) == BPF_JMP32)
13119 		return false;
13120 
13121 	switch (BPF_OP(insn->code)) {
13122 	case BPF_JGT:
13123 		if ((dst_reg->type == PTR_TO_PACKET &&
13124 		     src_reg->type == PTR_TO_PACKET_END) ||
13125 		    (dst_reg->type == PTR_TO_PACKET_META &&
13126 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
13127 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
13128 			find_good_pkt_pointers(this_branch, dst_reg,
13129 					       dst_reg->type, false);
13130 			mark_pkt_end(other_branch, insn->dst_reg, true);
13131 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
13132 			    src_reg->type == PTR_TO_PACKET) ||
13133 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
13134 			    src_reg->type == PTR_TO_PACKET_META)) {
13135 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
13136 			find_good_pkt_pointers(other_branch, src_reg,
13137 					       src_reg->type, true);
13138 			mark_pkt_end(this_branch, insn->src_reg, false);
13139 		} else {
13140 			return false;
13141 		}
13142 		break;
13143 	case BPF_JLT:
13144 		if ((dst_reg->type == PTR_TO_PACKET &&
13145 		     src_reg->type == PTR_TO_PACKET_END) ||
13146 		    (dst_reg->type == PTR_TO_PACKET_META &&
13147 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
13148 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
13149 			find_good_pkt_pointers(other_branch, dst_reg,
13150 					       dst_reg->type, true);
13151 			mark_pkt_end(this_branch, insn->dst_reg, false);
13152 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
13153 			    src_reg->type == PTR_TO_PACKET) ||
13154 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
13155 			    src_reg->type == PTR_TO_PACKET_META)) {
13156 			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
13157 			find_good_pkt_pointers(this_branch, src_reg,
13158 					       src_reg->type, false);
13159 			mark_pkt_end(other_branch, insn->src_reg, true);
13160 		} else {
13161 			return false;
13162 		}
13163 		break;
13164 	case BPF_JGE:
13165 		if ((dst_reg->type == PTR_TO_PACKET &&
13166 		     src_reg->type == PTR_TO_PACKET_END) ||
13167 		    (dst_reg->type == PTR_TO_PACKET_META &&
13168 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
13169 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
13170 			find_good_pkt_pointers(this_branch, dst_reg,
13171 					       dst_reg->type, true);
13172 			mark_pkt_end(other_branch, insn->dst_reg, false);
13173 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
13174 			    src_reg->type == PTR_TO_PACKET) ||
13175 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
13176 			    src_reg->type == PTR_TO_PACKET_META)) {
13177 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
13178 			find_good_pkt_pointers(other_branch, src_reg,
13179 					       src_reg->type, false);
13180 			mark_pkt_end(this_branch, insn->src_reg, true);
13181 		} else {
13182 			return false;
13183 		}
13184 		break;
13185 	case BPF_JLE:
13186 		if ((dst_reg->type == PTR_TO_PACKET &&
13187 		     src_reg->type == PTR_TO_PACKET_END) ||
13188 		    (dst_reg->type == PTR_TO_PACKET_META &&
13189 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
13190 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
13191 			find_good_pkt_pointers(other_branch, dst_reg,
13192 					       dst_reg->type, false);
13193 			mark_pkt_end(this_branch, insn->dst_reg, true);
13194 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
13195 			    src_reg->type == PTR_TO_PACKET) ||
13196 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
13197 			    src_reg->type == PTR_TO_PACKET_META)) {
13198 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
13199 			find_good_pkt_pointers(this_branch, src_reg,
13200 					       src_reg->type, true);
13201 			mark_pkt_end(other_branch, insn->src_reg, false);
13202 		} else {
13203 			return false;
13204 		}
13205 		break;
13206 	default:
13207 		return false;
13208 	}
13209 
13210 	return true;
13211 }
13212 
13213 static void find_equal_scalars(struct bpf_verifier_state *vstate,
13214 			       struct bpf_reg_state *known_reg)
13215 {
13216 	struct bpf_func_state *state;
13217 	struct bpf_reg_state *reg;
13218 
13219 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
13220 		if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
13221 			copy_register_state(reg, known_reg);
13222 	}));
13223 }
13224 
13225 static int check_cond_jmp_op(struct bpf_verifier_env *env,
13226 			     struct bpf_insn *insn, int *insn_idx)
13227 {
13228 	struct bpf_verifier_state *this_branch = env->cur_state;
13229 	struct bpf_verifier_state *other_branch;
13230 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
13231 	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
13232 	struct bpf_reg_state *eq_branch_regs;
13233 	u8 opcode = BPF_OP(insn->code);
13234 	bool is_jmp32;
13235 	int pred = -1;
13236 	int err;
13237 
13238 	/* Only conditional jumps are expected to reach here. */
13239 	if (opcode == BPF_JA || opcode > BPF_JSLE) {
13240 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
13241 		return -EINVAL;
13242 	}
13243 
13244 	if (BPF_SRC(insn->code) == BPF_X) {
13245 		if (insn->imm != 0) {
13246 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
13247 			return -EINVAL;
13248 		}
13249 
13250 		/* check src1 operand */
13251 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
13252 		if (err)
13253 			return err;
13254 
13255 		if (is_pointer_value(env, insn->src_reg)) {
13256 			verbose(env, "R%d pointer comparison prohibited\n",
13257 				insn->src_reg);
13258 			return -EACCES;
13259 		}
13260 		src_reg = &regs[insn->src_reg];
13261 	} else {
13262 		if (insn->src_reg != BPF_REG_0) {
13263 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
13264 			return -EINVAL;
13265 		}
13266 	}
13267 
13268 	/* check src2 operand */
13269 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
13270 	if (err)
13271 		return err;
13272 
13273 	dst_reg = &regs[insn->dst_reg];
13274 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
13275 
13276 	if (BPF_SRC(insn->code) == BPF_K) {
13277 		pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
13278 	} else if (src_reg->type == SCALAR_VALUE &&
13279 		   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
13280 		pred = is_branch_taken(dst_reg,
13281 				       tnum_subreg(src_reg->var_off).value,
13282 				       opcode,
13283 				       is_jmp32);
13284 	} else if (src_reg->type == SCALAR_VALUE &&
13285 		   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
13286 		pred = is_branch_taken(dst_reg,
13287 				       src_reg->var_off.value,
13288 				       opcode,
13289 				       is_jmp32);
13290 	} else if (dst_reg->type == SCALAR_VALUE &&
13291 		   is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off))) {
13292 		pred = is_branch_taken(src_reg,
13293 				       tnum_subreg(dst_reg->var_off).value,
13294 				       flip_opcode(opcode),
13295 				       is_jmp32);
13296 	} else if (dst_reg->type == SCALAR_VALUE &&
13297 		   !is_jmp32 && tnum_is_const(dst_reg->var_off)) {
13298 		pred = is_branch_taken(src_reg,
13299 				       dst_reg->var_off.value,
13300 				       flip_opcode(opcode),
13301 				       is_jmp32);
13302 	} else if (reg_is_pkt_pointer_any(dst_reg) &&
13303 		   reg_is_pkt_pointer_any(src_reg) &&
13304 		   !is_jmp32) {
13305 		pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
13306 	}
13307 
13308 	if (pred >= 0) {
13309 		/* If we get here with a dst_reg pointer type it is because
13310 		 * above is_branch_taken() special cased the 0 comparison.
13311 		 */
13312 		if (!__is_pointer_value(false, dst_reg))
13313 			err = mark_chain_precision(env, insn->dst_reg);
13314 		if (BPF_SRC(insn->code) == BPF_X && !err &&
13315 		    !__is_pointer_value(false, src_reg))
13316 			err = mark_chain_precision(env, insn->src_reg);
13317 		if (err)
13318 			return err;
13319 	}
13320 
13321 	if (pred == 1) {
13322 		/* Only follow the goto, ignore fall-through. If needed, push
13323 		 * the fall-through branch for simulation under speculative
13324 		 * execution.
13325 		 */
13326 		if (!env->bypass_spec_v1 &&
13327 		    !sanitize_speculative_path(env, insn, *insn_idx + 1,
13328 					       *insn_idx))
13329 			return -EFAULT;
13330 		*insn_idx += insn->off;
13331 		return 0;
13332 	} else if (pred == 0) {
13333 		/* Only follow the fall-through branch, since that's where the
13334 		 * program will go. If needed, push the goto branch for
13335 		 * simulation under speculative execution.
13336 		 */
13337 		if (!env->bypass_spec_v1 &&
13338 		    !sanitize_speculative_path(env, insn,
13339 					       *insn_idx + insn->off + 1,
13340 					       *insn_idx))
13341 			return -EFAULT;
13342 		return 0;
13343 	}
13344 
13345 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
13346 				  false);
13347 	if (!other_branch)
13348 		return -EFAULT;
13349 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
13350 
13351 	/* detect if we are comparing against a constant value so we can adjust
13352 	 * our min/max values for our dst register.
13353 	 * this is only legit if both are scalars (or pointers to the same
13354 	 * object, I suppose, see the PTR_MAYBE_NULL related if block below),
13355 	 * because otherwise the different base pointers mean the offsets aren't
13356 	 * comparable.
13357 	 */
13358 	if (BPF_SRC(insn->code) == BPF_X) {
13359 		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
13360 
13361 		if (dst_reg->type == SCALAR_VALUE &&
13362 		    src_reg->type == SCALAR_VALUE) {
13363 			if (tnum_is_const(src_reg->var_off) ||
13364 			    (is_jmp32 &&
13365 			     tnum_is_const(tnum_subreg(src_reg->var_off))))
13366 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
13367 						dst_reg,
13368 						src_reg->var_off.value,
13369 						tnum_subreg(src_reg->var_off).value,
13370 						opcode, is_jmp32);
13371 			else if (tnum_is_const(dst_reg->var_off) ||
13372 				 (is_jmp32 &&
13373 				  tnum_is_const(tnum_subreg(dst_reg->var_off))))
13374 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
13375 						    src_reg,
13376 						    dst_reg->var_off.value,
13377 						    tnum_subreg(dst_reg->var_off).value,
13378 						    opcode, is_jmp32);
13379 			else if (!is_jmp32 &&
13380 				 (opcode == BPF_JEQ || opcode == BPF_JNE))
13381 				/* Comparing for equality, we can combine knowledge */
13382 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
13383 						    &other_branch_regs[insn->dst_reg],
13384 						    src_reg, dst_reg, opcode);
13385 			if (src_reg->id &&
13386 			    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
13387 				find_equal_scalars(this_branch, src_reg);
13388 				find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
13389 			}
13390 
13391 		}
13392 	} else if (dst_reg->type == SCALAR_VALUE) {
13393 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
13394 					dst_reg, insn->imm, (u32)insn->imm,
13395 					opcode, is_jmp32);
13396 	}
13397 
13398 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
13399 	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
13400 		find_equal_scalars(this_branch, dst_reg);
13401 		find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
13402 	}
13403 
13404 	/* if one pointer register is compared to another pointer
13405 	 * register check if PTR_MAYBE_NULL could be lifted.
13406 	 * E.g. register A - maybe null
13407 	 *      register B - not null
13408 	 * for JNE A, B, ... - A is not null in the false branch;
13409 	 * for JEQ A, B, ... - A is not null in the true branch.
13410 	 *
13411 	 * Since PTR_TO_BTF_ID points to a kernel struct that does
13412 	 * not need to be null checked by the BPF program, i.e.,
13413 	 * could be null even without PTR_MAYBE_NULL marking, so
13414 	 * only propagate nullness when neither reg is that type.
13415 	 */
13416 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
13417 	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
13418 	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
13419 	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
13420 	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
13421 		eq_branch_regs = NULL;
13422 		switch (opcode) {
13423 		case BPF_JEQ:
13424 			eq_branch_regs = other_branch_regs;
13425 			break;
13426 		case BPF_JNE:
13427 			eq_branch_regs = regs;
13428 			break;
13429 		default:
13430 			/* do nothing */
13431 			break;
13432 		}
13433 		if (eq_branch_regs) {
13434 			if (type_may_be_null(src_reg->type))
13435 				mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
13436 			else
13437 				mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
13438 		}
13439 	}
13440 
13441 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
13442 	 * NOTE: these optimizations below are related with pointer comparison
13443 	 *       which will never be JMP32.
13444 	 */
13445 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
13446 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
13447 	    type_may_be_null(dst_reg->type)) {
13448 		/* Mark all identical registers in each branch as either
13449 		 * safe or unknown depending R == 0 or R != 0 conditional.
13450 		 */
13451 		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
13452 				      opcode == BPF_JNE);
13453 		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
13454 				      opcode == BPF_JEQ);
13455 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
13456 					   this_branch, other_branch) &&
13457 		   is_pointer_value(env, insn->dst_reg)) {
13458 		verbose(env, "R%d pointer comparison prohibited\n",
13459 			insn->dst_reg);
13460 		return -EACCES;
13461 	}
13462 	if (env->log.level & BPF_LOG_LEVEL)
13463 		print_insn_state(env, this_branch->frame[this_branch->curframe]);
13464 	return 0;
13465 }
13466 
13467 /* verify BPF_LD_IMM64 instruction */
13468 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
13469 {
13470 	struct bpf_insn_aux_data *aux = cur_aux(env);
13471 	struct bpf_reg_state *regs = cur_regs(env);
13472 	struct bpf_reg_state *dst_reg;
13473 	struct bpf_map *map;
13474 	int err;
13475 
13476 	if (BPF_SIZE(insn->code) != BPF_DW) {
13477 		verbose(env, "invalid BPF_LD_IMM insn\n");
13478 		return -EINVAL;
13479 	}
13480 	if (insn->off != 0) {
13481 		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
13482 		return -EINVAL;
13483 	}
13484 
13485 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
13486 	if (err)
13487 		return err;
13488 
13489 	dst_reg = &regs[insn->dst_reg];
13490 	if (insn->src_reg == 0) {
13491 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
13492 
13493 		dst_reg->type = SCALAR_VALUE;
13494 		__mark_reg_known(&regs[insn->dst_reg], imm);
13495 		return 0;
13496 	}
13497 
13498 	/* All special src_reg cases are listed below. From this point onwards
13499 	 * we either succeed and assign a corresponding dst_reg->type after
13500 	 * zeroing the offset, or fail and reject the program.
13501 	 */
13502 	mark_reg_known_zero(env, regs, insn->dst_reg);
13503 
13504 	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
13505 		dst_reg->type = aux->btf_var.reg_type;
13506 		switch (base_type(dst_reg->type)) {
13507 		case PTR_TO_MEM:
13508 			dst_reg->mem_size = aux->btf_var.mem_size;
13509 			break;
13510 		case PTR_TO_BTF_ID:
13511 			dst_reg->btf = aux->btf_var.btf;
13512 			dst_reg->btf_id = aux->btf_var.btf_id;
13513 			break;
13514 		default:
13515 			verbose(env, "bpf verifier is misconfigured\n");
13516 			return -EFAULT;
13517 		}
13518 		return 0;
13519 	}
13520 
13521 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
13522 		struct bpf_prog_aux *aux = env->prog->aux;
13523 		u32 subprogno = find_subprog(env,
13524 					     env->insn_idx + insn->imm + 1);
13525 
13526 		if (!aux->func_info) {
13527 			verbose(env, "missing btf func_info\n");
13528 			return -EINVAL;
13529 		}
13530 		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
13531 			verbose(env, "callback function not static\n");
13532 			return -EINVAL;
13533 		}
13534 
13535 		dst_reg->type = PTR_TO_FUNC;
13536 		dst_reg->subprogno = subprogno;
13537 		return 0;
13538 	}
13539 
13540 	map = env->used_maps[aux->map_index];
13541 	dst_reg->map_ptr = map;
13542 
13543 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
13544 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
13545 		dst_reg->type = PTR_TO_MAP_VALUE;
13546 		dst_reg->off = aux->map_off;
13547 		WARN_ON_ONCE(map->max_entries != 1);
13548 		/* We want reg->id to be same (0) as map_value is not distinct */
13549 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
13550 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
13551 		dst_reg->type = CONST_PTR_TO_MAP;
13552 	} else {
13553 		verbose(env, "bpf verifier is misconfigured\n");
13554 		return -EINVAL;
13555 	}
13556 
13557 	return 0;
13558 }
13559 
13560 static bool may_access_skb(enum bpf_prog_type type)
13561 {
13562 	switch (type) {
13563 	case BPF_PROG_TYPE_SOCKET_FILTER:
13564 	case BPF_PROG_TYPE_SCHED_CLS:
13565 	case BPF_PROG_TYPE_SCHED_ACT:
13566 		return true;
13567 	default:
13568 		return false;
13569 	}
13570 }
13571 
13572 /* verify safety of LD_ABS|LD_IND instructions:
13573  * - they can only appear in the programs where ctx == skb
13574  * - since they are wrappers of function calls, they scratch R1-R5 registers,
13575  *   preserve R6-R9, and store return value into R0
13576  *
13577  * Implicit input:
13578  *   ctx == skb == R6 == CTX
13579  *
13580  * Explicit input:
13581  *   SRC == any register
13582  *   IMM == 32-bit immediate
13583  *
13584  * Output:
13585  *   R0 - 8/16/32-bit skb data converted to cpu endianness
13586  */
13587 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
13588 {
13589 	struct bpf_reg_state *regs = cur_regs(env);
13590 	static const int ctx_reg = BPF_REG_6;
13591 	u8 mode = BPF_MODE(insn->code);
13592 	int i, err;
13593 
13594 	if (!may_access_skb(resolve_prog_type(env->prog))) {
13595 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
13596 		return -EINVAL;
13597 	}
13598 
13599 	if (!env->ops->gen_ld_abs) {
13600 		verbose(env, "bpf verifier is misconfigured\n");
13601 		return -EINVAL;
13602 	}
13603 
13604 	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
13605 	    BPF_SIZE(insn->code) == BPF_DW ||
13606 	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
13607 		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
13608 		return -EINVAL;
13609 	}
13610 
13611 	/* check whether implicit source operand (register R6) is readable */
13612 	err = check_reg_arg(env, ctx_reg, SRC_OP);
13613 	if (err)
13614 		return err;
13615 
13616 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
13617 	 * gen_ld_abs() may terminate the program at runtime, leading to
13618 	 * reference leak.
13619 	 */
13620 	err = check_reference_leak(env);
13621 	if (err) {
13622 		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
13623 		return err;
13624 	}
13625 
13626 	if (env->cur_state->active_lock.ptr) {
13627 		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
13628 		return -EINVAL;
13629 	}
13630 
13631 	if (env->cur_state->active_rcu_lock) {
13632 		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
13633 		return -EINVAL;
13634 	}
13635 
13636 	if (regs[ctx_reg].type != PTR_TO_CTX) {
13637 		verbose(env,
13638 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
13639 		return -EINVAL;
13640 	}
13641 
13642 	if (mode == BPF_IND) {
13643 		/* check explicit source operand */
13644 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
13645 		if (err)
13646 			return err;
13647 	}
13648 
13649 	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
13650 	if (err < 0)
13651 		return err;
13652 
13653 	/* reset caller saved regs to unreadable */
13654 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
13655 		mark_reg_not_init(env, regs, caller_saved[i]);
13656 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
13657 	}
13658 
13659 	/* mark destination R0 register as readable, since it contains
13660 	 * the value fetched from the packet.
13661 	 * Already marked as written above.
13662 	 */
13663 	mark_reg_unknown(env, regs, BPF_REG_0);
13664 	/* ld_abs load up to 32-bit skb data. */
13665 	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
13666 	return 0;
13667 }
13668 
13669 static int check_return_code(struct bpf_verifier_env *env)
13670 {
13671 	struct tnum enforce_attach_type_range = tnum_unknown;
13672 	const struct bpf_prog *prog = env->prog;
13673 	struct bpf_reg_state *reg;
13674 	struct tnum range = tnum_range(0, 1);
13675 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
13676 	int err;
13677 	struct bpf_func_state *frame = env->cur_state->frame[0];
13678 	const bool is_subprog = frame->subprogno;
13679 
13680 	/* LSM and struct_ops func-ptr's return type could be "void" */
13681 	if (!is_subprog) {
13682 		switch (prog_type) {
13683 		case BPF_PROG_TYPE_LSM:
13684 			if (prog->expected_attach_type == BPF_LSM_CGROUP)
13685 				/* See below, can be 0 or 0-1 depending on hook. */
13686 				break;
13687 			fallthrough;
13688 		case BPF_PROG_TYPE_STRUCT_OPS:
13689 			if (!prog->aux->attach_func_proto->type)
13690 				return 0;
13691 			break;
13692 		default:
13693 			break;
13694 		}
13695 	}
13696 
13697 	/* eBPF calling convention is such that R0 is used
13698 	 * to return the value from eBPF program.
13699 	 * Make sure that it's readable at this time
13700 	 * of bpf_exit, which means that program wrote
13701 	 * something into it earlier
13702 	 */
13703 	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
13704 	if (err)
13705 		return err;
13706 
13707 	if (is_pointer_value(env, BPF_REG_0)) {
13708 		verbose(env, "R0 leaks addr as return value\n");
13709 		return -EACCES;
13710 	}
13711 
13712 	reg = cur_regs(env) + BPF_REG_0;
13713 
13714 	if (frame->in_async_callback_fn) {
13715 		/* enforce return zero from async callbacks like timer */
13716 		if (reg->type != SCALAR_VALUE) {
13717 			verbose(env, "In async callback the register R0 is not a known value (%s)\n",
13718 				reg_type_str(env, reg->type));
13719 			return -EINVAL;
13720 		}
13721 
13722 		if (!tnum_in(tnum_const(0), reg->var_off)) {
13723 			verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
13724 			return -EINVAL;
13725 		}
13726 		return 0;
13727 	}
13728 
13729 	if (is_subprog) {
13730 		if (reg->type != SCALAR_VALUE) {
13731 			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
13732 				reg_type_str(env, reg->type));
13733 			return -EINVAL;
13734 		}
13735 		return 0;
13736 	}
13737 
13738 	switch (prog_type) {
13739 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
13740 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
13741 		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
13742 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
13743 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
13744 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
13745 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
13746 			range = tnum_range(1, 1);
13747 		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
13748 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
13749 			range = tnum_range(0, 3);
13750 		break;
13751 	case BPF_PROG_TYPE_CGROUP_SKB:
13752 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
13753 			range = tnum_range(0, 3);
13754 			enforce_attach_type_range = tnum_range(2, 3);
13755 		}
13756 		break;
13757 	case BPF_PROG_TYPE_CGROUP_SOCK:
13758 	case BPF_PROG_TYPE_SOCK_OPS:
13759 	case BPF_PROG_TYPE_CGROUP_DEVICE:
13760 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
13761 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
13762 		break;
13763 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
13764 		if (!env->prog->aux->attach_btf_id)
13765 			return 0;
13766 		range = tnum_const(0);
13767 		break;
13768 	case BPF_PROG_TYPE_TRACING:
13769 		switch (env->prog->expected_attach_type) {
13770 		case BPF_TRACE_FENTRY:
13771 		case BPF_TRACE_FEXIT:
13772 			range = tnum_const(0);
13773 			break;
13774 		case BPF_TRACE_RAW_TP:
13775 		case BPF_MODIFY_RETURN:
13776 			return 0;
13777 		case BPF_TRACE_ITER:
13778 			break;
13779 		default:
13780 			return -ENOTSUPP;
13781 		}
13782 		break;
13783 	case BPF_PROG_TYPE_SK_LOOKUP:
13784 		range = tnum_range(SK_DROP, SK_PASS);
13785 		break;
13786 
13787 	case BPF_PROG_TYPE_LSM:
13788 		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
13789 			/* Regular BPF_PROG_TYPE_LSM programs can return
13790 			 * any value.
13791 			 */
13792 			return 0;
13793 		}
13794 		if (!env->prog->aux->attach_func_proto->type) {
13795 			/* Make sure programs that attach to void
13796 			 * hooks don't try to modify return value.
13797 			 */
13798 			range = tnum_range(1, 1);
13799 		}
13800 		break;
13801 
13802 	case BPF_PROG_TYPE_EXT:
13803 		/* freplace program can return anything as its return value
13804 		 * depends on the to-be-replaced kernel func or bpf program.
13805 		 */
13806 	default:
13807 		return 0;
13808 	}
13809 
13810 	if (reg->type != SCALAR_VALUE) {
13811 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
13812 			reg_type_str(env, reg->type));
13813 		return -EINVAL;
13814 	}
13815 
13816 	if (!tnum_in(range, reg->var_off)) {
13817 		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
13818 		if (prog->expected_attach_type == BPF_LSM_CGROUP &&
13819 		    prog_type == BPF_PROG_TYPE_LSM &&
13820 		    !prog->aux->attach_func_proto->type)
13821 			verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
13822 		return -EINVAL;
13823 	}
13824 
13825 	if (!tnum_is_unknown(enforce_attach_type_range) &&
13826 	    tnum_in(enforce_attach_type_range, reg->var_off))
13827 		env->prog->enforce_expected_attach_type = 1;
13828 	return 0;
13829 }
13830 
13831 /* non-recursive DFS pseudo code
13832  * 1  procedure DFS-iterative(G,v):
13833  * 2      label v as discovered
13834  * 3      let S be a stack
13835  * 4      S.push(v)
13836  * 5      while S is not empty
13837  * 6            t <- S.peek()
13838  * 7            if t is what we're looking for:
13839  * 8                return t
13840  * 9            for all edges e in G.adjacentEdges(t) do
13841  * 10               if edge e is already labelled
13842  * 11                   continue with the next edge
13843  * 12               w <- G.adjacentVertex(t,e)
13844  * 13               if vertex w is not discovered and not explored
13845  * 14                   label e as tree-edge
13846  * 15                   label w as discovered
13847  * 16                   S.push(w)
13848  * 17                   continue at 5
13849  * 18               else if vertex w is discovered
13850  * 19                   label e as back-edge
13851  * 20               else
13852  * 21                   // vertex w is explored
13853  * 22                   label e as forward- or cross-edge
13854  * 23           label t as explored
13855  * 24           S.pop()
13856  *
13857  * convention:
13858  * 0x10 - discovered
13859  * 0x11 - discovered and fall-through edge labelled
13860  * 0x12 - discovered and fall-through and branch edges labelled
13861  * 0x20 - explored
13862  */
13863 
13864 enum {
13865 	DISCOVERED = 0x10,
13866 	EXPLORED = 0x20,
13867 	FALLTHROUGH = 1,
13868 	BRANCH = 2,
13869 };
13870 
13871 static u32 state_htab_size(struct bpf_verifier_env *env)
13872 {
13873 	return env->prog->len;
13874 }
13875 
13876 static struct bpf_verifier_state_list **explored_state(
13877 					struct bpf_verifier_env *env,
13878 					int idx)
13879 {
13880 	struct bpf_verifier_state *cur = env->cur_state;
13881 	struct bpf_func_state *state = cur->frame[cur->curframe];
13882 
13883 	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
13884 }
13885 
13886 static void mark_prune_point(struct bpf_verifier_env *env, int idx)
13887 {
13888 	env->insn_aux_data[idx].prune_point = true;
13889 }
13890 
13891 static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
13892 {
13893 	return env->insn_aux_data[insn_idx].prune_point;
13894 }
13895 
13896 static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
13897 {
13898 	env->insn_aux_data[idx].force_checkpoint = true;
13899 }
13900 
13901 static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
13902 {
13903 	return env->insn_aux_data[insn_idx].force_checkpoint;
13904 }
13905 
13906 
13907 enum {
13908 	DONE_EXPLORING = 0,
13909 	KEEP_EXPLORING = 1,
13910 };
13911 
13912 /* t, w, e - match pseudo-code above:
13913  * t - index of current instruction
13914  * w - next instruction
13915  * e - edge
13916  */
13917 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
13918 		     bool loop_ok)
13919 {
13920 	int *insn_stack = env->cfg.insn_stack;
13921 	int *insn_state = env->cfg.insn_state;
13922 
13923 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
13924 		return DONE_EXPLORING;
13925 
13926 	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
13927 		return DONE_EXPLORING;
13928 
13929 	if (w < 0 || w >= env->prog->len) {
13930 		verbose_linfo(env, t, "%d: ", t);
13931 		verbose(env, "jump out of range from insn %d to %d\n", t, w);
13932 		return -EINVAL;
13933 	}
13934 
13935 	if (e == BRANCH) {
13936 		/* mark branch target for state pruning */
13937 		mark_prune_point(env, w);
13938 		mark_jmp_point(env, w);
13939 	}
13940 
13941 	if (insn_state[w] == 0) {
13942 		/* tree-edge */
13943 		insn_state[t] = DISCOVERED | e;
13944 		insn_state[w] = DISCOVERED;
13945 		if (env->cfg.cur_stack >= env->prog->len)
13946 			return -E2BIG;
13947 		insn_stack[env->cfg.cur_stack++] = w;
13948 		return KEEP_EXPLORING;
13949 	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
13950 		if (loop_ok && env->bpf_capable)
13951 			return DONE_EXPLORING;
13952 		verbose_linfo(env, t, "%d: ", t);
13953 		verbose_linfo(env, w, "%d: ", w);
13954 		verbose(env, "back-edge from insn %d to %d\n", t, w);
13955 		return -EINVAL;
13956 	} else if (insn_state[w] == EXPLORED) {
13957 		/* forward- or cross-edge */
13958 		insn_state[t] = DISCOVERED | e;
13959 	} else {
13960 		verbose(env, "insn state internal bug\n");
13961 		return -EFAULT;
13962 	}
13963 	return DONE_EXPLORING;
13964 }
13965 
13966 static int visit_func_call_insn(int t, struct bpf_insn *insns,
13967 				struct bpf_verifier_env *env,
13968 				bool visit_callee)
13969 {
13970 	int ret;
13971 
13972 	ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
13973 	if (ret)
13974 		return ret;
13975 
13976 	mark_prune_point(env, t + 1);
13977 	/* when we exit from subprog, we need to record non-linear history */
13978 	mark_jmp_point(env, t + 1);
13979 
13980 	if (visit_callee) {
13981 		mark_prune_point(env, t);
13982 		ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
13983 				/* It's ok to allow recursion from CFG point of
13984 				 * view. __check_func_call() will do the actual
13985 				 * check.
13986 				 */
13987 				bpf_pseudo_func(insns + t));
13988 	}
13989 	return ret;
13990 }
13991 
13992 /* Visits the instruction at index t and returns one of the following:
13993  *  < 0 - an error occurred
13994  *  DONE_EXPLORING - the instruction was fully explored
13995  *  KEEP_EXPLORING - there is still work to be done before it is fully explored
13996  */
13997 static int visit_insn(int t, struct bpf_verifier_env *env)
13998 {
13999 	struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
14000 	int ret;
14001 
14002 	if (bpf_pseudo_func(insn))
14003 		return visit_func_call_insn(t, insns, env, true);
14004 
14005 	/* All non-branch instructions have a single fall-through edge. */
14006 	if (BPF_CLASS(insn->code) != BPF_JMP &&
14007 	    BPF_CLASS(insn->code) != BPF_JMP32)
14008 		return push_insn(t, t + 1, FALLTHROUGH, env, false);
14009 
14010 	switch (BPF_OP(insn->code)) {
14011 	case BPF_EXIT:
14012 		return DONE_EXPLORING;
14013 
14014 	case BPF_CALL:
14015 		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback)
14016 			/* Mark this call insn as a prune point to trigger
14017 			 * is_state_visited() check before call itself is
14018 			 * processed by __check_func_call(). Otherwise new
14019 			 * async state will be pushed for further exploration.
14020 			 */
14021 			mark_prune_point(env, t);
14022 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
14023 			struct bpf_kfunc_call_arg_meta meta;
14024 
14025 			ret = fetch_kfunc_meta(env, insn, &meta, NULL);
14026 			if (ret == 0 && is_iter_next_kfunc(&meta)) {
14027 				mark_prune_point(env, t);
14028 				/* Checking and saving state checkpoints at iter_next() call
14029 				 * is crucial for fast convergence of open-coded iterator loop
14030 				 * logic, so we need to force it. If we don't do that,
14031 				 * is_state_visited() might skip saving a checkpoint, causing
14032 				 * unnecessarily long sequence of not checkpointed
14033 				 * instructions and jumps, leading to exhaustion of jump
14034 				 * history buffer, and potentially other undesired outcomes.
14035 				 * It is expected that with correct open-coded iterators
14036 				 * convergence will happen quickly, so we don't run a risk of
14037 				 * exhausting memory.
14038 				 */
14039 				mark_force_checkpoint(env, t);
14040 			}
14041 		}
14042 		return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
14043 
14044 	case BPF_JA:
14045 		if (BPF_SRC(insn->code) != BPF_K)
14046 			return -EINVAL;
14047 
14048 		/* unconditional jump with single edge */
14049 		ret = push_insn(t, t + insn->off + 1, FALLTHROUGH, env,
14050 				true);
14051 		if (ret)
14052 			return ret;
14053 
14054 		mark_prune_point(env, t + insn->off + 1);
14055 		mark_jmp_point(env, t + insn->off + 1);
14056 
14057 		return ret;
14058 
14059 	default:
14060 		/* conditional jump with two edges */
14061 		mark_prune_point(env, t);
14062 
14063 		ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
14064 		if (ret)
14065 			return ret;
14066 
14067 		return push_insn(t, t + insn->off + 1, BRANCH, env, true);
14068 	}
14069 }
14070 
14071 /* non-recursive depth-first-search to detect loops in BPF program
14072  * loop == back-edge in directed graph
14073  */
14074 static int check_cfg(struct bpf_verifier_env *env)
14075 {
14076 	int insn_cnt = env->prog->len;
14077 	int *insn_stack, *insn_state;
14078 	int ret = 0;
14079 	int i;
14080 
14081 	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
14082 	if (!insn_state)
14083 		return -ENOMEM;
14084 
14085 	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
14086 	if (!insn_stack) {
14087 		kvfree(insn_state);
14088 		return -ENOMEM;
14089 	}
14090 
14091 	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
14092 	insn_stack[0] = 0; /* 0 is the first instruction */
14093 	env->cfg.cur_stack = 1;
14094 
14095 	while (env->cfg.cur_stack > 0) {
14096 		int t = insn_stack[env->cfg.cur_stack - 1];
14097 
14098 		ret = visit_insn(t, env);
14099 		switch (ret) {
14100 		case DONE_EXPLORING:
14101 			insn_state[t] = EXPLORED;
14102 			env->cfg.cur_stack--;
14103 			break;
14104 		case KEEP_EXPLORING:
14105 			break;
14106 		default:
14107 			if (ret > 0) {
14108 				verbose(env, "visit_insn internal bug\n");
14109 				ret = -EFAULT;
14110 			}
14111 			goto err_free;
14112 		}
14113 	}
14114 
14115 	if (env->cfg.cur_stack < 0) {
14116 		verbose(env, "pop stack internal bug\n");
14117 		ret = -EFAULT;
14118 		goto err_free;
14119 	}
14120 
14121 	for (i = 0; i < insn_cnt; i++) {
14122 		if (insn_state[i] != EXPLORED) {
14123 			verbose(env, "unreachable insn %d\n", i);
14124 			ret = -EINVAL;
14125 			goto err_free;
14126 		}
14127 	}
14128 	ret = 0; /* cfg looks good */
14129 
14130 err_free:
14131 	kvfree(insn_state);
14132 	kvfree(insn_stack);
14133 	env->cfg.insn_state = env->cfg.insn_stack = NULL;
14134 	return ret;
14135 }
14136 
14137 static int check_abnormal_return(struct bpf_verifier_env *env)
14138 {
14139 	int i;
14140 
14141 	for (i = 1; i < env->subprog_cnt; i++) {
14142 		if (env->subprog_info[i].has_ld_abs) {
14143 			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
14144 			return -EINVAL;
14145 		}
14146 		if (env->subprog_info[i].has_tail_call) {
14147 			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
14148 			return -EINVAL;
14149 		}
14150 	}
14151 	return 0;
14152 }
14153 
14154 /* The minimum supported BTF func info size */
14155 #define MIN_BPF_FUNCINFO_SIZE	8
14156 #define MAX_FUNCINFO_REC_SIZE	252
14157 
14158 static int check_btf_func(struct bpf_verifier_env *env,
14159 			  const union bpf_attr *attr,
14160 			  bpfptr_t uattr)
14161 {
14162 	const struct btf_type *type, *func_proto, *ret_type;
14163 	u32 i, nfuncs, urec_size, min_size;
14164 	u32 krec_size = sizeof(struct bpf_func_info);
14165 	struct bpf_func_info *krecord;
14166 	struct bpf_func_info_aux *info_aux = NULL;
14167 	struct bpf_prog *prog;
14168 	const struct btf *btf;
14169 	bpfptr_t urecord;
14170 	u32 prev_offset = 0;
14171 	bool scalar_return;
14172 	int ret = -ENOMEM;
14173 
14174 	nfuncs = attr->func_info_cnt;
14175 	if (!nfuncs) {
14176 		if (check_abnormal_return(env))
14177 			return -EINVAL;
14178 		return 0;
14179 	}
14180 
14181 	if (nfuncs != env->subprog_cnt) {
14182 		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
14183 		return -EINVAL;
14184 	}
14185 
14186 	urec_size = attr->func_info_rec_size;
14187 	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
14188 	    urec_size > MAX_FUNCINFO_REC_SIZE ||
14189 	    urec_size % sizeof(u32)) {
14190 		verbose(env, "invalid func info rec size %u\n", urec_size);
14191 		return -EINVAL;
14192 	}
14193 
14194 	prog = env->prog;
14195 	btf = prog->aux->btf;
14196 
14197 	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
14198 	min_size = min_t(u32, krec_size, urec_size);
14199 
14200 	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
14201 	if (!krecord)
14202 		return -ENOMEM;
14203 	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
14204 	if (!info_aux)
14205 		goto err_free;
14206 
14207 	for (i = 0; i < nfuncs; i++) {
14208 		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
14209 		if (ret) {
14210 			if (ret == -E2BIG) {
14211 				verbose(env, "nonzero tailing record in func info");
14212 				/* set the size kernel expects so loader can zero
14213 				 * out the rest of the record.
14214 				 */
14215 				if (copy_to_bpfptr_offset(uattr,
14216 							  offsetof(union bpf_attr, func_info_rec_size),
14217 							  &min_size, sizeof(min_size)))
14218 					ret = -EFAULT;
14219 			}
14220 			goto err_free;
14221 		}
14222 
14223 		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
14224 			ret = -EFAULT;
14225 			goto err_free;
14226 		}
14227 
14228 		/* check insn_off */
14229 		ret = -EINVAL;
14230 		if (i == 0) {
14231 			if (krecord[i].insn_off) {
14232 				verbose(env,
14233 					"nonzero insn_off %u for the first func info record",
14234 					krecord[i].insn_off);
14235 				goto err_free;
14236 			}
14237 		} else if (krecord[i].insn_off <= prev_offset) {
14238 			verbose(env,
14239 				"same or smaller insn offset (%u) than previous func info record (%u)",
14240 				krecord[i].insn_off, prev_offset);
14241 			goto err_free;
14242 		}
14243 
14244 		if (env->subprog_info[i].start != krecord[i].insn_off) {
14245 			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
14246 			goto err_free;
14247 		}
14248 
14249 		/* check type_id */
14250 		type = btf_type_by_id(btf, krecord[i].type_id);
14251 		if (!type || !btf_type_is_func(type)) {
14252 			verbose(env, "invalid type id %d in func info",
14253 				krecord[i].type_id);
14254 			goto err_free;
14255 		}
14256 		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
14257 
14258 		func_proto = btf_type_by_id(btf, type->type);
14259 		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
14260 			/* btf_func_check() already verified it during BTF load */
14261 			goto err_free;
14262 		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
14263 		scalar_return =
14264 			btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
14265 		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
14266 			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
14267 			goto err_free;
14268 		}
14269 		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
14270 			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
14271 			goto err_free;
14272 		}
14273 
14274 		prev_offset = krecord[i].insn_off;
14275 		bpfptr_add(&urecord, urec_size);
14276 	}
14277 
14278 	prog->aux->func_info = krecord;
14279 	prog->aux->func_info_cnt = nfuncs;
14280 	prog->aux->func_info_aux = info_aux;
14281 	return 0;
14282 
14283 err_free:
14284 	kvfree(krecord);
14285 	kfree(info_aux);
14286 	return ret;
14287 }
14288 
14289 static void adjust_btf_func(struct bpf_verifier_env *env)
14290 {
14291 	struct bpf_prog_aux *aux = env->prog->aux;
14292 	int i;
14293 
14294 	if (!aux->func_info)
14295 		return;
14296 
14297 	for (i = 0; i < env->subprog_cnt; i++)
14298 		aux->func_info[i].insn_off = env->subprog_info[i].start;
14299 }
14300 
14301 #define MIN_BPF_LINEINFO_SIZE	offsetofend(struct bpf_line_info, line_col)
14302 #define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
14303 
14304 static int check_btf_line(struct bpf_verifier_env *env,
14305 			  const union bpf_attr *attr,
14306 			  bpfptr_t uattr)
14307 {
14308 	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
14309 	struct bpf_subprog_info *sub;
14310 	struct bpf_line_info *linfo;
14311 	struct bpf_prog *prog;
14312 	const struct btf *btf;
14313 	bpfptr_t ulinfo;
14314 	int err;
14315 
14316 	nr_linfo = attr->line_info_cnt;
14317 	if (!nr_linfo)
14318 		return 0;
14319 	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
14320 		return -EINVAL;
14321 
14322 	rec_size = attr->line_info_rec_size;
14323 	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
14324 	    rec_size > MAX_LINEINFO_REC_SIZE ||
14325 	    rec_size & (sizeof(u32) - 1))
14326 		return -EINVAL;
14327 
14328 	/* Need to zero it in case the userspace may
14329 	 * pass in a smaller bpf_line_info object.
14330 	 */
14331 	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
14332 			 GFP_KERNEL | __GFP_NOWARN);
14333 	if (!linfo)
14334 		return -ENOMEM;
14335 
14336 	prog = env->prog;
14337 	btf = prog->aux->btf;
14338 
14339 	s = 0;
14340 	sub = env->subprog_info;
14341 	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
14342 	expected_size = sizeof(struct bpf_line_info);
14343 	ncopy = min_t(u32, expected_size, rec_size);
14344 	for (i = 0; i < nr_linfo; i++) {
14345 		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
14346 		if (err) {
14347 			if (err == -E2BIG) {
14348 				verbose(env, "nonzero tailing record in line_info");
14349 				if (copy_to_bpfptr_offset(uattr,
14350 							  offsetof(union bpf_attr, line_info_rec_size),
14351 							  &expected_size, sizeof(expected_size)))
14352 					err = -EFAULT;
14353 			}
14354 			goto err_free;
14355 		}
14356 
14357 		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
14358 			err = -EFAULT;
14359 			goto err_free;
14360 		}
14361 
14362 		/*
14363 		 * Check insn_off to ensure
14364 		 * 1) strictly increasing AND
14365 		 * 2) bounded by prog->len
14366 		 *
14367 		 * The linfo[0].insn_off == 0 check logically falls into
14368 		 * the later "missing bpf_line_info for func..." case
14369 		 * because the first linfo[0].insn_off must be the
14370 		 * first sub also and the first sub must have
14371 		 * subprog_info[0].start == 0.
14372 		 */
14373 		if ((i && linfo[i].insn_off <= prev_offset) ||
14374 		    linfo[i].insn_off >= prog->len) {
14375 			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
14376 				i, linfo[i].insn_off, prev_offset,
14377 				prog->len);
14378 			err = -EINVAL;
14379 			goto err_free;
14380 		}
14381 
14382 		if (!prog->insnsi[linfo[i].insn_off].code) {
14383 			verbose(env,
14384 				"Invalid insn code at line_info[%u].insn_off\n",
14385 				i);
14386 			err = -EINVAL;
14387 			goto err_free;
14388 		}
14389 
14390 		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
14391 		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
14392 			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
14393 			err = -EINVAL;
14394 			goto err_free;
14395 		}
14396 
14397 		if (s != env->subprog_cnt) {
14398 			if (linfo[i].insn_off == sub[s].start) {
14399 				sub[s].linfo_idx = i;
14400 				s++;
14401 			} else if (sub[s].start < linfo[i].insn_off) {
14402 				verbose(env, "missing bpf_line_info for func#%u\n", s);
14403 				err = -EINVAL;
14404 				goto err_free;
14405 			}
14406 		}
14407 
14408 		prev_offset = linfo[i].insn_off;
14409 		bpfptr_add(&ulinfo, rec_size);
14410 	}
14411 
14412 	if (s != env->subprog_cnt) {
14413 		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
14414 			env->subprog_cnt - s, s);
14415 		err = -EINVAL;
14416 		goto err_free;
14417 	}
14418 
14419 	prog->aux->linfo = linfo;
14420 	prog->aux->nr_linfo = nr_linfo;
14421 
14422 	return 0;
14423 
14424 err_free:
14425 	kvfree(linfo);
14426 	return err;
14427 }
14428 
14429 #define MIN_CORE_RELO_SIZE	sizeof(struct bpf_core_relo)
14430 #define MAX_CORE_RELO_SIZE	MAX_FUNCINFO_REC_SIZE
14431 
14432 static int check_core_relo(struct bpf_verifier_env *env,
14433 			   const union bpf_attr *attr,
14434 			   bpfptr_t uattr)
14435 {
14436 	u32 i, nr_core_relo, ncopy, expected_size, rec_size;
14437 	struct bpf_core_relo core_relo = {};
14438 	struct bpf_prog *prog = env->prog;
14439 	const struct btf *btf = prog->aux->btf;
14440 	struct bpf_core_ctx ctx = {
14441 		.log = &env->log,
14442 		.btf = btf,
14443 	};
14444 	bpfptr_t u_core_relo;
14445 	int err;
14446 
14447 	nr_core_relo = attr->core_relo_cnt;
14448 	if (!nr_core_relo)
14449 		return 0;
14450 	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
14451 		return -EINVAL;
14452 
14453 	rec_size = attr->core_relo_rec_size;
14454 	if (rec_size < MIN_CORE_RELO_SIZE ||
14455 	    rec_size > MAX_CORE_RELO_SIZE ||
14456 	    rec_size % sizeof(u32))
14457 		return -EINVAL;
14458 
14459 	u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
14460 	expected_size = sizeof(struct bpf_core_relo);
14461 	ncopy = min_t(u32, expected_size, rec_size);
14462 
14463 	/* Unlike func_info and line_info, copy and apply each CO-RE
14464 	 * relocation record one at a time.
14465 	 */
14466 	for (i = 0; i < nr_core_relo; i++) {
14467 		/* future proofing when sizeof(bpf_core_relo) changes */
14468 		err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
14469 		if (err) {
14470 			if (err == -E2BIG) {
14471 				verbose(env, "nonzero tailing record in core_relo");
14472 				if (copy_to_bpfptr_offset(uattr,
14473 							  offsetof(union bpf_attr, core_relo_rec_size),
14474 							  &expected_size, sizeof(expected_size)))
14475 					err = -EFAULT;
14476 			}
14477 			break;
14478 		}
14479 
14480 		if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
14481 			err = -EFAULT;
14482 			break;
14483 		}
14484 
14485 		if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
14486 			verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
14487 				i, core_relo.insn_off, prog->len);
14488 			err = -EINVAL;
14489 			break;
14490 		}
14491 
14492 		err = bpf_core_apply(&ctx, &core_relo, i,
14493 				     &prog->insnsi[core_relo.insn_off / 8]);
14494 		if (err)
14495 			break;
14496 		bpfptr_add(&u_core_relo, rec_size);
14497 	}
14498 	return err;
14499 }
14500 
14501 static int check_btf_info(struct bpf_verifier_env *env,
14502 			  const union bpf_attr *attr,
14503 			  bpfptr_t uattr)
14504 {
14505 	struct btf *btf;
14506 	int err;
14507 
14508 	if (!attr->func_info_cnt && !attr->line_info_cnt) {
14509 		if (check_abnormal_return(env))
14510 			return -EINVAL;
14511 		return 0;
14512 	}
14513 
14514 	btf = btf_get_by_fd(attr->prog_btf_fd);
14515 	if (IS_ERR(btf))
14516 		return PTR_ERR(btf);
14517 	if (btf_is_kernel(btf)) {
14518 		btf_put(btf);
14519 		return -EACCES;
14520 	}
14521 	env->prog->aux->btf = btf;
14522 
14523 	err = check_btf_func(env, attr, uattr);
14524 	if (err)
14525 		return err;
14526 
14527 	err = check_btf_line(env, attr, uattr);
14528 	if (err)
14529 		return err;
14530 
14531 	err = check_core_relo(env, attr, uattr);
14532 	if (err)
14533 		return err;
14534 
14535 	return 0;
14536 }
14537 
14538 /* check %cur's range satisfies %old's */
14539 static bool range_within(struct bpf_reg_state *old,
14540 			 struct bpf_reg_state *cur)
14541 {
14542 	return old->umin_value <= cur->umin_value &&
14543 	       old->umax_value >= cur->umax_value &&
14544 	       old->smin_value <= cur->smin_value &&
14545 	       old->smax_value >= cur->smax_value &&
14546 	       old->u32_min_value <= cur->u32_min_value &&
14547 	       old->u32_max_value >= cur->u32_max_value &&
14548 	       old->s32_min_value <= cur->s32_min_value &&
14549 	       old->s32_max_value >= cur->s32_max_value;
14550 }
14551 
14552 /* If in the old state two registers had the same id, then they need to have
14553  * the same id in the new state as well.  But that id could be different from
14554  * the old state, so we need to track the mapping from old to new ids.
14555  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
14556  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
14557  * regs with a different old id could still have new id 9, we don't care about
14558  * that.
14559  * So we look through our idmap to see if this old id has been seen before.  If
14560  * so, we require the new id to match; otherwise, we add the id pair to the map.
14561  */
14562 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
14563 {
14564 	unsigned int i;
14565 
14566 	/* either both IDs should be set or both should be zero */
14567 	if (!!old_id != !!cur_id)
14568 		return false;
14569 
14570 	if (old_id == 0) /* cur_id == 0 as well */
14571 		return true;
14572 
14573 	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
14574 		if (!idmap[i].old) {
14575 			/* Reached an empty slot; haven't seen this id before */
14576 			idmap[i].old = old_id;
14577 			idmap[i].cur = cur_id;
14578 			return true;
14579 		}
14580 		if (idmap[i].old == old_id)
14581 			return idmap[i].cur == cur_id;
14582 	}
14583 	/* We ran out of idmap slots, which should be impossible */
14584 	WARN_ON_ONCE(1);
14585 	return false;
14586 }
14587 
14588 static void clean_func_state(struct bpf_verifier_env *env,
14589 			     struct bpf_func_state *st)
14590 {
14591 	enum bpf_reg_liveness live;
14592 	int i, j;
14593 
14594 	for (i = 0; i < BPF_REG_FP; i++) {
14595 		live = st->regs[i].live;
14596 		/* liveness must not touch this register anymore */
14597 		st->regs[i].live |= REG_LIVE_DONE;
14598 		if (!(live & REG_LIVE_READ))
14599 			/* since the register is unused, clear its state
14600 			 * to make further comparison simpler
14601 			 */
14602 			__mark_reg_not_init(env, &st->regs[i]);
14603 	}
14604 
14605 	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
14606 		live = st->stack[i].spilled_ptr.live;
14607 		/* liveness must not touch this stack slot anymore */
14608 		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
14609 		if (!(live & REG_LIVE_READ)) {
14610 			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
14611 			for (j = 0; j < BPF_REG_SIZE; j++)
14612 				st->stack[i].slot_type[j] = STACK_INVALID;
14613 		}
14614 	}
14615 }
14616 
14617 static void clean_verifier_state(struct bpf_verifier_env *env,
14618 				 struct bpf_verifier_state *st)
14619 {
14620 	int i;
14621 
14622 	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
14623 		/* all regs in this state in all frames were already marked */
14624 		return;
14625 
14626 	for (i = 0; i <= st->curframe; i++)
14627 		clean_func_state(env, st->frame[i]);
14628 }
14629 
14630 /* the parentage chains form a tree.
14631  * the verifier states are added to state lists at given insn and
14632  * pushed into state stack for future exploration.
14633  * when the verifier reaches bpf_exit insn some of the verifer states
14634  * stored in the state lists have their final liveness state already,
14635  * but a lot of states will get revised from liveness point of view when
14636  * the verifier explores other branches.
14637  * Example:
14638  * 1: r0 = 1
14639  * 2: if r1 == 100 goto pc+1
14640  * 3: r0 = 2
14641  * 4: exit
14642  * when the verifier reaches exit insn the register r0 in the state list of
14643  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
14644  * of insn 2 and goes exploring further. At the insn 4 it will walk the
14645  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
14646  *
14647  * Since the verifier pushes the branch states as it sees them while exploring
14648  * the program the condition of walking the branch instruction for the second
14649  * time means that all states below this branch were already explored and
14650  * their final liveness marks are already propagated.
14651  * Hence when the verifier completes the search of state list in is_state_visited()
14652  * we can call this clean_live_states() function to mark all liveness states
14653  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
14654  * will not be used.
14655  * This function also clears the registers and stack for states that !READ
14656  * to simplify state merging.
14657  *
14658  * Important note here that walking the same branch instruction in the callee
14659  * doesn't meant that the states are DONE. The verifier has to compare
14660  * the callsites
14661  */
14662 static void clean_live_states(struct bpf_verifier_env *env, int insn,
14663 			      struct bpf_verifier_state *cur)
14664 {
14665 	struct bpf_verifier_state_list *sl;
14666 	int i;
14667 
14668 	sl = *explored_state(env, insn);
14669 	while (sl) {
14670 		if (sl->state.branches)
14671 			goto next;
14672 		if (sl->state.insn_idx != insn ||
14673 		    sl->state.curframe != cur->curframe)
14674 			goto next;
14675 		for (i = 0; i <= cur->curframe; i++)
14676 			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
14677 				goto next;
14678 		clean_verifier_state(env, &sl->state);
14679 next:
14680 		sl = sl->next;
14681 	}
14682 }
14683 
14684 static bool regs_exact(const struct bpf_reg_state *rold,
14685 		       const struct bpf_reg_state *rcur,
14686 		       struct bpf_id_pair *idmap)
14687 {
14688 	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
14689 	       check_ids(rold->id, rcur->id, idmap) &&
14690 	       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
14691 }
14692 
14693 /* Returns true if (rold safe implies rcur safe) */
14694 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
14695 		    struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
14696 {
14697 	if (!(rold->live & REG_LIVE_READ))
14698 		/* explored state didn't use this */
14699 		return true;
14700 	if (rold->type == NOT_INIT)
14701 		/* explored state can't have used this */
14702 		return true;
14703 	if (rcur->type == NOT_INIT)
14704 		return false;
14705 
14706 	/* Enforce that register types have to match exactly, including their
14707 	 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
14708 	 * rule.
14709 	 *
14710 	 * One can make a point that using a pointer register as unbounded
14711 	 * SCALAR would be technically acceptable, but this could lead to
14712 	 * pointer leaks because scalars are allowed to leak while pointers
14713 	 * are not. We could make this safe in special cases if root is
14714 	 * calling us, but it's probably not worth the hassle.
14715 	 *
14716 	 * Also, register types that are *not* MAYBE_NULL could technically be
14717 	 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
14718 	 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
14719 	 * to the same map).
14720 	 * However, if the old MAYBE_NULL register then got NULL checked,
14721 	 * doing so could have affected others with the same id, and we can't
14722 	 * check for that because we lost the id when we converted to
14723 	 * a non-MAYBE_NULL variant.
14724 	 * So, as a general rule we don't allow mixing MAYBE_NULL and
14725 	 * non-MAYBE_NULL registers as well.
14726 	 */
14727 	if (rold->type != rcur->type)
14728 		return false;
14729 
14730 	switch (base_type(rold->type)) {
14731 	case SCALAR_VALUE:
14732 		if (regs_exact(rold, rcur, idmap))
14733 			return true;
14734 		if (env->explore_alu_limits)
14735 			return false;
14736 		if (!rold->precise)
14737 			return true;
14738 		/* new val must satisfy old val knowledge */
14739 		return range_within(rold, rcur) &&
14740 		       tnum_in(rold->var_off, rcur->var_off);
14741 	case PTR_TO_MAP_KEY:
14742 	case PTR_TO_MAP_VALUE:
14743 	case PTR_TO_MEM:
14744 	case PTR_TO_BUF:
14745 	case PTR_TO_TP_BUFFER:
14746 		/* If the new min/max/var_off satisfy the old ones and
14747 		 * everything else matches, we are OK.
14748 		 */
14749 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
14750 		       range_within(rold, rcur) &&
14751 		       tnum_in(rold->var_off, rcur->var_off) &&
14752 		       check_ids(rold->id, rcur->id, idmap) &&
14753 		       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
14754 	case PTR_TO_PACKET_META:
14755 	case PTR_TO_PACKET:
14756 		/* We must have at least as much range as the old ptr
14757 		 * did, so that any accesses which were safe before are
14758 		 * still safe.  This is true even if old range < old off,
14759 		 * since someone could have accessed through (ptr - k), or
14760 		 * even done ptr -= k in a register, to get a safe access.
14761 		 */
14762 		if (rold->range > rcur->range)
14763 			return false;
14764 		/* If the offsets don't match, we can't trust our alignment;
14765 		 * nor can we be sure that we won't fall out of range.
14766 		 */
14767 		if (rold->off != rcur->off)
14768 			return false;
14769 		/* id relations must be preserved */
14770 		if (!check_ids(rold->id, rcur->id, idmap))
14771 			return false;
14772 		/* new val must satisfy old val knowledge */
14773 		return range_within(rold, rcur) &&
14774 		       tnum_in(rold->var_off, rcur->var_off);
14775 	case PTR_TO_STACK:
14776 		/* two stack pointers are equal only if they're pointing to
14777 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
14778 		 */
14779 		return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
14780 	default:
14781 		return regs_exact(rold, rcur, idmap);
14782 	}
14783 }
14784 
14785 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
14786 		      struct bpf_func_state *cur, struct bpf_id_pair *idmap)
14787 {
14788 	int i, spi;
14789 
14790 	/* walk slots of the explored stack and ignore any additional
14791 	 * slots in the current stack, since explored(safe) state
14792 	 * didn't use them
14793 	 */
14794 	for (i = 0; i < old->allocated_stack; i++) {
14795 		struct bpf_reg_state *old_reg, *cur_reg;
14796 
14797 		spi = i / BPF_REG_SIZE;
14798 
14799 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
14800 			i += BPF_REG_SIZE - 1;
14801 			/* explored state didn't use this */
14802 			continue;
14803 		}
14804 
14805 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
14806 			continue;
14807 
14808 		if (env->allow_uninit_stack &&
14809 		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
14810 			continue;
14811 
14812 		/* explored stack has more populated slots than current stack
14813 		 * and these slots were used
14814 		 */
14815 		if (i >= cur->allocated_stack)
14816 			return false;
14817 
14818 		/* if old state was safe with misc data in the stack
14819 		 * it will be safe with zero-initialized stack.
14820 		 * The opposite is not true
14821 		 */
14822 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
14823 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
14824 			continue;
14825 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
14826 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
14827 			/* Ex: old explored (safe) state has STACK_SPILL in
14828 			 * this stack slot, but current has STACK_MISC ->
14829 			 * this verifier states are not equivalent,
14830 			 * return false to continue verification of this path
14831 			 */
14832 			return false;
14833 		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
14834 			continue;
14835 		/* Both old and cur are having same slot_type */
14836 		switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
14837 		case STACK_SPILL:
14838 			/* when explored and current stack slot are both storing
14839 			 * spilled registers, check that stored pointers types
14840 			 * are the same as well.
14841 			 * Ex: explored safe path could have stored
14842 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
14843 			 * but current path has stored:
14844 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
14845 			 * such verifier states are not equivalent.
14846 			 * return false to continue verification of this path
14847 			 */
14848 			if (!regsafe(env, &old->stack[spi].spilled_ptr,
14849 				     &cur->stack[spi].spilled_ptr, idmap))
14850 				return false;
14851 			break;
14852 		case STACK_DYNPTR:
14853 			old_reg = &old->stack[spi].spilled_ptr;
14854 			cur_reg = &cur->stack[spi].spilled_ptr;
14855 			if (old_reg->dynptr.type != cur_reg->dynptr.type ||
14856 			    old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
14857 			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
14858 				return false;
14859 			break;
14860 		case STACK_ITER:
14861 			old_reg = &old->stack[spi].spilled_ptr;
14862 			cur_reg = &cur->stack[spi].spilled_ptr;
14863 			/* iter.depth is not compared between states as it
14864 			 * doesn't matter for correctness and would otherwise
14865 			 * prevent convergence; we maintain it only to prevent
14866 			 * infinite loop check triggering, see
14867 			 * iter_active_depths_differ()
14868 			 */
14869 			if (old_reg->iter.btf != cur_reg->iter.btf ||
14870 			    old_reg->iter.btf_id != cur_reg->iter.btf_id ||
14871 			    old_reg->iter.state != cur_reg->iter.state ||
14872 			    /* ignore {old_reg,cur_reg}->iter.depth, see above */
14873 			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
14874 				return false;
14875 			break;
14876 		case STACK_MISC:
14877 		case STACK_ZERO:
14878 		case STACK_INVALID:
14879 			continue;
14880 		/* Ensure that new unhandled slot types return false by default */
14881 		default:
14882 			return false;
14883 		}
14884 	}
14885 	return true;
14886 }
14887 
14888 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
14889 		    struct bpf_id_pair *idmap)
14890 {
14891 	int i;
14892 
14893 	if (old->acquired_refs != cur->acquired_refs)
14894 		return false;
14895 
14896 	for (i = 0; i < old->acquired_refs; i++) {
14897 		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
14898 			return false;
14899 	}
14900 
14901 	return true;
14902 }
14903 
14904 /* compare two verifier states
14905  *
14906  * all states stored in state_list are known to be valid, since
14907  * verifier reached 'bpf_exit' instruction through them
14908  *
14909  * this function is called when verifier exploring different branches of
14910  * execution popped from the state stack. If it sees an old state that has
14911  * more strict register state and more strict stack state then this execution
14912  * branch doesn't need to be explored further, since verifier already
14913  * concluded that more strict state leads to valid finish.
14914  *
14915  * Therefore two states are equivalent if register state is more conservative
14916  * and explored stack state is more conservative than the current one.
14917  * Example:
14918  *       explored                   current
14919  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
14920  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
14921  *
14922  * In other words if current stack state (one being explored) has more
14923  * valid slots than old one that already passed validation, it means
14924  * the verifier can stop exploring and conclude that current state is valid too
14925  *
14926  * Similarly with registers. If explored state has register type as invalid
14927  * whereas register type in current state is meaningful, it means that
14928  * the current state will reach 'bpf_exit' instruction safely
14929  */
14930 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
14931 			      struct bpf_func_state *cur)
14932 {
14933 	int i;
14934 
14935 	for (i = 0; i < MAX_BPF_REG; i++)
14936 		if (!regsafe(env, &old->regs[i], &cur->regs[i],
14937 			     env->idmap_scratch))
14938 			return false;
14939 
14940 	if (!stacksafe(env, old, cur, env->idmap_scratch))
14941 		return false;
14942 
14943 	if (!refsafe(old, cur, env->idmap_scratch))
14944 		return false;
14945 
14946 	return true;
14947 }
14948 
14949 static bool states_equal(struct bpf_verifier_env *env,
14950 			 struct bpf_verifier_state *old,
14951 			 struct bpf_verifier_state *cur)
14952 {
14953 	int i;
14954 
14955 	if (old->curframe != cur->curframe)
14956 		return false;
14957 
14958 	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
14959 
14960 	/* Verification state from speculative execution simulation
14961 	 * must never prune a non-speculative execution one.
14962 	 */
14963 	if (old->speculative && !cur->speculative)
14964 		return false;
14965 
14966 	if (old->active_lock.ptr != cur->active_lock.ptr)
14967 		return false;
14968 
14969 	/* Old and cur active_lock's have to be either both present
14970 	 * or both absent.
14971 	 */
14972 	if (!!old->active_lock.id != !!cur->active_lock.id)
14973 		return false;
14974 
14975 	if (old->active_lock.id &&
14976 	    !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
14977 		return false;
14978 
14979 	if (old->active_rcu_lock != cur->active_rcu_lock)
14980 		return false;
14981 
14982 	/* for states to be equal callsites have to be the same
14983 	 * and all frame states need to be equivalent
14984 	 */
14985 	for (i = 0; i <= old->curframe; i++) {
14986 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
14987 			return false;
14988 		if (!func_states_equal(env, old->frame[i], cur->frame[i]))
14989 			return false;
14990 	}
14991 	return true;
14992 }
14993 
14994 /* Return 0 if no propagation happened. Return negative error code if error
14995  * happened. Otherwise, return the propagated bit.
14996  */
14997 static int propagate_liveness_reg(struct bpf_verifier_env *env,
14998 				  struct bpf_reg_state *reg,
14999 				  struct bpf_reg_state *parent_reg)
15000 {
15001 	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
15002 	u8 flag = reg->live & REG_LIVE_READ;
15003 	int err;
15004 
15005 	/* When comes here, read flags of PARENT_REG or REG could be any of
15006 	 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
15007 	 * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
15008 	 */
15009 	if (parent_flag == REG_LIVE_READ64 ||
15010 	    /* Or if there is no read flag from REG. */
15011 	    !flag ||
15012 	    /* Or if the read flag from REG is the same as PARENT_REG. */
15013 	    parent_flag == flag)
15014 		return 0;
15015 
15016 	err = mark_reg_read(env, reg, parent_reg, flag);
15017 	if (err)
15018 		return err;
15019 
15020 	return flag;
15021 }
15022 
15023 /* A write screens off any subsequent reads; but write marks come from the
15024  * straight-line code between a state and its parent.  When we arrive at an
15025  * equivalent state (jump target or such) we didn't arrive by the straight-line
15026  * code, so read marks in the state must propagate to the parent regardless
15027  * of the state's write marks. That's what 'parent == state->parent' comparison
15028  * in mark_reg_read() is for.
15029  */
15030 static int propagate_liveness(struct bpf_verifier_env *env,
15031 			      const struct bpf_verifier_state *vstate,
15032 			      struct bpf_verifier_state *vparent)
15033 {
15034 	struct bpf_reg_state *state_reg, *parent_reg;
15035 	struct bpf_func_state *state, *parent;
15036 	int i, frame, err = 0;
15037 
15038 	if (vparent->curframe != vstate->curframe) {
15039 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
15040 		     vparent->curframe, vstate->curframe);
15041 		return -EFAULT;
15042 	}
15043 	/* Propagate read liveness of registers... */
15044 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
15045 	for (frame = 0; frame <= vstate->curframe; frame++) {
15046 		parent = vparent->frame[frame];
15047 		state = vstate->frame[frame];
15048 		parent_reg = parent->regs;
15049 		state_reg = state->regs;
15050 		/* We don't need to worry about FP liveness, it's read-only */
15051 		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
15052 			err = propagate_liveness_reg(env, &state_reg[i],
15053 						     &parent_reg[i]);
15054 			if (err < 0)
15055 				return err;
15056 			if (err == REG_LIVE_READ64)
15057 				mark_insn_zext(env, &parent_reg[i]);
15058 		}
15059 
15060 		/* Propagate stack slots. */
15061 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
15062 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
15063 			parent_reg = &parent->stack[i].spilled_ptr;
15064 			state_reg = &state->stack[i].spilled_ptr;
15065 			err = propagate_liveness_reg(env, state_reg,
15066 						     parent_reg);
15067 			if (err < 0)
15068 				return err;
15069 		}
15070 	}
15071 	return 0;
15072 }
15073 
15074 /* find precise scalars in the previous equivalent state and
15075  * propagate them into the current state
15076  */
15077 static int propagate_precision(struct bpf_verifier_env *env,
15078 			       const struct bpf_verifier_state *old)
15079 {
15080 	struct bpf_reg_state *state_reg;
15081 	struct bpf_func_state *state;
15082 	int i, err = 0, fr;
15083 
15084 	for (fr = old->curframe; fr >= 0; fr--) {
15085 		state = old->frame[fr];
15086 		state_reg = state->regs;
15087 		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
15088 			if (state_reg->type != SCALAR_VALUE ||
15089 			    !state_reg->precise ||
15090 			    !(state_reg->live & REG_LIVE_READ))
15091 				continue;
15092 			if (env->log.level & BPF_LOG_LEVEL2)
15093 				verbose(env, "frame %d: propagating r%d\n", fr, i);
15094 			err = mark_chain_precision_frame(env, fr, i);
15095 			if (err < 0)
15096 				return err;
15097 		}
15098 
15099 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
15100 			if (!is_spilled_reg(&state->stack[i]))
15101 				continue;
15102 			state_reg = &state->stack[i].spilled_ptr;
15103 			if (state_reg->type != SCALAR_VALUE ||
15104 			    !state_reg->precise ||
15105 			    !(state_reg->live & REG_LIVE_READ))
15106 				continue;
15107 			if (env->log.level & BPF_LOG_LEVEL2)
15108 				verbose(env, "frame %d: propagating fp%d\n",
15109 					fr, (-i - 1) * BPF_REG_SIZE);
15110 			err = mark_chain_precision_stack_frame(env, fr, i);
15111 			if (err < 0)
15112 				return err;
15113 		}
15114 	}
15115 	return 0;
15116 }
15117 
15118 static bool states_maybe_looping(struct bpf_verifier_state *old,
15119 				 struct bpf_verifier_state *cur)
15120 {
15121 	struct bpf_func_state *fold, *fcur;
15122 	int i, fr = cur->curframe;
15123 
15124 	if (old->curframe != fr)
15125 		return false;
15126 
15127 	fold = old->frame[fr];
15128 	fcur = cur->frame[fr];
15129 	for (i = 0; i < MAX_BPF_REG; i++)
15130 		if (memcmp(&fold->regs[i], &fcur->regs[i],
15131 			   offsetof(struct bpf_reg_state, parent)))
15132 			return false;
15133 	return true;
15134 }
15135 
15136 static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
15137 {
15138 	return env->insn_aux_data[insn_idx].is_iter_next;
15139 }
15140 
15141 /* is_state_visited() handles iter_next() (see process_iter_next_call() for
15142  * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
15143  * states to match, which otherwise would look like an infinite loop. So while
15144  * iter_next() calls are taken care of, we still need to be careful and
15145  * prevent erroneous and too eager declaration of "ininite loop", when
15146  * iterators are involved.
15147  *
15148  * Here's a situation in pseudo-BPF assembly form:
15149  *
15150  *   0: again:                          ; set up iter_next() call args
15151  *   1:   r1 = &it                      ; <CHECKPOINT HERE>
15152  *   2:   call bpf_iter_num_next        ; this is iter_next() call
15153  *   3:   if r0 == 0 goto done
15154  *   4:   ... something useful here ...
15155  *   5:   goto again                    ; another iteration
15156  *   6: done:
15157  *   7:   r1 = &it
15158  *   8:   call bpf_iter_num_destroy     ; clean up iter state
15159  *   9:   exit
15160  *
15161  * This is a typical loop. Let's assume that we have a prune point at 1:,
15162  * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
15163  * again`, assuming other heuristics don't get in a way).
15164  *
15165  * When we first time come to 1:, let's say we have some state X. We proceed
15166  * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
15167  * Now we come back to validate that forked ACTIVE state. We proceed through
15168  * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
15169  * are converging. But the problem is that we don't know that yet, as this
15170  * convergence has to happen at iter_next() call site only. So if nothing is
15171  * done, at 1: verifier will use bounded loop logic and declare infinite
15172  * looping (and would be *technically* correct, if not for iterator's
15173  * "eventual sticky NULL" contract, see process_iter_next_call()). But we
15174  * don't want that. So what we do in process_iter_next_call() when we go on
15175  * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
15176  * a different iteration. So when we suspect an infinite loop, we additionally
15177  * check if any of the *ACTIVE* iterator states depths differ. If yes, we
15178  * pretend we are not looping and wait for next iter_next() call.
15179  *
15180  * This only applies to ACTIVE state. In DRAINED state we don't expect to
15181  * loop, because that would actually mean infinite loop, as DRAINED state is
15182  * "sticky", and so we'll keep returning into the same instruction with the
15183  * same state (at least in one of possible code paths).
15184  *
15185  * This approach allows to keep infinite loop heuristic even in the face of
15186  * active iterator. E.g., C snippet below is and will be detected as
15187  * inifintely looping:
15188  *
15189  *   struct bpf_iter_num it;
15190  *   int *p, x;
15191  *
15192  *   bpf_iter_num_new(&it, 0, 10);
15193  *   while ((p = bpf_iter_num_next(&t))) {
15194  *       x = p;
15195  *       while (x--) {} // <<-- infinite loop here
15196  *   }
15197  *
15198  */
15199 static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
15200 {
15201 	struct bpf_reg_state *slot, *cur_slot;
15202 	struct bpf_func_state *state;
15203 	int i, fr;
15204 
15205 	for (fr = old->curframe; fr >= 0; fr--) {
15206 		state = old->frame[fr];
15207 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
15208 			if (state->stack[i].slot_type[0] != STACK_ITER)
15209 				continue;
15210 
15211 			slot = &state->stack[i].spilled_ptr;
15212 			if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
15213 				continue;
15214 
15215 			cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
15216 			if (cur_slot->iter.depth != slot->iter.depth)
15217 				return true;
15218 		}
15219 	}
15220 	return false;
15221 }
15222 
15223 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
15224 {
15225 	struct bpf_verifier_state_list *new_sl;
15226 	struct bpf_verifier_state_list *sl, **pprev;
15227 	struct bpf_verifier_state *cur = env->cur_state, *new;
15228 	int i, j, err, states_cnt = 0;
15229 	bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
15230 	bool add_new_state = force_new_state;
15231 
15232 	/* bpf progs typically have pruning point every 4 instructions
15233 	 * http://vger.kernel.org/bpfconf2019.html#session-1
15234 	 * Do not add new state for future pruning if the verifier hasn't seen
15235 	 * at least 2 jumps and at least 8 instructions.
15236 	 * This heuristics helps decrease 'total_states' and 'peak_states' metric.
15237 	 * In tests that amounts to up to 50% reduction into total verifier
15238 	 * memory consumption and 20% verifier time speedup.
15239 	 */
15240 	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
15241 	    env->insn_processed - env->prev_insn_processed >= 8)
15242 		add_new_state = true;
15243 
15244 	pprev = explored_state(env, insn_idx);
15245 	sl = *pprev;
15246 
15247 	clean_live_states(env, insn_idx, cur);
15248 
15249 	while (sl) {
15250 		states_cnt++;
15251 		if (sl->state.insn_idx != insn_idx)
15252 			goto next;
15253 
15254 		if (sl->state.branches) {
15255 			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
15256 
15257 			if (frame->in_async_callback_fn &&
15258 			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
15259 				/* Different async_entry_cnt means that the verifier is
15260 				 * processing another entry into async callback.
15261 				 * Seeing the same state is not an indication of infinite
15262 				 * loop or infinite recursion.
15263 				 * But finding the same state doesn't mean that it's safe
15264 				 * to stop processing the current state. The previous state
15265 				 * hasn't yet reached bpf_exit, since state.branches > 0.
15266 				 * Checking in_async_callback_fn alone is not enough either.
15267 				 * Since the verifier still needs to catch infinite loops
15268 				 * inside async callbacks.
15269 				 */
15270 				goto skip_inf_loop_check;
15271 			}
15272 			/* BPF open-coded iterators loop detection is special.
15273 			 * states_maybe_looping() logic is too simplistic in detecting
15274 			 * states that *might* be equivalent, because it doesn't know
15275 			 * about ID remapping, so don't even perform it.
15276 			 * See process_iter_next_call() and iter_active_depths_differ()
15277 			 * for overview of the logic. When current and one of parent
15278 			 * states are detected as equivalent, it's a good thing: we prove
15279 			 * convergence and can stop simulating further iterations.
15280 			 * It's safe to assume that iterator loop will finish, taking into
15281 			 * account iter_next() contract of eventually returning
15282 			 * sticky NULL result.
15283 			 */
15284 			if (is_iter_next_insn(env, insn_idx)) {
15285 				if (states_equal(env, &sl->state, cur)) {
15286 					struct bpf_func_state *cur_frame;
15287 					struct bpf_reg_state *iter_state, *iter_reg;
15288 					int spi;
15289 
15290 					cur_frame = cur->frame[cur->curframe];
15291 					/* btf_check_iter_kfuncs() enforces that
15292 					 * iter state pointer is always the first arg
15293 					 */
15294 					iter_reg = &cur_frame->regs[BPF_REG_1];
15295 					/* current state is valid due to states_equal(),
15296 					 * so we can assume valid iter and reg state,
15297 					 * no need for extra (re-)validations
15298 					 */
15299 					spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
15300 					iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
15301 					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE)
15302 						goto hit;
15303 				}
15304 				goto skip_inf_loop_check;
15305 			}
15306 			/* attempt to detect infinite loop to avoid unnecessary doomed work */
15307 			if (states_maybe_looping(&sl->state, cur) &&
15308 			    states_equal(env, &sl->state, cur) &&
15309 			    !iter_active_depths_differ(&sl->state, cur)) {
15310 				verbose_linfo(env, insn_idx, "; ");
15311 				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
15312 				return -EINVAL;
15313 			}
15314 			/* if the verifier is processing a loop, avoid adding new state
15315 			 * too often, since different loop iterations have distinct
15316 			 * states and may not help future pruning.
15317 			 * This threshold shouldn't be too low to make sure that
15318 			 * a loop with large bound will be rejected quickly.
15319 			 * The most abusive loop will be:
15320 			 * r1 += 1
15321 			 * if r1 < 1000000 goto pc-2
15322 			 * 1M insn_procssed limit / 100 == 10k peak states.
15323 			 * This threshold shouldn't be too high either, since states
15324 			 * at the end of the loop are likely to be useful in pruning.
15325 			 */
15326 skip_inf_loop_check:
15327 			if (!force_new_state &&
15328 			    env->jmps_processed - env->prev_jmps_processed < 20 &&
15329 			    env->insn_processed - env->prev_insn_processed < 100)
15330 				add_new_state = false;
15331 			goto miss;
15332 		}
15333 		if (states_equal(env, &sl->state, cur)) {
15334 hit:
15335 			sl->hit_cnt++;
15336 			/* reached equivalent register/stack state,
15337 			 * prune the search.
15338 			 * Registers read by the continuation are read by us.
15339 			 * If we have any write marks in env->cur_state, they
15340 			 * will prevent corresponding reads in the continuation
15341 			 * from reaching our parent (an explored_state).  Our
15342 			 * own state will get the read marks recorded, but
15343 			 * they'll be immediately forgotten as we're pruning
15344 			 * this state and will pop a new one.
15345 			 */
15346 			err = propagate_liveness(env, &sl->state, cur);
15347 
15348 			/* if previous state reached the exit with precision and
15349 			 * current state is equivalent to it (except precsion marks)
15350 			 * the precision needs to be propagated back in
15351 			 * the current state.
15352 			 */
15353 			err = err ? : push_jmp_history(env, cur);
15354 			err = err ? : propagate_precision(env, &sl->state);
15355 			if (err)
15356 				return err;
15357 			return 1;
15358 		}
15359 miss:
15360 		/* when new state is not going to be added do not increase miss count.
15361 		 * Otherwise several loop iterations will remove the state
15362 		 * recorded earlier. The goal of these heuristics is to have
15363 		 * states from some iterations of the loop (some in the beginning
15364 		 * and some at the end) to help pruning.
15365 		 */
15366 		if (add_new_state)
15367 			sl->miss_cnt++;
15368 		/* heuristic to determine whether this state is beneficial
15369 		 * to keep checking from state equivalence point of view.
15370 		 * Higher numbers increase max_states_per_insn and verification time,
15371 		 * but do not meaningfully decrease insn_processed.
15372 		 */
15373 		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
15374 			/* the state is unlikely to be useful. Remove it to
15375 			 * speed up verification
15376 			 */
15377 			*pprev = sl->next;
15378 			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
15379 				u32 br = sl->state.branches;
15380 
15381 				WARN_ONCE(br,
15382 					  "BUG live_done but branches_to_explore %d\n",
15383 					  br);
15384 				free_verifier_state(&sl->state, false);
15385 				kfree(sl);
15386 				env->peak_states--;
15387 			} else {
15388 				/* cannot free this state, since parentage chain may
15389 				 * walk it later. Add it for free_list instead to
15390 				 * be freed at the end of verification
15391 				 */
15392 				sl->next = env->free_list;
15393 				env->free_list = sl;
15394 			}
15395 			sl = *pprev;
15396 			continue;
15397 		}
15398 next:
15399 		pprev = &sl->next;
15400 		sl = *pprev;
15401 	}
15402 
15403 	if (env->max_states_per_insn < states_cnt)
15404 		env->max_states_per_insn = states_cnt;
15405 
15406 	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
15407 		return 0;
15408 
15409 	if (!add_new_state)
15410 		return 0;
15411 
15412 	/* There were no equivalent states, remember the current one.
15413 	 * Technically the current state is not proven to be safe yet,
15414 	 * but it will either reach outer most bpf_exit (which means it's safe)
15415 	 * or it will be rejected. When there are no loops the verifier won't be
15416 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
15417 	 * again on the way to bpf_exit.
15418 	 * When looping the sl->state.branches will be > 0 and this state
15419 	 * will not be considered for equivalence until branches == 0.
15420 	 */
15421 	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
15422 	if (!new_sl)
15423 		return -ENOMEM;
15424 	env->total_states++;
15425 	env->peak_states++;
15426 	env->prev_jmps_processed = env->jmps_processed;
15427 	env->prev_insn_processed = env->insn_processed;
15428 
15429 	/* forget precise markings we inherited, see __mark_chain_precision */
15430 	if (env->bpf_capable)
15431 		mark_all_scalars_imprecise(env, cur);
15432 
15433 	/* add new state to the head of linked list */
15434 	new = &new_sl->state;
15435 	err = copy_verifier_state(new, cur);
15436 	if (err) {
15437 		free_verifier_state(new, false);
15438 		kfree(new_sl);
15439 		return err;
15440 	}
15441 	new->insn_idx = insn_idx;
15442 	WARN_ONCE(new->branches != 1,
15443 		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
15444 
15445 	cur->parent = new;
15446 	cur->first_insn_idx = insn_idx;
15447 	clear_jmp_history(cur);
15448 	new_sl->next = *explored_state(env, insn_idx);
15449 	*explored_state(env, insn_idx) = new_sl;
15450 	/* connect new state to parentage chain. Current frame needs all
15451 	 * registers connected. Only r6 - r9 of the callers are alive (pushed
15452 	 * to the stack implicitly by JITs) so in callers' frames connect just
15453 	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
15454 	 * the state of the call instruction (with WRITTEN set), and r0 comes
15455 	 * from callee with its full parentage chain, anyway.
15456 	 */
15457 	/* clear write marks in current state: the writes we did are not writes
15458 	 * our child did, so they don't screen off its reads from us.
15459 	 * (There are no read marks in current state, because reads always mark
15460 	 * their parent and current state never has children yet.  Only
15461 	 * explored_states can get read marks.)
15462 	 */
15463 	for (j = 0; j <= cur->curframe; j++) {
15464 		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
15465 			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
15466 		for (i = 0; i < BPF_REG_FP; i++)
15467 			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
15468 	}
15469 
15470 	/* all stack frames are accessible from callee, clear them all */
15471 	for (j = 0; j <= cur->curframe; j++) {
15472 		struct bpf_func_state *frame = cur->frame[j];
15473 		struct bpf_func_state *newframe = new->frame[j];
15474 
15475 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
15476 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
15477 			frame->stack[i].spilled_ptr.parent =
15478 						&newframe->stack[i].spilled_ptr;
15479 		}
15480 	}
15481 	return 0;
15482 }
15483 
15484 /* Return true if it's OK to have the same insn return a different type. */
15485 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
15486 {
15487 	switch (base_type(type)) {
15488 	case PTR_TO_CTX:
15489 	case PTR_TO_SOCKET:
15490 	case PTR_TO_SOCK_COMMON:
15491 	case PTR_TO_TCP_SOCK:
15492 	case PTR_TO_XDP_SOCK:
15493 	case PTR_TO_BTF_ID:
15494 		return false;
15495 	default:
15496 		return true;
15497 	}
15498 }
15499 
15500 /* If an instruction was previously used with particular pointer types, then we
15501  * need to be careful to avoid cases such as the below, where it may be ok
15502  * for one branch accessing the pointer, but not ok for the other branch:
15503  *
15504  * R1 = sock_ptr
15505  * goto X;
15506  * ...
15507  * R1 = some_other_valid_ptr;
15508  * goto X;
15509  * ...
15510  * R2 = *(u32 *)(R1 + 0);
15511  */
15512 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
15513 {
15514 	return src != prev && (!reg_type_mismatch_ok(src) ||
15515 			       !reg_type_mismatch_ok(prev));
15516 }
15517 
15518 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
15519 			     bool allow_trust_missmatch)
15520 {
15521 	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
15522 
15523 	if (*prev_type == NOT_INIT) {
15524 		/* Saw a valid insn
15525 		 * dst_reg = *(u32 *)(src_reg + off)
15526 		 * save type to validate intersecting paths
15527 		 */
15528 		*prev_type = type;
15529 	} else if (reg_type_mismatch(type, *prev_type)) {
15530 		/* Abuser program is trying to use the same insn
15531 		 * dst_reg = *(u32*) (src_reg + off)
15532 		 * with different pointer types:
15533 		 * src_reg == ctx in one branch and
15534 		 * src_reg == stack|map in some other branch.
15535 		 * Reject it.
15536 		 */
15537 		if (allow_trust_missmatch &&
15538 		    base_type(type) == PTR_TO_BTF_ID &&
15539 		    base_type(*prev_type) == PTR_TO_BTF_ID) {
15540 			/*
15541 			 * Have to support a use case when one path through
15542 			 * the program yields TRUSTED pointer while another
15543 			 * is UNTRUSTED. Fallback to UNTRUSTED to generate
15544 			 * BPF_PROBE_MEM.
15545 			 */
15546 			*prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
15547 		} else {
15548 			verbose(env, "same insn cannot be used with different pointers\n");
15549 			return -EINVAL;
15550 		}
15551 	}
15552 
15553 	return 0;
15554 }
15555 
15556 static int do_check(struct bpf_verifier_env *env)
15557 {
15558 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
15559 	struct bpf_verifier_state *state = env->cur_state;
15560 	struct bpf_insn *insns = env->prog->insnsi;
15561 	struct bpf_reg_state *regs;
15562 	int insn_cnt = env->prog->len;
15563 	bool do_print_state = false;
15564 	int prev_insn_idx = -1;
15565 
15566 	for (;;) {
15567 		struct bpf_insn *insn;
15568 		u8 class;
15569 		int err;
15570 
15571 		env->prev_insn_idx = prev_insn_idx;
15572 		if (env->insn_idx >= insn_cnt) {
15573 			verbose(env, "invalid insn idx %d insn_cnt %d\n",
15574 				env->insn_idx, insn_cnt);
15575 			return -EFAULT;
15576 		}
15577 
15578 		insn = &insns[env->insn_idx];
15579 		class = BPF_CLASS(insn->code);
15580 
15581 		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
15582 			verbose(env,
15583 				"BPF program is too large. Processed %d insn\n",
15584 				env->insn_processed);
15585 			return -E2BIG;
15586 		}
15587 
15588 		state->last_insn_idx = env->prev_insn_idx;
15589 
15590 		if (is_prune_point(env, env->insn_idx)) {
15591 			err = is_state_visited(env, env->insn_idx);
15592 			if (err < 0)
15593 				return err;
15594 			if (err == 1) {
15595 				/* found equivalent state, can prune the search */
15596 				if (env->log.level & BPF_LOG_LEVEL) {
15597 					if (do_print_state)
15598 						verbose(env, "\nfrom %d to %d%s: safe\n",
15599 							env->prev_insn_idx, env->insn_idx,
15600 							env->cur_state->speculative ?
15601 							" (speculative execution)" : "");
15602 					else
15603 						verbose(env, "%d: safe\n", env->insn_idx);
15604 				}
15605 				goto process_bpf_exit;
15606 			}
15607 		}
15608 
15609 		if (is_jmp_point(env, env->insn_idx)) {
15610 			err = push_jmp_history(env, state);
15611 			if (err)
15612 				return err;
15613 		}
15614 
15615 		if (signal_pending(current))
15616 			return -EAGAIN;
15617 
15618 		if (need_resched())
15619 			cond_resched();
15620 
15621 		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
15622 			verbose(env, "\nfrom %d to %d%s:",
15623 				env->prev_insn_idx, env->insn_idx,
15624 				env->cur_state->speculative ?
15625 				" (speculative execution)" : "");
15626 			print_verifier_state(env, state->frame[state->curframe], true);
15627 			do_print_state = false;
15628 		}
15629 
15630 		if (env->log.level & BPF_LOG_LEVEL) {
15631 			const struct bpf_insn_cbs cbs = {
15632 				.cb_call	= disasm_kfunc_name,
15633 				.cb_print	= verbose,
15634 				.private_data	= env,
15635 			};
15636 
15637 			if (verifier_state_scratched(env))
15638 				print_insn_state(env, state->frame[state->curframe]);
15639 
15640 			verbose_linfo(env, env->insn_idx, "; ");
15641 			env->prev_log_pos = env->log.end_pos;
15642 			verbose(env, "%d: ", env->insn_idx);
15643 			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
15644 			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
15645 			env->prev_log_pos = env->log.end_pos;
15646 		}
15647 
15648 		if (bpf_prog_is_offloaded(env->prog->aux)) {
15649 			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
15650 							   env->prev_insn_idx);
15651 			if (err)
15652 				return err;
15653 		}
15654 
15655 		regs = cur_regs(env);
15656 		sanitize_mark_insn_seen(env);
15657 		prev_insn_idx = env->insn_idx;
15658 
15659 		if (class == BPF_ALU || class == BPF_ALU64) {
15660 			err = check_alu_op(env, insn);
15661 			if (err)
15662 				return err;
15663 
15664 		} else if (class == BPF_LDX) {
15665 			enum bpf_reg_type src_reg_type;
15666 
15667 			/* check for reserved fields is already done */
15668 
15669 			/* check src operand */
15670 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
15671 			if (err)
15672 				return err;
15673 
15674 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15675 			if (err)
15676 				return err;
15677 
15678 			src_reg_type = regs[insn->src_reg].type;
15679 
15680 			/* check that memory (src_reg + off) is readable,
15681 			 * the state of dst_reg will be updated by this func
15682 			 */
15683 			err = check_mem_access(env, env->insn_idx, insn->src_reg,
15684 					       insn->off, BPF_SIZE(insn->code),
15685 					       BPF_READ, insn->dst_reg, false);
15686 			if (err)
15687 				return err;
15688 
15689 			err = save_aux_ptr_type(env, src_reg_type, true);
15690 			if (err)
15691 				return err;
15692 		} else if (class == BPF_STX) {
15693 			enum bpf_reg_type dst_reg_type;
15694 
15695 			if (BPF_MODE(insn->code) == BPF_ATOMIC) {
15696 				err = check_atomic(env, env->insn_idx, insn);
15697 				if (err)
15698 					return err;
15699 				env->insn_idx++;
15700 				continue;
15701 			}
15702 
15703 			if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
15704 				verbose(env, "BPF_STX uses reserved fields\n");
15705 				return -EINVAL;
15706 			}
15707 
15708 			/* check src1 operand */
15709 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
15710 			if (err)
15711 				return err;
15712 			/* check src2 operand */
15713 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15714 			if (err)
15715 				return err;
15716 
15717 			dst_reg_type = regs[insn->dst_reg].type;
15718 
15719 			/* check that memory (dst_reg + off) is writeable */
15720 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
15721 					       insn->off, BPF_SIZE(insn->code),
15722 					       BPF_WRITE, insn->src_reg, false);
15723 			if (err)
15724 				return err;
15725 
15726 			err = save_aux_ptr_type(env, dst_reg_type, false);
15727 			if (err)
15728 				return err;
15729 		} else if (class == BPF_ST) {
15730 			enum bpf_reg_type dst_reg_type;
15731 
15732 			if (BPF_MODE(insn->code) != BPF_MEM ||
15733 			    insn->src_reg != BPF_REG_0) {
15734 				verbose(env, "BPF_ST uses reserved fields\n");
15735 				return -EINVAL;
15736 			}
15737 			/* check src operand */
15738 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15739 			if (err)
15740 				return err;
15741 
15742 			dst_reg_type = regs[insn->dst_reg].type;
15743 
15744 			/* check that memory (dst_reg + off) is writeable */
15745 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
15746 					       insn->off, BPF_SIZE(insn->code),
15747 					       BPF_WRITE, -1, false);
15748 			if (err)
15749 				return err;
15750 
15751 			err = save_aux_ptr_type(env, dst_reg_type, false);
15752 			if (err)
15753 				return err;
15754 		} else if (class == BPF_JMP || class == BPF_JMP32) {
15755 			u8 opcode = BPF_OP(insn->code);
15756 
15757 			env->jmps_processed++;
15758 			if (opcode == BPF_CALL) {
15759 				if (BPF_SRC(insn->code) != BPF_K ||
15760 				    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
15761 				     && insn->off != 0) ||
15762 				    (insn->src_reg != BPF_REG_0 &&
15763 				     insn->src_reg != BPF_PSEUDO_CALL &&
15764 				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
15765 				    insn->dst_reg != BPF_REG_0 ||
15766 				    class == BPF_JMP32) {
15767 					verbose(env, "BPF_CALL uses reserved fields\n");
15768 					return -EINVAL;
15769 				}
15770 
15771 				if (env->cur_state->active_lock.ptr) {
15772 					if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
15773 					    (insn->src_reg == BPF_PSEUDO_CALL) ||
15774 					    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
15775 					     (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
15776 						verbose(env, "function calls are not allowed while holding a lock\n");
15777 						return -EINVAL;
15778 					}
15779 				}
15780 				if (insn->src_reg == BPF_PSEUDO_CALL)
15781 					err = check_func_call(env, insn, &env->insn_idx);
15782 				else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
15783 					err = check_kfunc_call(env, insn, &env->insn_idx);
15784 				else
15785 					err = check_helper_call(env, insn, &env->insn_idx);
15786 				if (err)
15787 					return err;
15788 
15789 				mark_reg_scratched(env, BPF_REG_0);
15790 			} else if (opcode == BPF_JA) {
15791 				if (BPF_SRC(insn->code) != BPF_K ||
15792 				    insn->imm != 0 ||
15793 				    insn->src_reg != BPF_REG_0 ||
15794 				    insn->dst_reg != BPF_REG_0 ||
15795 				    class == BPF_JMP32) {
15796 					verbose(env, "BPF_JA uses reserved fields\n");
15797 					return -EINVAL;
15798 				}
15799 
15800 				env->insn_idx += insn->off + 1;
15801 				continue;
15802 
15803 			} else if (opcode == BPF_EXIT) {
15804 				if (BPF_SRC(insn->code) != BPF_K ||
15805 				    insn->imm != 0 ||
15806 				    insn->src_reg != BPF_REG_0 ||
15807 				    insn->dst_reg != BPF_REG_0 ||
15808 				    class == BPF_JMP32) {
15809 					verbose(env, "BPF_EXIT uses reserved fields\n");
15810 					return -EINVAL;
15811 				}
15812 
15813 				if (env->cur_state->active_lock.ptr &&
15814 				    !in_rbtree_lock_required_cb(env)) {
15815 					verbose(env, "bpf_spin_unlock is missing\n");
15816 					return -EINVAL;
15817 				}
15818 
15819 				if (env->cur_state->active_rcu_lock) {
15820 					verbose(env, "bpf_rcu_read_unlock is missing\n");
15821 					return -EINVAL;
15822 				}
15823 
15824 				/* We must do check_reference_leak here before
15825 				 * prepare_func_exit to handle the case when
15826 				 * state->curframe > 0, it may be a callback
15827 				 * function, for which reference_state must
15828 				 * match caller reference state when it exits.
15829 				 */
15830 				err = check_reference_leak(env);
15831 				if (err)
15832 					return err;
15833 
15834 				if (state->curframe) {
15835 					/* exit from nested function */
15836 					err = prepare_func_exit(env, &env->insn_idx);
15837 					if (err)
15838 						return err;
15839 					do_print_state = true;
15840 					continue;
15841 				}
15842 
15843 				err = check_return_code(env);
15844 				if (err)
15845 					return err;
15846 process_bpf_exit:
15847 				mark_verifier_state_scratched(env);
15848 				update_branch_counts(env, env->cur_state);
15849 				err = pop_stack(env, &prev_insn_idx,
15850 						&env->insn_idx, pop_log);
15851 				if (err < 0) {
15852 					if (err != -ENOENT)
15853 						return err;
15854 					break;
15855 				} else {
15856 					do_print_state = true;
15857 					continue;
15858 				}
15859 			} else {
15860 				err = check_cond_jmp_op(env, insn, &env->insn_idx);
15861 				if (err)
15862 					return err;
15863 			}
15864 		} else if (class == BPF_LD) {
15865 			u8 mode = BPF_MODE(insn->code);
15866 
15867 			if (mode == BPF_ABS || mode == BPF_IND) {
15868 				err = check_ld_abs(env, insn);
15869 				if (err)
15870 					return err;
15871 
15872 			} else if (mode == BPF_IMM) {
15873 				err = check_ld_imm(env, insn);
15874 				if (err)
15875 					return err;
15876 
15877 				env->insn_idx++;
15878 				sanitize_mark_insn_seen(env);
15879 			} else {
15880 				verbose(env, "invalid BPF_LD mode\n");
15881 				return -EINVAL;
15882 			}
15883 		} else {
15884 			verbose(env, "unknown insn class %d\n", class);
15885 			return -EINVAL;
15886 		}
15887 
15888 		env->insn_idx++;
15889 	}
15890 
15891 	return 0;
15892 }
15893 
15894 static int find_btf_percpu_datasec(struct btf *btf)
15895 {
15896 	const struct btf_type *t;
15897 	const char *tname;
15898 	int i, n;
15899 
15900 	/*
15901 	 * Both vmlinux and module each have their own ".data..percpu"
15902 	 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
15903 	 * types to look at only module's own BTF types.
15904 	 */
15905 	n = btf_nr_types(btf);
15906 	if (btf_is_module(btf))
15907 		i = btf_nr_types(btf_vmlinux);
15908 	else
15909 		i = 1;
15910 
15911 	for(; i < n; i++) {
15912 		t = btf_type_by_id(btf, i);
15913 		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
15914 			continue;
15915 
15916 		tname = btf_name_by_offset(btf, t->name_off);
15917 		if (!strcmp(tname, ".data..percpu"))
15918 			return i;
15919 	}
15920 
15921 	return -ENOENT;
15922 }
15923 
15924 /* replace pseudo btf_id with kernel symbol address */
15925 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
15926 			       struct bpf_insn *insn,
15927 			       struct bpf_insn_aux_data *aux)
15928 {
15929 	const struct btf_var_secinfo *vsi;
15930 	const struct btf_type *datasec;
15931 	struct btf_mod_pair *btf_mod;
15932 	const struct btf_type *t;
15933 	const char *sym_name;
15934 	bool percpu = false;
15935 	u32 type, id = insn->imm;
15936 	struct btf *btf;
15937 	s32 datasec_id;
15938 	u64 addr;
15939 	int i, btf_fd, err;
15940 
15941 	btf_fd = insn[1].imm;
15942 	if (btf_fd) {
15943 		btf = btf_get_by_fd(btf_fd);
15944 		if (IS_ERR(btf)) {
15945 			verbose(env, "invalid module BTF object FD specified.\n");
15946 			return -EINVAL;
15947 		}
15948 	} else {
15949 		if (!btf_vmlinux) {
15950 			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
15951 			return -EINVAL;
15952 		}
15953 		btf = btf_vmlinux;
15954 		btf_get(btf);
15955 	}
15956 
15957 	t = btf_type_by_id(btf, id);
15958 	if (!t) {
15959 		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
15960 		err = -ENOENT;
15961 		goto err_put;
15962 	}
15963 
15964 	if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
15965 		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
15966 		err = -EINVAL;
15967 		goto err_put;
15968 	}
15969 
15970 	sym_name = btf_name_by_offset(btf, t->name_off);
15971 	addr = kallsyms_lookup_name(sym_name);
15972 	if (!addr) {
15973 		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
15974 			sym_name);
15975 		err = -ENOENT;
15976 		goto err_put;
15977 	}
15978 	insn[0].imm = (u32)addr;
15979 	insn[1].imm = addr >> 32;
15980 
15981 	if (btf_type_is_func(t)) {
15982 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
15983 		aux->btf_var.mem_size = 0;
15984 		goto check_btf;
15985 	}
15986 
15987 	datasec_id = find_btf_percpu_datasec(btf);
15988 	if (datasec_id > 0) {
15989 		datasec = btf_type_by_id(btf, datasec_id);
15990 		for_each_vsi(i, datasec, vsi) {
15991 			if (vsi->type == id) {
15992 				percpu = true;
15993 				break;
15994 			}
15995 		}
15996 	}
15997 
15998 	type = t->type;
15999 	t = btf_type_skip_modifiers(btf, type, NULL);
16000 	if (percpu) {
16001 		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
16002 		aux->btf_var.btf = btf;
16003 		aux->btf_var.btf_id = type;
16004 	} else if (!btf_type_is_struct(t)) {
16005 		const struct btf_type *ret;
16006 		const char *tname;
16007 		u32 tsize;
16008 
16009 		/* resolve the type size of ksym. */
16010 		ret = btf_resolve_size(btf, t, &tsize);
16011 		if (IS_ERR(ret)) {
16012 			tname = btf_name_by_offset(btf, t->name_off);
16013 			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
16014 				tname, PTR_ERR(ret));
16015 			err = -EINVAL;
16016 			goto err_put;
16017 		}
16018 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
16019 		aux->btf_var.mem_size = tsize;
16020 	} else {
16021 		aux->btf_var.reg_type = PTR_TO_BTF_ID;
16022 		aux->btf_var.btf = btf;
16023 		aux->btf_var.btf_id = type;
16024 	}
16025 check_btf:
16026 	/* check whether we recorded this BTF (and maybe module) already */
16027 	for (i = 0; i < env->used_btf_cnt; i++) {
16028 		if (env->used_btfs[i].btf == btf) {
16029 			btf_put(btf);
16030 			return 0;
16031 		}
16032 	}
16033 
16034 	if (env->used_btf_cnt >= MAX_USED_BTFS) {
16035 		err = -E2BIG;
16036 		goto err_put;
16037 	}
16038 
16039 	btf_mod = &env->used_btfs[env->used_btf_cnt];
16040 	btf_mod->btf = btf;
16041 	btf_mod->module = NULL;
16042 
16043 	/* if we reference variables from kernel module, bump its refcount */
16044 	if (btf_is_module(btf)) {
16045 		btf_mod->module = btf_try_get_module(btf);
16046 		if (!btf_mod->module) {
16047 			err = -ENXIO;
16048 			goto err_put;
16049 		}
16050 	}
16051 
16052 	env->used_btf_cnt++;
16053 
16054 	return 0;
16055 err_put:
16056 	btf_put(btf);
16057 	return err;
16058 }
16059 
16060 static bool is_tracing_prog_type(enum bpf_prog_type type)
16061 {
16062 	switch (type) {
16063 	case BPF_PROG_TYPE_KPROBE:
16064 	case BPF_PROG_TYPE_TRACEPOINT:
16065 	case BPF_PROG_TYPE_PERF_EVENT:
16066 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
16067 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
16068 		return true;
16069 	default:
16070 		return false;
16071 	}
16072 }
16073 
16074 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
16075 					struct bpf_map *map,
16076 					struct bpf_prog *prog)
16077 
16078 {
16079 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
16080 
16081 	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
16082 	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
16083 		if (is_tracing_prog_type(prog_type)) {
16084 			verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
16085 			return -EINVAL;
16086 		}
16087 	}
16088 
16089 	if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
16090 		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
16091 			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
16092 			return -EINVAL;
16093 		}
16094 
16095 		if (is_tracing_prog_type(prog_type)) {
16096 			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
16097 			return -EINVAL;
16098 		}
16099 
16100 		if (prog->aux->sleepable) {
16101 			verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
16102 			return -EINVAL;
16103 		}
16104 	}
16105 
16106 	if (btf_record_has_field(map->record, BPF_TIMER)) {
16107 		if (is_tracing_prog_type(prog_type)) {
16108 			verbose(env, "tracing progs cannot use bpf_timer yet\n");
16109 			return -EINVAL;
16110 		}
16111 	}
16112 
16113 	if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
16114 	    !bpf_offload_prog_map_match(prog, map)) {
16115 		verbose(env, "offload device mismatch between prog and map\n");
16116 		return -EINVAL;
16117 	}
16118 
16119 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
16120 		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
16121 		return -EINVAL;
16122 	}
16123 
16124 	if (prog->aux->sleepable)
16125 		switch (map->map_type) {
16126 		case BPF_MAP_TYPE_HASH:
16127 		case BPF_MAP_TYPE_LRU_HASH:
16128 		case BPF_MAP_TYPE_ARRAY:
16129 		case BPF_MAP_TYPE_PERCPU_HASH:
16130 		case BPF_MAP_TYPE_PERCPU_ARRAY:
16131 		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
16132 		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
16133 		case BPF_MAP_TYPE_HASH_OF_MAPS:
16134 		case BPF_MAP_TYPE_RINGBUF:
16135 		case BPF_MAP_TYPE_USER_RINGBUF:
16136 		case BPF_MAP_TYPE_INODE_STORAGE:
16137 		case BPF_MAP_TYPE_SK_STORAGE:
16138 		case BPF_MAP_TYPE_TASK_STORAGE:
16139 		case BPF_MAP_TYPE_CGRP_STORAGE:
16140 			break;
16141 		default:
16142 			verbose(env,
16143 				"Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
16144 			return -EINVAL;
16145 		}
16146 
16147 	return 0;
16148 }
16149 
16150 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
16151 {
16152 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
16153 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
16154 }
16155 
16156 /* find and rewrite pseudo imm in ld_imm64 instructions:
16157  *
16158  * 1. if it accesses map FD, replace it with actual map pointer.
16159  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
16160  *
16161  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
16162  */
16163 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
16164 {
16165 	struct bpf_insn *insn = env->prog->insnsi;
16166 	int insn_cnt = env->prog->len;
16167 	int i, j, err;
16168 
16169 	err = bpf_prog_calc_tag(env->prog);
16170 	if (err)
16171 		return err;
16172 
16173 	for (i = 0; i < insn_cnt; i++, insn++) {
16174 		if (BPF_CLASS(insn->code) == BPF_LDX &&
16175 		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
16176 			verbose(env, "BPF_LDX uses reserved fields\n");
16177 			return -EINVAL;
16178 		}
16179 
16180 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
16181 			struct bpf_insn_aux_data *aux;
16182 			struct bpf_map *map;
16183 			struct fd f;
16184 			u64 addr;
16185 			u32 fd;
16186 
16187 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
16188 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
16189 			    insn[1].off != 0) {
16190 				verbose(env, "invalid bpf_ld_imm64 insn\n");
16191 				return -EINVAL;
16192 			}
16193 
16194 			if (insn[0].src_reg == 0)
16195 				/* valid generic load 64-bit imm */
16196 				goto next_insn;
16197 
16198 			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
16199 				aux = &env->insn_aux_data[i];
16200 				err = check_pseudo_btf_id(env, insn, aux);
16201 				if (err)
16202 					return err;
16203 				goto next_insn;
16204 			}
16205 
16206 			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
16207 				aux = &env->insn_aux_data[i];
16208 				aux->ptr_type = PTR_TO_FUNC;
16209 				goto next_insn;
16210 			}
16211 
16212 			/* In final convert_pseudo_ld_imm64() step, this is
16213 			 * converted into regular 64-bit imm load insn.
16214 			 */
16215 			switch (insn[0].src_reg) {
16216 			case BPF_PSEUDO_MAP_VALUE:
16217 			case BPF_PSEUDO_MAP_IDX_VALUE:
16218 				break;
16219 			case BPF_PSEUDO_MAP_FD:
16220 			case BPF_PSEUDO_MAP_IDX:
16221 				if (insn[1].imm == 0)
16222 					break;
16223 				fallthrough;
16224 			default:
16225 				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
16226 				return -EINVAL;
16227 			}
16228 
16229 			switch (insn[0].src_reg) {
16230 			case BPF_PSEUDO_MAP_IDX_VALUE:
16231 			case BPF_PSEUDO_MAP_IDX:
16232 				if (bpfptr_is_null(env->fd_array)) {
16233 					verbose(env, "fd_idx without fd_array is invalid\n");
16234 					return -EPROTO;
16235 				}
16236 				if (copy_from_bpfptr_offset(&fd, env->fd_array,
16237 							    insn[0].imm * sizeof(fd),
16238 							    sizeof(fd)))
16239 					return -EFAULT;
16240 				break;
16241 			default:
16242 				fd = insn[0].imm;
16243 				break;
16244 			}
16245 
16246 			f = fdget(fd);
16247 			map = __bpf_map_get(f);
16248 			if (IS_ERR(map)) {
16249 				verbose(env, "fd %d is not pointing to valid bpf_map\n",
16250 					insn[0].imm);
16251 				return PTR_ERR(map);
16252 			}
16253 
16254 			err = check_map_prog_compatibility(env, map, env->prog);
16255 			if (err) {
16256 				fdput(f);
16257 				return err;
16258 			}
16259 
16260 			aux = &env->insn_aux_data[i];
16261 			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
16262 			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
16263 				addr = (unsigned long)map;
16264 			} else {
16265 				u32 off = insn[1].imm;
16266 
16267 				if (off >= BPF_MAX_VAR_OFF) {
16268 					verbose(env, "direct value offset of %u is not allowed\n", off);
16269 					fdput(f);
16270 					return -EINVAL;
16271 				}
16272 
16273 				if (!map->ops->map_direct_value_addr) {
16274 					verbose(env, "no direct value access support for this map type\n");
16275 					fdput(f);
16276 					return -EINVAL;
16277 				}
16278 
16279 				err = map->ops->map_direct_value_addr(map, &addr, off);
16280 				if (err) {
16281 					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
16282 						map->value_size, off);
16283 					fdput(f);
16284 					return err;
16285 				}
16286 
16287 				aux->map_off = off;
16288 				addr += off;
16289 			}
16290 
16291 			insn[0].imm = (u32)addr;
16292 			insn[1].imm = addr >> 32;
16293 
16294 			/* check whether we recorded this map already */
16295 			for (j = 0; j < env->used_map_cnt; j++) {
16296 				if (env->used_maps[j] == map) {
16297 					aux->map_index = j;
16298 					fdput(f);
16299 					goto next_insn;
16300 				}
16301 			}
16302 
16303 			if (env->used_map_cnt >= MAX_USED_MAPS) {
16304 				fdput(f);
16305 				return -E2BIG;
16306 			}
16307 
16308 			/* hold the map. If the program is rejected by verifier,
16309 			 * the map will be released by release_maps() or it
16310 			 * will be used by the valid program until it's unloaded
16311 			 * and all maps are released in free_used_maps()
16312 			 */
16313 			bpf_map_inc(map);
16314 
16315 			aux->map_index = env->used_map_cnt;
16316 			env->used_maps[env->used_map_cnt++] = map;
16317 
16318 			if (bpf_map_is_cgroup_storage(map) &&
16319 			    bpf_cgroup_storage_assign(env->prog->aux, map)) {
16320 				verbose(env, "only one cgroup storage of each type is allowed\n");
16321 				fdput(f);
16322 				return -EBUSY;
16323 			}
16324 
16325 			fdput(f);
16326 next_insn:
16327 			insn++;
16328 			i++;
16329 			continue;
16330 		}
16331 
16332 		/* Basic sanity check before we invest more work here. */
16333 		if (!bpf_opcode_in_insntable(insn->code)) {
16334 			verbose(env, "unknown opcode %02x\n", insn->code);
16335 			return -EINVAL;
16336 		}
16337 	}
16338 
16339 	/* now all pseudo BPF_LD_IMM64 instructions load valid
16340 	 * 'struct bpf_map *' into a register instead of user map_fd.
16341 	 * These pointers will be used later by verifier to validate map access.
16342 	 */
16343 	return 0;
16344 }
16345 
16346 /* drop refcnt of maps used by the rejected program */
16347 static void release_maps(struct bpf_verifier_env *env)
16348 {
16349 	__bpf_free_used_maps(env->prog->aux, env->used_maps,
16350 			     env->used_map_cnt);
16351 }
16352 
16353 /* drop refcnt of maps used by the rejected program */
16354 static void release_btfs(struct bpf_verifier_env *env)
16355 {
16356 	__bpf_free_used_btfs(env->prog->aux, env->used_btfs,
16357 			     env->used_btf_cnt);
16358 }
16359 
16360 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
16361 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
16362 {
16363 	struct bpf_insn *insn = env->prog->insnsi;
16364 	int insn_cnt = env->prog->len;
16365 	int i;
16366 
16367 	for (i = 0; i < insn_cnt; i++, insn++) {
16368 		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
16369 			continue;
16370 		if (insn->src_reg == BPF_PSEUDO_FUNC)
16371 			continue;
16372 		insn->src_reg = 0;
16373 	}
16374 }
16375 
16376 /* single env->prog->insni[off] instruction was replaced with the range
16377  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
16378  * [0, off) and [off, end) to new locations, so the patched range stays zero
16379  */
16380 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
16381 				 struct bpf_insn_aux_data *new_data,
16382 				 struct bpf_prog *new_prog, u32 off, u32 cnt)
16383 {
16384 	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
16385 	struct bpf_insn *insn = new_prog->insnsi;
16386 	u32 old_seen = old_data[off].seen;
16387 	u32 prog_len;
16388 	int i;
16389 
16390 	/* aux info at OFF always needs adjustment, no matter fast path
16391 	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
16392 	 * original insn at old prog.
16393 	 */
16394 	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
16395 
16396 	if (cnt == 1)
16397 		return;
16398 	prog_len = new_prog->len;
16399 
16400 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
16401 	memcpy(new_data + off + cnt - 1, old_data + off,
16402 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
16403 	for (i = off; i < off + cnt - 1; i++) {
16404 		/* Expand insni[off]'s seen count to the patched range. */
16405 		new_data[i].seen = old_seen;
16406 		new_data[i].zext_dst = insn_has_def32(env, insn + i);
16407 	}
16408 	env->insn_aux_data = new_data;
16409 	vfree(old_data);
16410 }
16411 
16412 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
16413 {
16414 	int i;
16415 
16416 	if (len == 1)
16417 		return;
16418 	/* NOTE: fake 'exit' subprog should be updated as well. */
16419 	for (i = 0; i <= env->subprog_cnt; i++) {
16420 		if (env->subprog_info[i].start <= off)
16421 			continue;
16422 		env->subprog_info[i].start += len - 1;
16423 	}
16424 }
16425 
16426 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
16427 {
16428 	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
16429 	int i, sz = prog->aux->size_poke_tab;
16430 	struct bpf_jit_poke_descriptor *desc;
16431 
16432 	for (i = 0; i < sz; i++) {
16433 		desc = &tab[i];
16434 		if (desc->insn_idx <= off)
16435 			continue;
16436 		desc->insn_idx += len - 1;
16437 	}
16438 }
16439 
16440 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
16441 					    const struct bpf_insn *patch, u32 len)
16442 {
16443 	struct bpf_prog *new_prog;
16444 	struct bpf_insn_aux_data *new_data = NULL;
16445 
16446 	if (len > 1) {
16447 		new_data = vzalloc(array_size(env->prog->len + len - 1,
16448 					      sizeof(struct bpf_insn_aux_data)));
16449 		if (!new_data)
16450 			return NULL;
16451 	}
16452 
16453 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
16454 	if (IS_ERR(new_prog)) {
16455 		if (PTR_ERR(new_prog) == -ERANGE)
16456 			verbose(env,
16457 				"insn %d cannot be patched due to 16-bit range\n",
16458 				env->insn_aux_data[off].orig_idx);
16459 		vfree(new_data);
16460 		return NULL;
16461 	}
16462 	adjust_insn_aux_data(env, new_data, new_prog, off, len);
16463 	adjust_subprog_starts(env, off, len);
16464 	adjust_poke_descs(new_prog, off, len);
16465 	return new_prog;
16466 }
16467 
16468 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
16469 					      u32 off, u32 cnt)
16470 {
16471 	int i, j;
16472 
16473 	/* find first prog starting at or after off (first to remove) */
16474 	for (i = 0; i < env->subprog_cnt; i++)
16475 		if (env->subprog_info[i].start >= off)
16476 			break;
16477 	/* find first prog starting at or after off + cnt (first to stay) */
16478 	for (j = i; j < env->subprog_cnt; j++)
16479 		if (env->subprog_info[j].start >= off + cnt)
16480 			break;
16481 	/* if j doesn't start exactly at off + cnt, we are just removing
16482 	 * the front of previous prog
16483 	 */
16484 	if (env->subprog_info[j].start != off + cnt)
16485 		j--;
16486 
16487 	if (j > i) {
16488 		struct bpf_prog_aux *aux = env->prog->aux;
16489 		int move;
16490 
16491 		/* move fake 'exit' subprog as well */
16492 		move = env->subprog_cnt + 1 - j;
16493 
16494 		memmove(env->subprog_info + i,
16495 			env->subprog_info + j,
16496 			sizeof(*env->subprog_info) * move);
16497 		env->subprog_cnt -= j - i;
16498 
16499 		/* remove func_info */
16500 		if (aux->func_info) {
16501 			move = aux->func_info_cnt - j;
16502 
16503 			memmove(aux->func_info + i,
16504 				aux->func_info + j,
16505 				sizeof(*aux->func_info) * move);
16506 			aux->func_info_cnt -= j - i;
16507 			/* func_info->insn_off is set after all code rewrites,
16508 			 * in adjust_btf_func() - no need to adjust
16509 			 */
16510 		}
16511 	} else {
16512 		/* convert i from "first prog to remove" to "first to adjust" */
16513 		if (env->subprog_info[i].start == off)
16514 			i++;
16515 	}
16516 
16517 	/* update fake 'exit' subprog as well */
16518 	for (; i <= env->subprog_cnt; i++)
16519 		env->subprog_info[i].start -= cnt;
16520 
16521 	return 0;
16522 }
16523 
16524 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
16525 				      u32 cnt)
16526 {
16527 	struct bpf_prog *prog = env->prog;
16528 	u32 i, l_off, l_cnt, nr_linfo;
16529 	struct bpf_line_info *linfo;
16530 
16531 	nr_linfo = prog->aux->nr_linfo;
16532 	if (!nr_linfo)
16533 		return 0;
16534 
16535 	linfo = prog->aux->linfo;
16536 
16537 	/* find first line info to remove, count lines to be removed */
16538 	for (i = 0; i < nr_linfo; i++)
16539 		if (linfo[i].insn_off >= off)
16540 			break;
16541 
16542 	l_off = i;
16543 	l_cnt = 0;
16544 	for (; i < nr_linfo; i++)
16545 		if (linfo[i].insn_off < off + cnt)
16546 			l_cnt++;
16547 		else
16548 			break;
16549 
16550 	/* First live insn doesn't match first live linfo, it needs to "inherit"
16551 	 * last removed linfo.  prog is already modified, so prog->len == off
16552 	 * means no live instructions after (tail of the program was removed).
16553 	 */
16554 	if (prog->len != off && l_cnt &&
16555 	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
16556 		l_cnt--;
16557 		linfo[--i].insn_off = off + cnt;
16558 	}
16559 
16560 	/* remove the line info which refer to the removed instructions */
16561 	if (l_cnt) {
16562 		memmove(linfo + l_off, linfo + i,
16563 			sizeof(*linfo) * (nr_linfo - i));
16564 
16565 		prog->aux->nr_linfo -= l_cnt;
16566 		nr_linfo = prog->aux->nr_linfo;
16567 	}
16568 
16569 	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
16570 	for (i = l_off; i < nr_linfo; i++)
16571 		linfo[i].insn_off -= cnt;
16572 
16573 	/* fix up all subprogs (incl. 'exit') which start >= off */
16574 	for (i = 0; i <= env->subprog_cnt; i++)
16575 		if (env->subprog_info[i].linfo_idx > l_off) {
16576 			/* program may have started in the removed region but
16577 			 * may not be fully removed
16578 			 */
16579 			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
16580 				env->subprog_info[i].linfo_idx -= l_cnt;
16581 			else
16582 				env->subprog_info[i].linfo_idx = l_off;
16583 		}
16584 
16585 	return 0;
16586 }
16587 
16588 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
16589 {
16590 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
16591 	unsigned int orig_prog_len = env->prog->len;
16592 	int err;
16593 
16594 	if (bpf_prog_is_offloaded(env->prog->aux))
16595 		bpf_prog_offload_remove_insns(env, off, cnt);
16596 
16597 	err = bpf_remove_insns(env->prog, off, cnt);
16598 	if (err)
16599 		return err;
16600 
16601 	err = adjust_subprog_starts_after_remove(env, off, cnt);
16602 	if (err)
16603 		return err;
16604 
16605 	err = bpf_adj_linfo_after_remove(env, off, cnt);
16606 	if (err)
16607 		return err;
16608 
16609 	memmove(aux_data + off,	aux_data + off + cnt,
16610 		sizeof(*aux_data) * (orig_prog_len - off - cnt));
16611 
16612 	return 0;
16613 }
16614 
16615 /* The verifier does more data flow analysis than llvm and will not
16616  * explore branches that are dead at run time. Malicious programs can
16617  * have dead code too. Therefore replace all dead at-run-time code
16618  * with 'ja -1'.
16619  *
16620  * Just nops are not optimal, e.g. if they would sit at the end of the
16621  * program and through another bug we would manage to jump there, then
16622  * we'd execute beyond program memory otherwise. Returning exception
16623  * code also wouldn't work since we can have subprogs where the dead
16624  * code could be located.
16625  */
16626 static void sanitize_dead_code(struct bpf_verifier_env *env)
16627 {
16628 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
16629 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
16630 	struct bpf_insn *insn = env->prog->insnsi;
16631 	const int insn_cnt = env->prog->len;
16632 	int i;
16633 
16634 	for (i = 0; i < insn_cnt; i++) {
16635 		if (aux_data[i].seen)
16636 			continue;
16637 		memcpy(insn + i, &trap, sizeof(trap));
16638 		aux_data[i].zext_dst = false;
16639 	}
16640 }
16641 
16642 static bool insn_is_cond_jump(u8 code)
16643 {
16644 	u8 op;
16645 
16646 	if (BPF_CLASS(code) == BPF_JMP32)
16647 		return true;
16648 
16649 	if (BPF_CLASS(code) != BPF_JMP)
16650 		return false;
16651 
16652 	op = BPF_OP(code);
16653 	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
16654 }
16655 
16656 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
16657 {
16658 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
16659 	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
16660 	struct bpf_insn *insn = env->prog->insnsi;
16661 	const int insn_cnt = env->prog->len;
16662 	int i;
16663 
16664 	for (i = 0; i < insn_cnt; i++, insn++) {
16665 		if (!insn_is_cond_jump(insn->code))
16666 			continue;
16667 
16668 		if (!aux_data[i + 1].seen)
16669 			ja.off = insn->off;
16670 		else if (!aux_data[i + 1 + insn->off].seen)
16671 			ja.off = 0;
16672 		else
16673 			continue;
16674 
16675 		if (bpf_prog_is_offloaded(env->prog->aux))
16676 			bpf_prog_offload_replace_insn(env, i, &ja);
16677 
16678 		memcpy(insn, &ja, sizeof(ja));
16679 	}
16680 }
16681 
16682 static int opt_remove_dead_code(struct bpf_verifier_env *env)
16683 {
16684 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
16685 	int insn_cnt = env->prog->len;
16686 	int i, err;
16687 
16688 	for (i = 0; i < insn_cnt; i++) {
16689 		int j;
16690 
16691 		j = 0;
16692 		while (i + j < insn_cnt && !aux_data[i + j].seen)
16693 			j++;
16694 		if (!j)
16695 			continue;
16696 
16697 		err = verifier_remove_insns(env, i, j);
16698 		if (err)
16699 			return err;
16700 		insn_cnt = env->prog->len;
16701 	}
16702 
16703 	return 0;
16704 }
16705 
16706 static int opt_remove_nops(struct bpf_verifier_env *env)
16707 {
16708 	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
16709 	struct bpf_insn *insn = env->prog->insnsi;
16710 	int insn_cnt = env->prog->len;
16711 	int i, err;
16712 
16713 	for (i = 0; i < insn_cnt; i++) {
16714 		if (memcmp(&insn[i], &ja, sizeof(ja)))
16715 			continue;
16716 
16717 		err = verifier_remove_insns(env, i, 1);
16718 		if (err)
16719 			return err;
16720 		insn_cnt--;
16721 		i--;
16722 	}
16723 
16724 	return 0;
16725 }
16726 
16727 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
16728 					 const union bpf_attr *attr)
16729 {
16730 	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
16731 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
16732 	int i, patch_len, delta = 0, len = env->prog->len;
16733 	struct bpf_insn *insns = env->prog->insnsi;
16734 	struct bpf_prog *new_prog;
16735 	bool rnd_hi32;
16736 
16737 	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
16738 	zext_patch[1] = BPF_ZEXT_REG(0);
16739 	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
16740 	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
16741 	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
16742 	for (i = 0; i < len; i++) {
16743 		int adj_idx = i + delta;
16744 		struct bpf_insn insn;
16745 		int load_reg;
16746 
16747 		insn = insns[adj_idx];
16748 		load_reg = insn_def_regno(&insn);
16749 		if (!aux[adj_idx].zext_dst) {
16750 			u8 code, class;
16751 			u32 imm_rnd;
16752 
16753 			if (!rnd_hi32)
16754 				continue;
16755 
16756 			code = insn.code;
16757 			class = BPF_CLASS(code);
16758 			if (load_reg == -1)
16759 				continue;
16760 
16761 			/* NOTE: arg "reg" (the fourth one) is only used for
16762 			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
16763 			 *       here.
16764 			 */
16765 			if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
16766 				if (class == BPF_LD &&
16767 				    BPF_MODE(code) == BPF_IMM)
16768 					i++;
16769 				continue;
16770 			}
16771 
16772 			/* ctx load could be transformed into wider load. */
16773 			if (class == BPF_LDX &&
16774 			    aux[adj_idx].ptr_type == PTR_TO_CTX)
16775 				continue;
16776 
16777 			imm_rnd = get_random_u32();
16778 			rnd_hi32_patch[0] = insn;
16779 			rnd_hi32_patch[1].imm = imm_rnd;
16780 			rnd_hi32_patch[3].dst_reg = load_reg;
16781 			patch = rnd_hi32_patch;
16782 			patch_len = 4;
16783 			goto apply_patch_buffer;
16784 		}
16785 
16786 		/* Add in an zero-extend instruction if a) the JIT has requested
16787 		 * it or b) it's a CMPXCHG.
16788 		 *
16789 		 * The latter is because: BPF_CMPXCHG always loads a value into
16790 		 * R0, therefore always zero-extends. However some archs'
16791 		 * equivalent instruction only does this load when the
16792 		 * comparison is successful. This detail of CMPXCHG is
16793 		 * orthogonal to the general zero-extension behaviour of the
16794 		 * CPU, so it's treated independently of bpf_jit_needs_zext.
16795 		 */
16796 		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
16797 			continue;
16798 
16799 		/* Zero-extension is done by the caller. */
16800 		if (bpf_pseudo_kfunc_call(&insn))
16801 			continue;
16802 
16803 		if (WARN_ON(load_reg == -1)) {
16804 			verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
16805 			return -EFAULT;
16806 		}
16807 
16808 		zext_patch[0] = insn;
16809 		zext_patch[1].dst_reg = load_reg;
16810 		zext_patch[1].src_reg = load_reg;
16811 		patch = zext_patch;
16812 		patch_len = 2;
16813 apply_patch_buffer:
16814 		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
16815 		if (!new_prog)
16816 			return -ENOMEM;
16817 		env->prog = new_prog;
16818 		insns = new_prog->insnsi;
16819 		aux = env->insn_aux_data;
16820 		delta += patch_len - 1;
16821 	}
16822 
16823 	return 0;
16824 }
16825 
16826 /* convert load instructions that access fields of a context type into a
16827  * sequence of instructions that access fields of the underlying structure:
16828  *     struct __sk_buff    -> struct sk_buff
16829  *     struct bpf_sock_ops -> struct sock
16830  */
16831 static int convert_ctx_accesses(struct bpf_verifier_env *env)
16832 {
16833 	const struct bpf_verifier_ops *ops = env->ops;
16834 	int i, cnt, size, ctx_field_size, delta = 0;
16835 	const int insn_cnt = env->prog->len;
16836 	struct bpf_insn insn_buf[16], *insn;
16837 	u32 target_size, size_default, off;
16838 	struct bpf_prog *new_prog;
16839 	enum bpf_access_type type;
16840 	bool is_narrower_load;
16841 
16842 	if (ops->gen_prologue || env->seen_direct_write) {
16843 		if (!ops->gen_prologue) {
16844 			verbose(env, "bpf verifier is misconfigured\n");
16845 			return -EINVAL;
16846 		}
16847 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
16848 					env->prog);
16849 		if (cnt >= ARRAY_SIZE(insn_buf)) {
16850 			verbose(env, "bpf verifier is misconfigured\n");
16851 			return -EINVAL;
16852 		} else if (cnt) {
16853 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
16854 			if (!new_prog)
16855 				return -ENOMEM;
16856 
16857 			env->prog = new_prog;
16858 			delta += cnt - 1;
16859 		}
16860 	}
16861 
16862 	if (bpf_prog_is_offloaded(env->prog->aux))
16863 		return 0;
16864 
16865 	insn = env->prog->insnsi + delta;
16866 
16867 	for (i = 0; i < insn_cnt; i++, insn++) {
16868 		bpf_convert_ctx_access_t convert_ctx_access;
16869 
16870 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
16871 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
16872 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
16873 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
16874 			type = BPF_READ;
16875 		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
16876 			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
16877 			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
16878 			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
16879 			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
16880 			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
16881 			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
16882 			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
16883 			type = BPF_WRITE;
16884 		} else {
16885 			continue;
16886 		}
16887 
16888 		if (type == BPF_WRITE &&
16889 		    env->insn_aux_data[i + delta].sanitize_stack_spill) {
16890 			struct bpf_insn patch[] = {
16891 				*insn,
16892 				BPF_ST_NOSPEC(),
16893 			};
16894 
16895 			cnt = ARRAY_SIZE(patch);
16896 			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
16897 			if (!new_prog)
16898 				return -ENOMEM;
16899 
16900 			delta    += cnt - 1;
16901 			env->prog = new_prog;
16902 			insn      = new_prog->insnsi + i + delta;
16903 			continue;
16904 		}
16905 
16906 		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
16907 		case PTR_TO_CTX:
16908 			if (!ops->convert_ctx_access)
16909 				continue;
16910 			convert_ctx_access = ops->convert_ctx_access;
16911 			break;
16912 		case PTR_TO_SOCKET:
16913 		case PTR_TO_SOCK_COMMON:
16914 			convert_ctx_access = bpf_sock_convert_ctx_access;
16915 			break;
16916 		case PTR_TO_TCP_SOCK:
16917 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
16918 			break;
16919 		case PTR_TO_XDP_SOCK:
16920 			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
16921 			break;
16922 		case PTR_TO_BTF_ID:
16923 		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
16924 		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
16925 		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
16926 		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
16927 		 * any faults for loads into such types. BPF_WRITE is disallowed
16928 		 * for this case.
16929 		 */
16930 		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
16931 			if (type == BPF_READ) {
16932 				insn->code = BPF_LDX | BPF_PROBE_MEM |
16933 					BPF_SIZE((insn)->code);
16934 				env->prog->aux->num_exentries++;
16935 			}
16936 			continue;
16937 		default:
16938 			continue;
16939 		}
16940 
16941 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
16942 		size = BPF_LDST_BYTES(insn);
16943 
16944 		/* If the read access is a narrower load of the field,
16945 		 * convert to a 4/8-byte load, to minimum program type specific
16946 		 * convert_ctx_access changes. If conversion is successful,
16947 		 * we will apply proper mask to the result.
16948 		 */
16949 		is_narrower_load = size < ctx_field_size;
16950 		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
16951 		off = insn->off;
16952 		if (is_narrower_load) {
16953 			u8 size_code;
16954 
16955 			if (type == BPF_WRITE) {
16956 				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
16957 				return -EINVAL;
16958 			}
16959 
16960 			size_code = BPF_H;
16961 			if (ctx_field_size == 4)
16962 				size_code = BPF_W;
16963 			else if (ctx_field_size == 8)
16964 				size_code = BPF_DW;
16965 
16966 			insn->off = off & ~(size_default - 1);
16967 			insn->code = BPF_LDX | BPF_MEM | size_code;
16968 		}
16969 
16970 		target_size = 0;
16971 		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
16972 					 &target_size);
16973 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
16974 		    (ctx_field_size && !target_size)) {
16975 			verbose(env, "bpf verifier is misconfigured\n");
16976 			return -EINVAL;
16977 		}
16978 
16979 		if (is_narrower_load && size < target_size) {
16980 			u8 shift = bpf_ctx_narrow_access_offset(
16981 				off, size, size_default) * 8;
16982 			if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
16983 				verbose(env, "bpf verifier narrow ctx load misconfigured\n");
16984 				return -EINVAL;
16985 			}
16986 			if (ctx_field_size <= 4) {
16987 				if (shift)
16988 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
16989 									insn->dst_reg,
16990 									shift);
16991 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
16992 								(1 << size * 8) - 1);
16993 			} else {
16994 				if (shift)
16995 					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
16996 									insn->dst_reg,
16997 									shift);
16998 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
16999 								(1ULL << size * 8) - 1);
17000 			}
17001 		}
17002 
17003 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17004 		if (!new_prog)
17005 			return -ENOMEM;
17006 
17007 		delta += cnt - 1;
17008 
17009 		/* keep walking new program and skip insns we just inserted */
17010 		env->prog = new_prog;
17011 		insn      = new_prog->insnsi + i + delta;
17012 	}
17013 
17014 	return 0;
17015 }
17016 
17017 static int jit_subprogs(struct bpf_verifier_env *env)
17018 {
17019 	struct bpf_prog *prog = env->prog, **func, *tmp;
17020 	int i, j, subprog_start, subprog_end = 0, len, subprog;
17021 	struct bpf_map *map_ptr;
17022 	struct bpf_insn *insn;
17023 	void *old_bpf_func;
17024 	int err, num_exentries;
17025 
17026 	if (env->subprog_cnt <= 1)
17027 		return 0;
17028 
17029 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
17030 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
17031 			continue;
17032 
17033 		/* Upon error here we cannot fall back to interpreter but
17034 		 * need a hard reject of the program. Thus -EFAULT is
17035 		 * propagated in any case.
17036 		 */
17037 		subprog = find_subprog(env, i + insn->imm + 1);
17038 		if (subprog < 0) {
17039 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
17040 				  i + insn->imm + 1);
17041 			return -EFAULT;
17042 		}
17043 		/* temporarily remember subprog id inside insn instead of
17044 		 * aux_data, since next loop will split up all insns into funcs
17045 		 */
17046 		insn->off = subprog;
17047 		/* remember original imm in case JIT fails and fallback
17048 		 * to interpreter will be needed
17049 		 */
17050 		env->insn_aux_data[i].call_imm = insn->imm;
17051 		/* point imm to __bpf_call_base+1 from JITs point of view */
17052 		insn->imm = 1;
17053 		if (bpf_pseudo_func(insn))
17054 			/* jit (e.g. x86_64) may emit fewer instructions
17055 			 * if it learns a u32 imm is the same as a u64 imm.
17056 			 * Force a non zero here.
17057 			 */
17058 			insn[1].imm = 1;
17059 	}
17060 
17061 	err = bpf_prog_alloc_jited_linfo(prog);
17062 	if (err)
17063 		goto out_undo_insn;
17064 
17065 	err = -ENOMEM;
17066 	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
17067 	if (!func)
17068 		goto out_undo_insn;
17069 
17070 	for (i = 0; i < env->subprog_cnt; i++) {
17071 		subprog_start = subprog_end;
17072 		subprog_end = env->subprog_info[i + 1].start;
17073 
17074 		len = subprog_end - subprog_start;
17075 		/* bpf_prog_run() doesn't call subprogs directly,
17076 		 * hence main prog stats include the runtime of subprogs.
17077 		 * subprogs don't have IDs and not reachable via prog_get_next_id
17078 		 * func[i]->stats will never be accessed and stays NULL
17079 		 */
17080 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
17081 		if (!func[i])
17082 			goto out_free;
17083 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
17084 		       len * sizeof(struct bpf_insn));
17085 		func[i]->type = prog->type;
17086 		func[i]->len = len;
17087 		if (bpf_prog_calc_tag(func[i]))
17088 			goto out_free;
17089 		func[i]->is_func = 1;
17090 		func[i]->aux->func_idx = i;
17091 		/* Below members will be freed only at prog->aux */
17092 		func[i]->aux->btf = prog->aux->btf;
17093 		func[i]->aux->func_info = prog->aux->func_info;
17094 		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
17095 		func[i]->aux->poke_tab = prog->aux->poke_tab;
17096 		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
17097 
17098 		for (j = 0; j < prog->aux->size_poke_tab; j++) {
17099 			struct bpf_jit_poke_descriptor *poke;
17100 
17101 			poke = &prog->aux->poke_tab[j];
17102 			if (poke->insn_idx < subprog_end &&
17103 			    poke->insn_idx >= subprog_start)
17104 				poke->aux = func[i]->aux;
17105 		}
17106 
17107 		func[i]->aux->name[0] = 'F';
17108 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
17109 		func[i]->jit_requested = 1;
17110 		func[i]->blinding_requested = prog->blinding_requested;
17111 		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
17112 		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
17113 		func[i]->aux->linfo = prog->aux->linfo;
17114 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
17115 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
17116 		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
17117 		num_exentries = 0;
17118 		insn = func[i]->insnsi;
17119 		for (j = 0; j < func[i]->len; j++, insn++) {
17120 			if (BPF_CLASS(insn->code) == BPF_LDX &&
17121 			    BPF_MODE(insn->code) == BPF_PROBE_MEM)
17122 				num_exentries++;
17123 		}
17124 		func[i]->aux->num_exentries = num_exentries;
17125 		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
17126 		func[i] = bpf_int_jit_compile(func[i]);
17127 		if (!func[i]->jited) {
17128 			err = -ENOTSUPP;
17129 			goto out_free;
17130 		}
17131 		cond_resched();
17132 	}
17133 
17134 	/* at this point all bpf functions were successfully JITed
17135 	 * now populate all bpf_calls with correct addresses and
17136 	 * run last pass of JIT
17137 	 */
17138 	for (i = 0; i < env->subprog_cnt; i++) {
17139 		insn = func[i]->insnsi;
17140 		for (j = 0; j < func[i]->len; j++, insn++) {
17141 			if (bpf_pseudo_func(insn)) {
17142 				subprog = insn->off;
17143 				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
17144 				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
17145 				continue;
17146 			}
17147 			if (!bpf_pseudo_call(insn))
17148 				continue;
17149 			subprog = insn->off;
17150 			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
17151 		}
17152 
17153 		/* we use the aux data to keep a list of the start addresses
17154 		 * of the JITed images for each function in the program
17155 		 *
17156 		 * for some architectures, such as powerpc64, the imm field
17157 		 * might not be large enough to hold the offset of the start
17158 		 * address of the callee's JITed image from __bpf_call_base
17159 		 *
17160 		 * in such cases, we can lookup the start address of a callee
17161 		 * by using its subprog id, available from the off field of
17162 		 * the call instruction, as an index for this list
17163 		 */
17164 		func[i]->aux->func = func;
17165 		func[i]->aux->func_cnt = env->subprog_cnt;
17166 	}
17167 	for (i = 0; i < env->subprog_cnt; i++) {
17168 		old_bpf_func = func[i]->bpf_func;
17169 		tmp = bpf_int_jit_compile(func[i]);
17170 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
17171 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
17172 			err = -ENOTSUPP;
17173 			goto out_free;
17174 		}
17175 		cond_resched();
17176 	}
17177 
17178 	/* finally lock prog and jit images for all functions and
17179 	 * populate kallsysm
17180 	 */
17181 	for (i = 0; i < env->subprog_cnt; i++) {
17182 		bpf_prog_lock_ro(func[i]);
17183 		bpf_prog_kallsyms_add(func[i]);
17184 	}
17185 
17186 	/* Last step: make now unused interpreter insns from main
17187 	 * prog consistent for later dump requests, so they can
17188 	 * later look the same as if they were interpreted only.
17189 	 */
17190 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
17191 		if (bpf_pseudo_func(insn)) {
17192 			insn[0].imm = env->insn_aux_data[i].call_imm;
17193 			insn[1].imm = insn->off;
17194 			insn->off = 0;
17195 			continue;
17196 		}
17197 		if (!bpf_pseudo_call(insn))
17198 			continue;
17199 		insn->off = env->insn_aux_data[i].call_imm;
17200 		subprog = find_subprog(env, i + insn->off + 1);
17201 		insn->imm = subprog;
17202 	}
17203 
17204 	prog->jited = 1;
17205 	prog->bpf_func = func[0]->bpf_func;
17206 	prog->jited_len = func[0]->jited_len;
17207 	prog->aux->func = func;
17208 	prog->aux->func_cnt = env->subprog_cnt;
17209 	bpf_prog_jit_attempt_done(prog);
17210 	return 0;
17211 out_free:
17212 	/* We failed JIT'ing, so at this point we need to unregister poke
17213 	 * descriptors from subprogs, so that kernel is not attempting to
17214 	 * patch it anymore as we're freeing the subprog JIT memory.
17215 	 */
17216 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
17217 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
17218 		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
17219 	}
17220 	/* At this point we're guaranteed that poke descriptors are not
17221 	 * live anymore. We can just unlink its descriptor table as it's
17222 	 * released with the main prog.
17223 	 */
17224 	for (i = 0; i < env->subprog_cnt; i++) {
17225 		if (!func[i])
17226 			continue;
17227 		func[i]->aux->poke_tab = NULL;
17228 		bpf_jit_free(func[i]);
17229 	}
17230 	kfree(func);
17231 out_undo_insn:
17232 	/* cleanup main prog to be interpreted */
17233 	prog->jit_requested = 0;
17234 	prog->blinding_requested = 0;
17235 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
17236 		if (!bpf_pseudo_call(insn))
17237 			continue;
17238 		insn->off = 0;
17239 		insn->imm = env->insn_aux_data[i].call_imm;
17240 	}
17241 	bpf_prog_jit_attempt_done(prog);
17242 	return err;
17243 }
17244 
17245 static int fixup_call_args(struct bpf_verifier_env *env)
17246 {
17247 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
17248 	struct bpf_prog *prog = env->prog;
17249 	struct bpf_insn *insn = prog->insnsi;
17250 	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
17251 	int i, depth;
17252 #endif
17253 	int err = 0;
17254 
17255 	if (env->prog->jit_requested &&
17256 	    !bpf_prog_is_offloaded(env->prog->aux)) {
17257 		err = jit_subprogs(env);
17258 		if (err == 0)
17259 			return 0;
17260 		if (err == -EFAULT)
17261 			return err;
17262 	}
17263 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
17264 	if (has_kfunc_call) {
17265 		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
17266 		return -EINVAL;
17267 	}
17268 	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
17269 		/* When JIT fails the progs with bpf2bpf calls and tail_calls
17270 		 * have to be rejected, since interpreter doesn't support them yet.
17271 		 */
17272 		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
17273 		return -EINVAL;
17274 	}
17275 	for (i = 0; i < prog->len; i++, insn++) {
17276 		if (bpf_pseudo_func(insn)) {
17277 			/* When JIT fails the progs with callback calls
17278 			 * have to be rejected, since interpreter doesn't support them yet.
17279 			 */
17280 			verbose(env, "callbacks are not allowed in non-JITed programs\n");
17281 			return -EINVAL;
17282 		}
17283 
17284 		if (!bpf_pseudo_call(insn))
17285 			continue;
17286 		depth = get_callee_stack_depth(env, insn, i);
17287 		if (depth < 0)
17288 			return depth;
17289 		bpf_patch_call_args(insn, depth);
17290 	}
17291 	err = 0;
17292 #endif
17293 	return err;
17294 }
17295 
17296 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
17297 			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
17298 {
17299 	const struct bpf_kfunc_desc *desc;
17300 	void *xdp_kfunc;
17301 
17302 	if (!insn->imm) {
17303 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
17304 		return -EINVAL;
17305 	}
17306 
17307 	*cnt = 0;
17308 
17309 	if (bpf_dev_bound_kfunc_id(insn->imm)) {
17310 		xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
17311 		if (xdp_kfunc) {
17312 			insn->imm = BPF_CALL_IMM(xdp_kfunc);
17313 			return 0;
17314 		}
17315 
17316 		/* fallback to default kfunc when not supported by netdev */
17317 	}
17318 
17319 	/* insn->imm has the btf func_id. Replace it with
17320 	 * an address (relative to __bpf_call_base).
17321 	 */
17322 	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
17323 	if (!desc) {
17324 		verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
17325 			insn->imm);
17326 		return -EFAULT;
17327 	}
17328 
17329 	insn->imm = desc->imm;
17330 	if (insn->off)
17331 		return 0;
17332 	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
17333 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
17334 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
17335 		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
17336 
17337 		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
17338 		insn_buf[1] = addr[0];
17339 		insn_buf[2] = addr[1];
17340 		insn_buf[3] = *insn;
17341 		*cnt = 4;
17342 	} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
17343 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
17344 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
17345 
17346 		insn_buf[0] = addr[0];
17347 		insn_buf[1] = addr[1];
17348 		insn_buf[2] = *insn;
17349 		*cnt = 3;
17350 	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
17351 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
17352 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
17353 		*cnt = 1;
17354 	} else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
17355 		bool seen_direct_write = env->seen_direct_write;
17356 		bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
17357 
17358 		if (is_rdonly)
17359 			insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);
17360 
17361 		/* restore env->seen_direct_write to its original value, since
17362 		 * may_access_direct_pkt_data mutates it
17363 		 */
17364 		env->seen_direct_write = seen_direct_write;
17365 	}
17366 	return 0;
17367 }
17368 
17369 /* Do various post-verification rewrites in a single program pass.
17370  * These rewrites simplify JIT and interpreter implementations.
17371  */
17372 static int do_misc_fixups(struct bpf_verifier_env *env)
17373 {
17374 	struct bpf_prog *prog = env->prog;
17375 	enum bpf_attach_type eatype = prog->expected_attach_type;
17376 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
17377 	struct bpf_insn *insn = prog->insnsi;
17378 	const struct bpf_func_proto *fn;
17379 	const int insn_cnt = prog->len;
17380 	const struct bpf_map_ops *ops;
17381 	struct bpf_insn_aux_data *aux;
17382 	struct bpf_insn insn_buf[16];
17383 	struct bpf_prog *new_prog;
17384 	struct bpf_map *map_ptr;
17385 	int i, ret, cnt, delta = 0;
17386 
17387 	for (i = 0; i < insn_cnt; i++, insn++) {
17388 		/* Make divide-by-zero exceptions impossible. */
17389 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
17390 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
17391 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
17392 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
17393 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
17394 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
17395 			struct bpf_insn *patchlet;
17396 			struct bpf_insn chk_and_div[] = {
17397 				/* [R,W]x div 0 -> 0 */
17398 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
17399 					     BPF_JNE | BPF_K, insn->src_reg,
17400 					     0, 2, 0),
17401 				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
17402 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
17403 				*insn,
17404 			};
17405 			struct bpf_insn chk_and_mod[] = {
17406 				/* [R,W]x mod 0 -> [R,W]x */
17407 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
17408 					     BPF_JEQ | BPF_K, insn->src_reg,
17409 					     0, 1 + (is64 ? 0 : 1), 0),
17410 				*insn,
17411 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
17412 				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
17413 			};
17414 
17415 			patchlet = isdiv ? chk_and_div : chk_and_mod;
17416 			cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
17417 				      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
17418 
17419 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
17420 			if (!new_prog)
17421 				return -ENOMEM;
17422 
17423 			delta    += cnt - 1;
17424 			env->prog = prog = new_prog;
17425 			insn      = new_prog->insnsi + i + delta;
17426 			continue;
17427 		}
17428 
17429 		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
17430 		if (BPF_CLASS(insn->code) == BPF_LD &&
17431 		    (BPF_MODE(insn->code) == BPF_ABS ||
17432 		     BPF_MODE(insn->code) == BPF_IND)) {
17433 			cnt = env->ops->gen_ld_abs(insn, insn_buf);
17434 			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
17435 				verbose(env, "bpf verifier is misconfigured\n");
17436 				return -EINVAL;
17437 			}
17438 
17439 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17440 			if (!new_prog)
17441 				return -ENOMEM;
17442 
17443 			delta    += cnt - 1;
17444 			env->prog = prog = new_prog;
17445 			insn      = new_prog->insnsi + i + delta;
17446 			continue;
17447 		}
17448 
17449 		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
17450 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
17451 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
17452 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
17453 			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
17454 			struct bpf_insn *patch = &insn_buf[0];
17455 			bool issrc, isneg, isimm;
17456 			u32 off_reg;
17457 
17458 			aux = &env->insn_aux_data[i + delta];
17459 			if (!aux->alu_state ||
17460 			    aux->alu_state == BPF_ALU_NON_POINTER)
17461 				continue;
17462 
17463 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
17464 			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
17465 				BPF_ALU_SANITIZE_SRC;
17466 			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
17467 
17468 			off_reg = issrc ? insn->src_reg : insn->dst_reg;
17469 			if (isimm) {
17470 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
17471 			} else {
17472 				if (isneg)
17473 					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
17474 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
17475 				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
17476 				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
17477 				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
17478 				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
17479 				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
17480 			}
17481 			if (!issrc)
17482 				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
17483 			insn->src_reg = BPF_REG_AX;
17484 			if (isneg)
17485 				insn->code = insn->code == code_add ?
17486 					     code_sub : code_add;
17487 			*patch++ = *insn;
17488 			if (issrc && isneg && !isimm)
17489 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
17490 			cnt = patch - insn_buf;
17491 
17492 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17493 			if (!new_prog)
17494 				return -ENOMEM;
17495 
17496 			delta    += cnt - 1;
17497 			env->prog = prog = new_prog;
17498 			insn      = new_prog->insnsi + i + delta;
17499 			continue;
17500 		}
17501 
17502 		if (insn->code != (BPF_JMP | BPF_CALL))
17503 			continue;
17504 		if (insn->src_reg == BPF_PSEUDO_CALL)
17505 			continue;
17506 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
17507 			ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
17508 			if (ret)
17509 				return ret;
17510 			if (cnt == 0)
17511 				continue;
17512 
17513 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17514 			if (!new_prog)
17515 				return -ENOMEM;
17516 
17517 			delta	 += cnt - 1;
17518 			env->prog = prog = new_prog;
17519 			insn	  = new_prog->insnsi + i + delta;
17520 			continue;
17521 		}
17522 
17523 		if (insn->imm == BPF_FUNC_get_route_realm)
17524 			prog->dst_needed = 1;
17525 		if (insn->imm == BPF_FUNC_get_prandom_u32)
17526 			bpf_user_rnd_init_once();
17527 		if (insn->imm == BPF_FUNC_override_return)
17528 			prog->kprobe_override = 1;
17529 		if (insn->imm == BPF_FUNC_tail_call) {
17530 			/* If we tail call into other programs, we
17531 			 * cannot make any assumptions since they can
17532 			 * be replaced dynamically during runtime in
17533 			 * the program array.
17534 			 */
17535 			prog->cb_access = 1;
17536 			if (!allow_tail_call_in_subprogs(env))
17537 				prog->aux->stack_depth = MAX_BPF_STACK;
17538 			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
17539 
17540 			/* mark bpf_tail_call as different opcode to avoid
17541 			 * conditional branch in the interpreter for every normal
17542 			 * call and to prevent accidental JITing by JIT compiler
17543 			 * that doesn't support bpf_tail_call yet
17544 			 */
17545 			insn->imm = 0;
17546 			insn->code = BPF_JMP | BPF_TAIL_CALL;
17547 
17548 			aux = &env->insn_aux_data[i + delta];
17549 			if (env->bpf_capable && !prog->blinding_requested &&
17550 			    prog->jit_requested &&
17551 			    !bpf_map_key_poisoned(aux) &&
17552 			    !bpf_map_ptr_poisoned(aux) &&
17553 			    !bpf_map_ptr_unpriv(aux)) {
17554 				struct bpf_jit_poke_descriptor desc = {
17555 					.reason = BPF_POKE_REASON_TAIL_CALL,
17556 					.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
17557 					.tail_call.key = bpf_map_key_immediate(aux),
17558 					.insn_idx = i + delta,
17559 				};
17560 
17561 				ret = bpf_jit_add_poke_descriptor(prog, &desc);
17562 				if (ret < 0) {
17563 					verbose(env, "adding tail call poke descriptor failed\n");
17564 					return ret;
17565 				}
17566 
17567 				insn->imm = ret + 1;
17568 				continue;
17569 			}
17570 
17571 			if (!bpf_map_ptr_unpriv(aux))
17572 				continue;
17573 
17574 			/* instead of changing every JIT dealing with tail_call
17575 			 * emit two extra insns:
17576 			 * if (index >= max_entries) goto out;
17577 			 * index &= array->index_mask;
17578 			 * to avoid out-of-bounds cpu speculation
17579 			 */
17580 			if (bpf_map_ptr_poisoned(aux)) {
17581 				verbose(env, "tail_call abusing map_ptr\n");
17582 				return -EINVAL;
17583 			}
17584 
17585 			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
17586 			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
17587 						  map_ptr->max_entries, 2);
17588 			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
17589 						    container_of(map_ptr,
17590 								 struct bpf_array,
17591 								 map)->index_mask);
17592 			insn_buf[2] = *insn;
17593 			cnt = 3;
17594 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17595 			if (!new_prog)
17596 				return -ENOMEM;
17597 
17598 			delta    += cnt - 1;
17599 			env->prog = prog = new_prog;
17600 			insn      = new_prog->insnsi + i + delta;
17601 			continue;
17602 		}
17603 
17604 		if (insn->imm == BPF_FUNC_timer_set_callback) {
17605 			/* The verifier will process callback_fn as many times as necessary
17606 			 * with different maps and the register states prepared by
17607 			 * set_timer_callback_state will be accurate.
17608 			 *
17609 			 * The following use case is valid:
17610 			 *   map1 is shared by prog1, prog2, prog3.
17611 			 *   prog1 calls bpf_timer_init for some map1 elements
17612 			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
17613 			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
17614 			 *   prog3 calls bpf_timer_start for some map1 elements.
17615 			 *     Those that were not both bpf_timer_init-ed and
17616 			 *     bpf_timer_set_callback-ed will return -EINVAL.
17617 			 */
17618 			struct bpf_insn ld_addrs[2] = {
17619 				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
17620 			};
17621 
17622 			insn_buf[0] = ld_addrs[0];
17623 			insn_buf[1] = ld_addrs[1];
17624 			insn_buf[2] = *insn;
17625 			cnt = 3;
17626 
17627 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17628 			if (!new_prog)
17629 				return -ENOMEM;
17630 
17631 			delta    += cnt - 1;
17632 			env->prog = prog = new_prog;
17633 			insn      = new_prog->insnsi + i + delta;
17634 			goto patch_call_imm;
17635 		}
17636 
17637 		if (is_storage_get_function(insn->imm)) {
17638 			if (!env->prog->aux->sleepable ||
17639 			    env->insn_aux_data[i + delta].storage_get_func_atomic)
17640 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
17641 			else
17642 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
17643 			insn_buf[1] = *insn;
17644 			cnt = 2;
17645 
17646 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17647 			if (!new_prog)
17648 				return -ENOMEM;
17649 
17650 			delta += cnt - 1;
17651 			env->prog = prog = new_prog;
17652 			insn = new_prog->insnsi + i + delta;
17653 			goto patch_call_imm;
17654 		}
17655 
17656 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
17657 		 * and other inlining handlers are currently limited to 64 bit
17658 		 * only.
17659 		 */
17660 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
17661 		    (insn->imm == BPF_FUNC_map_lookup_elem ||
17662 		     insn->imm == BPF_FUNC_map_update_elem ||
17663 		     insn->imm == BPF_FUNC_map_delete_elem ||
17664 		     insn->imm == BPF_FUNC_map_push_elem   ||
17665 		     insn->imm == BPF_FUNC_map_pop_elem    ||
17666 		     insn->imm == BPF_FUNC_map_peek_elem   ||
17667 		     insn->imm == BPF_FUNC_redirect_map    ||
17668 		     insn->imm == BPF_FUNC_for_each_map_elem ||
17669 		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
17670 			aux = &env->insn_aux_data[i + delta];
17671 			if (bpf_map_ptr_poisoned(aux))
17672 				goto patch_call_imm;
17673 
17674 			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
17675 			ops = map_ptr->ops;
17676 			if (insn->imm == BPF_FUNC_map_lookup_elem &&
17677 			    ops->map_gen_lookup) {
17678 				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
17679 				if (cnt == -EOPNOTSUPP)
17680 					goto patch_map_ops_generic;
17681 				if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
17682 					verbose(env, "bpf verifier is misconfigured\n");
17683 					return -EINVAL;
17684 				}
17685 
17686 				new_prog = bpf_patch_insn_data(env, i + delta,
17687 							       insn_buf, cnt);
17688 				if (!new_prog)
17689 					return -ENOMEM;
17690 
17691 				delta    += cnt - 1;
17692 				env->prog = prog = new_prog;
17693 				insn      = new_prog->insnsi + i + delta;
17694 				continue;
17695 			}
17696 
17697 			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
17698 				     (void *(*)(struct bpf_map *map, void *key))NULL));
17699 			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
17700 				     (long (*)(struct bpf_map *map, void *key))NULL));
17701 			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
17702 				     (long (*)(struct bpf_map *map, void *key, void *value,
17703 					      u64 flags))NULL));
17704 			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
17705 				     (long (*)(struct bpf_map *map, void *value,
17706 					      u64 flags))NULL));
17707 			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
17708 				     (long (*)(struct bpf_map *map, void *value))NULL));
17709 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
17710 				     (long (*)(struct bpf_map *map, void *value))NULL));
17711 			BUILD_BUG_ON(!__same_type(ops->map_redirect,
17712 				     (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
17713 			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
17714 				     (long (*)(struct bpf_map *map,
17715 					      bpf_callback_t callback_fn,
17716 					      void *callback_ctx,
17717 					      u64 flags))NULL));
17718 			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
17719 				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
17720 
17721 patch_map_ops_generic:
17722 			switch (insn->imm) {
17723 			case BPF_FUNC_map_lookup_elem:
17724 				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
17725 				continue;
17726 			case BPF_FUNC_map_update_elem:
17727 				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
17728 				continue;
17729 			case BPF_FUNC_map_delete_elem:
17730 				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
17731 				continue;
17732 			case BPF_FUNC_map_push_elem:
17733 				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
17734 				continue;
17735 			case BPF_FUNC_map_pop_elem:
17736 				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
17737 				continue;
17738 			case BPF_FUNC_map_peek_elem:
17739 				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
17740 				continue;
17741 			case BPF_FUNC_redirect_map:
17742 				insn->imm = BPF_CALL_IMM(ops->map_redirect);
17743 				continue;
17744 			case BPF_FUNC_for_each_map_elem:
17745 				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
17746 				continue;
17747 			case BPF_FUNC_map_lookup_percpu_elem:
17748 				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
17749 				continue;
17750 			}
17751 
17752 			goto patch_call_imm;
17753 		}
17754 
17755 		/* Implement bpf_jiffies64 inline. */
17756 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
17757 		    insn->imm == BPF_FUNC_jiffies64) {
17758 			struct bpf_insn ld_jiffies_addr[2] = {
17759 				BPF_LD_IMM64(BPF_REG_0,
17760 					     (unsigned long)&jiffies),
17761 			};
17762 
17763 			insn_buf[0] = ld_jiffies_addr[0];
17764 			insn_buf[1] = ld_jiffies_addr[1];
17765 			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
17766 						  BPF_REG_0, 0);
17767 			cnt = 3;
17768 
17769 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
17770 						       cnt);
17771 			if (!new_prog)
17772 				return -ENOMEM;
17773 
17774 			delta    += cnt - 1;
17775 			env->prog = prog = new_prog;
17776 			insn      = new_prog->insnsi + i + delta;
17777 			continue;
17778 		}
17779 
17780 		/* Implement bpf_get_func_arg inline. */
17781 		if (prog_type == BPF_PROG_TYPE_TRACING &&
17782 		    insn->imm == BPF_FUNC_get_func_arg) {
17783 			/* Load nr_args from ctx - 8 */
17784 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
17785 			insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
17786 			insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
17787 			insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
17788 			insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
17789 			insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
17790 			insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
17791 			insn_buf[7] = BPF_JMP_A(1);
17792 			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
17793 			cnt = 9;
17794 
17795 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17796 			if (!new_prog)
17797 				return -ENOMEM;
17798 
17799 			delta    += cnt - 1;
17800 			env->prog = prog = new_prog;
17801 			insn      = new_prog->insnsi + i + delta;
17802 			continue;
17803 		}
17804 
17805 		/* Implement bpf_get_func_ret inline. */
17806 		if (prog_type == BPF_PROG_TYPE_TRACING &&
17807 		    insn->imm == BPF_FUNC_get_func_ret) {
17808 			if (eatype == BPF_TRACE_FEXIT ||
17809 			    eatype == BPF_MODIFY_RETURN) {
17810 				/* Load nr_args from ctx - 8 */
17811 				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
17812 				insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
17813 				insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
17814 				insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
17815 				insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
17816 				insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
17817 				cnt = 6;
17818 			} else {
17819 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
17820 				cnt = 1;
17821 			}
17822 
17823 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17824 			if (!new_prog)
17825 				return -ENOMEM;
17826 
17827 			delta    += cnt - 1;
17828 			env->prog = prog = new_prog;
17829 			insn      = new_prog->insnsi + i + delta;
17830 			continue;
17831 		}
17832 
17833 		/* Implement get_func_arg_cnt inline. */
17834 		if (prog_type == BPF_PROG_TYPE_TRACING &&
17835 		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
17836 			/* Load nr_args from ctx - 8 */
17837 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
17838 
17839 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
17840 			if (!new_prog)
17841 				return -ENOMEM;
17842 
17843 			env->prog = prog = new_prog;
17844 			insn      = new_prog->insnsi + i + delta;
17845 			continue;
17846 		}
17847 
17848 		/* Implement bpf_get_func_ip inline. */
17849 		if (prog_type == BPF_PROG_TYPE_TRACING &&
17850 		    insn->imm == BPF_FUNC_get_func_ip) {
17851 			/* Load IP address from ctx - 16 */
17852 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
17853 
17854 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
17855 			if (!new_prog)
17856 				return -ENOMEM;
17857 
17858 			env->prog = prog = new_prog;
17859 			insn      = new_prog->insnsi + i + delta;
17860 			continue;
17861 		}
17862 
17863 patch_call_imm:
17864 		fn = env->ops->get_func_proto(insn->imm, env->prog);
17865 		/* all functions that have prototype and verifier allowed
17866 		 * programs to call them, must be real in-kernel functions
17867 		 */
17868 		if (!fn->func) {
17869 			verbose(env,
17870 				"kernel subsystem misconfigured func %s#%d\n",
17871 				func_id_name(insn->imm), insn->imm);
17872 			return -EFAULT;
17873 		}
17874 		insn->imm = fn->func - __bpf_call_base;
17875 	}
17876 
17877 	/* Since poke tab is now finalized, publish aux to tracker. */
17878 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
17879 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
17880 		if (!map_ptr->ops->map_poke_track ||
17881 		    !map_ptr->ops->map_poke_untrack ||
17882 		    !map_ptr->ops->map_poke_run) {
17883 			verbose(env, "bpf verifier is misconfigured\n");
17884 			return -EINVAL;
17885 		}
17886 
17887 		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
17888 		if (ret < 0) {
17889 			verbose(env, "tracking tail call prog failed\n");
17890 			return ret;
17891 		}
17892 	}
17893 
17894 	sort_kfunc_descs_by_imm(env->prog);
17895 
17896 	return 0;
17897 }
17898 
17899 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
17900 					int position,
17901 					s32 stack_base,
17902 					u32 callback_subprogno,
17903 					u32 *cnt)
17904 {
17905 	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
17906 	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
17907 	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
17908 	int reg_loop_max = BPF_REG_6;
17909 	int reg_loop_cnt = BPF_REG_7;
17910 	int reg_loop_ctx = BPF_REG_8;
17911 
17912 	struct bpf_prog *new_prog;
17913 	u32 callback_start;
17914 	u32 call_insn_offset;
17915 	s32 callback_offset;
17916 
17917 	/* This represents an inlined version of bpf_iter.c:bpf_loop,
17918 	 * be careful to modify this code in sync.
17919 	 */
17920 	struct bpf_insn insn_buf[] = {
17921 		/* Return error and jump to the end of the patch if
17922 		 * expected number of iterations is too big.
17923 		 */
17924 		BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
17925 		BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
17926 		BPF_JMP_IMM(BPF_JA, 0, 0, 16),
17927 		/* spill R6, R7, R8 to use these as loop vars */
17928 		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
17929 		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
17930 		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
17931 		/* initialize loop vars */
17932 		BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
17933 		BPF_MOV32_IMM(reg_loop_cnt, 0),
17934 		BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
17935 		/* loop header,
17936 		 * if reg_loop_cnt >= reg_loop_max skip the loop body
17937 		 */
17938 		BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
17939 		/* callback call,
17940 		 * correct callback offset would be set after patching
17941 		 */
17942 		BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
17943 		BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
17944 		BPF_CALL_REL(0),
17945 		/* increment loop counter */
17946 		BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
17947 		/* jump to loop header if callback returned 0 */
17948 		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
17949 		/* return value of bpf_loop,
17950 		 * set R0 to the number of iterations
17951 		 */
17952 		BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
17953 		/* restore original values of R6, R7, R8 */
17954 		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
17955 		BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
17956 		BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
17957 	};
17958 
17959 	*cnt = ARRAY_SIZE(insn_buf);
17960 	new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
17961 	if (!new_prog)
17962 		return new_prog;
17963 
17964 	/* callback start is known only after patching */
17965 	callback_start = env->subprog_info[callback_subprogno].start;
17966 	/* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
17967 	call_insn_offset = position + 12;
17968 	callback_offset = callback_start - call_insn_offset - 1;
17969 	new_prog->insnsi[call_insn_offset].imm = callback_offset;
17970 
17971 	return new_prog;
17972 }
17973 
17974 static bool is_bpf_loop_call(struct bpf_insn *insn)
17975 {
17976 	return insn->code == (BPF_JMP | BPF_CALL) &&
17977 		insn->src_reg == 0 &&
17978 		insn->imm == BPF_FUNC_loop;
17979 }
17980 
17981 /* For all sub-programs in the program (including main) check
17982  * insn_aux_data to see if there are bpf_loop calls that require
17983  * inlining. If such calls are found the calls are replaced with a
17984  * sequence of instructions produced by `inline_bpf_loop` function and
17985  * subprog stack_depth is increased by the size of 3 registers.
17986  * This stack space is used to spill values of the R6, R7, R8.  These
17987  * registers are used to store the loop bound, counter and context
17988  * variables.
17989  */
17990 static int optimize_bpf_loop(struct bpf_verifier_env *env)
17991 {
17992 	struct bpf_subprog_info *subprogs = env->subprog_info;
17993 	int i, cur_subprog = 0, cnt, delta = 0;
17994 	struct bpf_insn *insn = env->prog->insnsi;
17995 	int insn_cnt = env->prog->len;
17996 	u16 stack_depth = subprogs[cur_subprog].stack_depth;
17997 	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
17998 	u16 stack_depth_extra = 0;
17999 
18000 	for (i = 0; i < insn_cnt; i++, insn++) {
18001 		struct bpf_loop_inline_state *inline_state =
18002 			&env->insn_aux_data[i + delta].loop_inline_state;
18003 
18004 		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
18005 			struct bpf_prog *new_prog;
18006 
18007 			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
18008 			new_prog = inline_bpf_loop(env,
18009 						   i + delta,
18010 						   -(stack_depth + stack_depth_extra),
18011 						   inline_state->callback_subprogno,
18012 						   &cnt);
18013 			if (!new_prog)
18014 				return -ENOMEM;
18015 
18016 			delta     += cnt - 1;
18017 			env->prog  = new_prog;
18018 			insn       = new_prog->insnsi + i + delta;
18019 		}
18020 
18021 		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
18022 			subprogs[cur_subprog].stack_depth += stack_depth_extra;
18023 			cur_subprog++;
18024 			stack_depth = subprogs[cur_subprog].stack_depth;
18025 			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
18026 			stack_depth_extra = 0;
18027 		}
18028 	}
18029 
18030 	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
18031 
18032 	return 0;
18033 }
18034 
18035 static void free_states(struct bpf_verifier_env *env)
18036 {
18037 	struct bpf_verifier_state_list *sl, *sln;
18038 	int i;
18039 
18040 	sl = env->free_list;
18041 	while (sl) {
18042 		sln = sl->next;
18043 		free_verifier_state(&sl->state, false);
18044 		kfree(sl);
18045 		sl = sln;
18046 	}
18047 	env->free_list = NULL;
18048 
18049 	if (!env->explored_states)
18050 		return;
18051 
18052 	for (i = 0; i < state_htab_size(env); i++) {
18053 		sl = env->explored_states[i];
18054 
18055 		while (sl) {
18056 			sln = sl->next;
18057 			free_verifier_state(&sl->state, false);
18058 			kfree(sl);
18059 			sl = sln;
18060 		}
18061 		env->explored_states[i] = NULL;
18062 	}
18063 }
18064 
18065 static int do_check_common(struct bpf_verifier_env *env, int subprog)
18066 {
18067 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
18068 	struct bpf_verifier_state *state;
18069 	struct bpf_reg_state *regs;
18070 	int ret, i;
18071 
18072 	env->prev_linfo = NULL;
18073 	env->pass_cnt++;
18074 
18075 	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
18076 	if (!state)
18077 		return -ENOMEM;
18078 	state->curframe = 0;
18079 	state->speculative = false;
18080 	state->branches = 1;
18081 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
18082 	if (!state->frame[0]) {
18083 		kfree(state);
18084 		return -ENOMEM;
18085 	}
18086 	env->cur_state = state;
18087 	init_func_state(env, state->frame[0],
18088 			BPF_MAIN_FUNC /* callsite */,
18089 			0 /* frameno */,
18090 			subprog);
18091 	state->first_insn_idx = env->subprog_info[subprog].start;
18092 	state->last_insn_idx = -1;
18093 
18094 	regs = state->frame[state->curframe]->regs;
18095 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
18096 		ret = btf_prepare_func_args(env, subprog, regs);
18097 		if (ret)
18098 			goto out;
18099 		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
18100 			if (regs[i].type == PTR_TO_CTX)
18101 				mark_reg_known_zero(env, regs, i);
18102 			else if (regs[i].type == SCALAR_VALUE)
18103 				mark_reg_unknown(env, regs, i);
18104 			else if (base_type(regs[i].type) == PTR_TO_MEM) {
18105 				const u32 mem_size = regs[i].mem_size;
18106 
18107 				mark_reg_known_zero(env, regs, i);
18108 				regs[i].mem_size = mem_size;
18109 				regs[i].id = ++env->id_gen;
18110 			}
18111 		}
18112 	} else {
18113 		/* 1st arg to a function */
18114 		regs[BPF_REG_1].type = PTR_TO_CTX;
18115 		mark_reg_known_zero(env, regs, BPF_REG_1);
18116 		ret = btf_check_subprog_arg_match(env, subprog, regs);
18117 		if (ret == -EFAULT)
18118 			/* unlikely verifier bug. abort.
18119 			 * ret == 0 and ret < 0 are sadly acceptable for
18120 			 * main() function due to backward compatibility.
18121 			 * Like socket filter program may be written as:
18122 			 * int bpf_prog(struct pt_regs *ctx)
18123 			 * and never dereference that ctx in the program.
18124 			 * 'struct pt_regs' is a type mismatch for socket
18125 			 * filter that should be using 'struct __sk_buff'.
18126 			 */
18127 			goto out;
18128 	}
18129 
18130 	ret = do_check(env);
18131 out:
18132 	/* check for NULL is necessary, since cur_state can be freed inside
18133 	 * do_check() under memory pressure.
18134 	 */
18135 	if (env->cur_state) {
18136 		free_verifier_state(env->cur_state, true);
18137 		env->cur_state = NULL;
18138 	}
18139 	while (!pop_stack(env, NULL, NULL, false));
18140 	if (!ret && pop_log)
18141 		bpf_vlog_reset(&env->log, 0);
18142 	free_states(env);
18143 	return ret;
18144 }
18145 
18146 /* Verify all global functions in a BPF program one by one based on their BTF.
18147  * All global functions must pass verification. Otherwise the whole program is rejected.
18148  * Consider:
18149  * int bar(int);
18150  * int foo(int f)
18151  * {
18152  *    return bar(f);
18153  * }
18154  * int bar(int b)
18155  * {
18156  *    ...
18157  * }
18158  * foo() will be verified first for R1=any_scalar_value. During verification it
18159  * will be assumed that bar() already verified successfully and call to bar()
18160  * from foo() will be checked for type match only. Later bar() will be verified
18161  * independently to check that it's safe for R1=any_scalar_value.
18162  */
18163 static int do_check_subprogs(struct bpf_verifier_env *env)
18164 {
18165 	struct bpf_prog_aux *aux = env->prog->aux;
18166 	int i, ret;
18167 
18168 	if (!aux->func_info)
18169 		return 0;
18170 
18171 	for (i = 1; i < env->subprog_cnt; i++) {
18172 		if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
18173 			continue;
18174 		env->insn_idx = env->subprog_info[i].start;
18175 		WARN_ON_ONCE(env->insn_idx == 0);
18176 		ret = do_check_common(env, i);
18177 		if (ret) {
18178 			return ret;
18179 		} else if (env->log.level & BPF_LOG_LEVEL) {
18180 			verbose(env,
18181 				"Func#%d is safe for any args that match its prototype\n",
18182 				i);
18183 		}
18184 	}
18185 	return 0;
18186 }
18187 
18188 static int do_check_main(struct bpf_verifier_env *env)
18189 {
18190 	int ret;
18191 
18192 	env->insn_idx = 0;
18193 	ret = do_check_common(env, 0);
18194 	if (!ret)
18195 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
18196 	return ret;
18197 }
18198 
18199 
18200 static void print_verification_stats(struct bpf_verifier_env *env)
18201 {
18202 	int i;
18203 
18204 	if (env->log.level & BPF_LOG_STATS) {
18205 		verbose(env, "verification time %lld usec\n",
18206 			div_u64(env->verification_time, 1000));
18207 		verbose(env, "stack depth ");
18208 		for (i = 0; i < env->subprog_cnt; i++) {
18209 			u32 depth = env->subprog_info[i].stack_depth;
18210 
18211 			verbose(env, "%d", depth);
18212 			if (i + 1 < env->subprog_cnt)
18213 				verbose(env, "+");
18214 		}
18215 		verbose(env, "\n");
18216 	}
18217 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
18218 		"total_states %d peak_states %d mark_read %d\n",
18219 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
18220 		env->max_states_per_insn, env->total_states,
18221 		env->peak_states, env->longest_mark_read_walk);
18222 }
18223 
18224 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
18225 {
18226 	const struct btf_type *t, *func_proto;
18227 	const struct bpf_struct_ops *st_ops;
18228 	const struct btf_member *member;
18229 	struct bpf_prog *prog = env->prog;
18230 	u32 btf_id, member_idx;
18231 	const char *mname;
18232 
18233 	if (!prog->gpl_compatible) {
18234 		verbose(env, "struct ops programs must have a GPL compatible license\n");
18235 		return -EINVAL;
18236 	}
18237 
18238 	btf_id = prog->aux->attach_btf_id;
18239 	st_ops = bpf_struct_ops_find(btf_id);
18240 	if (!st_ops) {
18241 		verbose(env, "attach_btf_id %u is not a supported struct\n",
18242 			btf_id);
18243 		return -ENOTSUPP;
18244 	}
18245 
18246 	t = st_ops->type;
18247 	member_idx = prog->expected_attach_type;
18248 	if (member_idx >= btf_type_vlen(t)) {
18249 		verbose(env, "attach to invalid member idx %u of struct %s\n",
18250 			member_idx, st_ops->name);
18251 		return -EINVAL;
18252 	}
18253 
18254 	member = &btf_type_member(t)[member_idx];
18255 	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
18256 	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
18257 					       NULL);
18258 	if (!func_proto) {
18259 		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
18260 			mname, member_idx, st_ops->name);
18261 		return -EINVAL;
18262 	}
18263 
18264 	if (st_ops->check_member) {
18265 		int err = st_ops->check_member(t, member, prog);
18266 
18267 		if (err) {
18268 			verbose(env, "attach to unsupported member %s of struct %s\n",
18269 				mname, st_ops->name);
18270 			return err;
18271 		}
18272 	}
18273 
18274 	prog->aux->attach_func_proto = func_proto;
18275 	prog->aux->attach_func_name = mname;
18276 	env->ops = st_ops->verifier_ops;
18277 
18278 	return 0;
18279 }
18280 #define SECURITY_PREFIX "security_"
18281 
18282 static int check_attach_modify_return(unsigned long addr, const char *func_name)
18283 {
18284 	if (within_error_injection_list(addr) ||
18285 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
18286 		return 0;
18287 
18288 	return -EINVAL;
18289 }
18290 
18291 /* list of non-sleepable functions that are otherwise on
18292  * ALLOW_ERROR_INJECTION list
18293  */
18294 BTF_SET_START(btf_non_sleepable_error_inject)
18295 /* Three functions below can be called from sleepable and non-sleepable context.
18296  * Assume non-sleepable from bpf safety point of view.
18297  */
18298 BTF_ID(func, __filemap_add_folio)
18299 BTF_ID(func, should_fail_alloc_page)
18300 BTF_ID(func, should_failslab)
18301 BTF_SET_END(btf_non_sleepable_error_inject)
18302 
18303 static int check_non_sleepable_error_inject(u32 btf_id)
18304 {
18305 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
18306 }
18307 
18308 int bpf_check_attach_target(struct bpf_verifier_log *log,
18309 			    const struct bpf_prog *prog,
18310 			    const struct bpf_prog *tgt_prog,
18311 			    u32 btf_id,
18312 			    struct bpf_attach_target_info *tgt_info)
18313 {
18314 	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
18315 	const char prefix[] = "btf_trace_";
18316 	int ret = 0, subprog = -1, i;
18317 	const struct btf_type *t;
18318 	bool conservative = true;
18319 	const char *tname;
18320 	struct btf *btf;
18321 	long addr = 0;
18322 	struct module *mod = NULL;
18323 
18324 	if (!btf_id) {
18325 		bpf_log(log, "Tracing programs must provide btf_id\n");
18326 		return -EINVAL;
18327 	}
18328 	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
18329 	if (!btf) {
18330 		bpf_log(log,
18331 			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
18332 		return -EINVAL;
18333 	}
18334 	t = btf_type_by_id(btf, btf_id);
18335 	if (!t) {
18336 		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
18337 		return -EINVAL;
18338 	}
18339 	tname = btf_name_by_offset(btf, t->name_off);
18340 	if (!tname) {
18341 		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
18342 		return -EINVAL;
18343 	}
18344 	if (tgt_prog) {
18345 		struct bpf_prog_aux *aux = tgt_prog->aux;
18346 
18347 		if (bpf_prog_is_dev_bound(prog->aux) &&
18348 		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
18349 			bpf_log(log, "Target program bound device mismatch");
18350 			return -EINVAL;
18351 		}
18352 
18353 		for (i = 0; i < aux->func_info_cnt; i++)
18354 			if (aux->func_info[i].type_id == btf_id) {
18355 				subprog = i;
18356 				break;
18357 			}
18358 		if (subprog == -1) {
18359 			bpf_log(log, "Subprog %s doesn't exist\n", tname);
18360 			return -EINVAL;
18361 		}
18362 		conservative = aux->func_info_aux[subprog].unreliable;
18363 		if (prog_extension) {
18364 			if (conservative) {
18365 				bpf_log(log,
18366 					"Cannot replace static functions\n");
18367 				return -EINVAL;
18368 			}
18369 			if (!prog->jit_requested) {
18370 				bpf_log(log,
18371 					"Extension programs should be JITed\n");
18372 				return -EINVAL;
18373 			}
18374 		}
18375 		if (!tgt_prog->jited) {
18376 			bpf_log(log, "Can attach to only JITed progs\n");
18377 			return -EINVAL;
18378 		}
18379 		if (tgt_prog->type == prog->type) {
18380 			/* Cannot fentry/fexit another fentry/fexit program.
18381 			 * Cannot attach program extension to another extension.
18382 			 * It's ok to attach fentry/fexit to extension program.
18383 			 */
18384 			bpf_log(log, "Cannot recursively attach\n");
18385 			return -EINVAL;
18386 		}
18387 		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
18388 		    prog_extension &&
18389 		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
18390 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
18391 			/* Program extensions can extend all program types
18392 			 * except fentry/fexit. The reason is the following.
18393 			 * The fentry/fexit programs are used for performance
18394 			 * analysis, stats and can be attached to any program
18395 			 * type except themselves. When extension program is
18396 			 * replacing XDP function it is necessary to allow
18397 			 * performance analysis of all functions. Both original
18398 			 * XDP program and its program extension. Hence
18399 			 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
18400 			 * allowed. If extending of fentry/fexit was allowed it
18401 			 * would be possible to create long call chain
18402 			 * fentry->extension->fentry->extension beyond
18403 			 * reasonable stack size. Hence extending fentry is not
18404 			 * allowed.
18405 			 */
18406 			bpf_log(log, "Cannot extend fentry/fexit\n");
18407 			return -EINVAL;
18408 		}
18409 	} else {
18410 		if (prog_extension) {
18411 			bpf_log(log, "Cannot replace kernel functions\n");
18412 			return -EINVAL;
18413 		}
18414 	}
18415 
18416 	switch (prog->expected_attach_type) {
18417 	case BPF_TRACE_RAW_TP:
18418 		if (tgt_prog) {
18419 			bpf_log(log,
18420 				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
18421 			return -EINVAL;
18422 		}
18423 		if (!btf_type_is_typedef(t)) {
18424 			bpf_log(log, "attach_btf_id %u is not a typedef\n",
18425 				btf_id);
18426 			return -EINVAL;
18427 		}
18428 		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
18429 			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
18430 				btf_id, tname);
18431 			return -EINVAL;
18432 		}
18433 		tname += sizeof(prefix) - 1;
18434 		t = btf_type_by_id(btf, t->type);
18435 		if (!btf_type_is_ptr(t))
18436 			/* should never happen in valid vmlinux build */
18437 			return -EINVAL;
18438 		t = btf_type_by_id(btf, t->type);
18439 		if (!btf_type_is_func_proto(t))
18440 			/* should never happen in valid vmlinux build */
18441 			return -EINVAL;
18442 
18443 		break;
18444 	case BPF_TRACE_ITER:
18445 		if (!btf_type_is_func(t)) {
18446 			bpf_log(log, "attach_btf_id %u is not a function\n",
18447 				btf_id);
18448 			return -EINVAL;
18449 		}
18450 		t = btf_type_by_id(btf, t->type);
18451 		if (!btf_type_is_func_proto(t))
18452 			return -EINVAL;
18453 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
18454 		if (ret)
18455 			return ret;
18456 		break;
18457 	default:
18458 		if (!prog_extension)
18459 			return -EINVAL;
18460 		fallthrough;
18461 	case BPF_MODIFY_RETURN:
18462 	case BPF_LSM_MAC:
18463 	case BPF_LSM_CGROUP:
18464 	case BPF_TRACE_FENTRY:
18465 	case BPF_TRACE_FEXIT:
18466 		if (!btf_type_is_func(t)) {
18467 			bpf_log(log, "attach_btf_id %u is not a function\n",
18468 				btf_id);
18469 			return -EINVAL;
18470 		}
18471 		if (prog_extension &&
18472 		    btf_check_type_match(log, prog, btf, t))
18473 			return -EINVAL;
18474 		t = btf_type_by_id(btf, t->type);
18475 		if (!btf_type_is_func_proto(t))
18476 			return -EINVAL;
18477 
18478 		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
18479 		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
18480 		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
18481 			return -EINVAL;
18482 
18483 		if (tgt_prog && conservative)
18484 			t = NULL;
18485 
18486 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
18487 		if (ret < 0)
18488 			return ret;
18489 
18490 		if (tgt_prog) {
18491 			if (subprog == 0)
18492 				addr = (long) tgt_prog->bpf_func;
18493 			else
18494 				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
18495 		} else {
18496 			if (btf_is_module(btf)) {
18497 				mod = btf_try_get_module(btf);
18498 				if (mod)
18499 					addr = find_kallsyms_symbol_value(mod, tname);
18500 				else
18501 					addr = 0;
18502 			} else {
18503 				addr = kallsyms_lookup_name(tname);
18504 			}
18505 			if (!addr) {
18506 				module_put(mod);
18507 				bpf_log(log,
18508 					"The address of function %s cannot be found\n",
18509 					tname);
18510 				return -ENOENT;
18511 			}
18512 		}
18513 
18514 		if (prog->aux->sleepable) {
18515 			ret = -EINVAL;
18516 			switch (prog->type) {
18517 			case BPF_PROG_TYPE_TRACING:
18518 
18519 				/* fentry/fexit/fmod_ret progs can be sleepable if they are
18520 				 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
18521 				 */
18522 				if (!check_non_sleepable_error_inject(btf_id) &&
18523 				    within_error_injection_list(addr))
18524 					ret = 0;
18525 				/* fentry/fexit/fmod_ret progs can also be sleepable if they are
18526 				 * in the fmodret id set with the KF_SLEEPABLE flag.
18527 				 */
18528 				else {
18529 					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id);
18530 
18531 					if (flags && (*flags & KF_SLEEPABLE))
18532 						ret = 0;
18533 				}
18534 				break;
18535 			case BPF_PROG_TYPE_LSM:
18536 				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
18537 				 * Only some of them are sleepable.
18538 				 */
18539 				if (bpf_lsm_is_sleepable_hook(btf_id))
18540 					ret = 0;
18541 				break;
18542 			default:
18543 				break;
18544 			}
18545 			if (ret) {
18546 				module_put(mod);
18547 				bpf_log(log, "%s is not sleepable\n", tname);
18548 				return ret;
18549 			}
18550 		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
18551 			if (tgt_prog) {
18552 				module_put(mod);
18553 				bpf_log(log, "can't modify return codes of BPF programs\n");
18554 				return -EINVAL;
18555 			}
18556 			ret = -EINVAL;
18557 			if (btf_kfunc_is_modify_return(btf, btf_id) ||
18558 			    !check_attach_modify_return(addr, tname))
18559 				ret = 0;
18560 			if (ret) {
18561 				module_put(mod);
18562 				bpf_log(log, "%s() is not modifiable\n", tname);
18563 				return ret;
18564 			}
18565 		}
18566 
18567 		break;
18568 	}
18569 	tgt_info->tgt_addr = addr;
18570 	tgt_info->tgt_name = tname;
18571 	tgt_info->tgt_type = t;
18572 	tgt_info->tgt_mod = mod;
18573 	return 0;
18574 }
18575 
18576 BTF_SET_START(btf_id_deny)
18577 BTF_ID_UNUSED
18578 #ifdef CONFIG_SMP
18579 BTF_ID(func, migrate_disable)
18580 BTF_ID(func, migrate_enable)
18581 #endif
18582 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
18583 BTF_ID(func, rcu_read_unlock_strict)
18584 #endif
18585 BTF_SET_END(btf_id_deny)
18586 
18587 static bool can_be_sleepable(struct bpf_prog *prog)
18588 {
18589 	if (prog->type == BPF_PROG_TYPE_TRACING) {
18590 		switch (prog->expected_attach_type) {
18591 		case BPF_TRACE_FENTRY:
18592 		case BPF_TRACE_FEXIT:
18593 		case BPF_MODIFY_RETURN:
18594 		case BPF_TRACE_ITER:
18595 			return true;
18596 		default:
18597 			return false;
18598 		}
18599 	}
18600 	return prog->type == BPF_PROG_TYPE_LSM ||
18601 	       prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
18602 	       prog->type == BPF_PROG_TYPE_STRUCT_OPS;
18603 }
18604 
18605 static int check_attach_btf_id(struct bpf_verifier_env *env)
18606 {
18607 	struct bpf_prog *prog = env->prog;
18608 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
18609 	struct bpf_attach_target_info tgt_info = {};
18610 	u32 btf_id = prog->aux->attach_btf_id;
18611 	struct bpf_trampoline *tr;
18612 	int ret;
18613 	u64 key;
18614 
18615 	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
18616 		if (prog->aux->sleepable)
18617 			/* attach_btf_id checked to be zero already */
18618 			return 0;
18619 		verbose(env, "Syscall programs can only be sleepable\n");
18620 		return -EINVAL;
18621 	}
18622 
18623 	if (prog->aux->sleepable && !can_be_sleepable(prog)) {
18624 		verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
18625 		return -EINVAL;
18626 	}
18627 
18628 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
18629 		return check_struct_ops_btf_id(env);
18630 
18631 	if (prog->type != BPF_PROG_TYPE_TRACING &&
18632 	    prog->type != BPF_PROG_TYPE_LSM &&
18633 	    prog->type != BPF_PROG_TYPE_EXT)
18634 		return 0;
18635 
18636 	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
18637 	if (ret)
18638 		return ret;
18639 
18640 	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
18641 		/* to make freplace equivalent to their targets, they need to
18642 		 * inherit env->ops and expected_attach_type for the rest of the
18643 		 * verification
18644 		 */
18645 		env->ops = bpf_verifier_ops[tgt_prog->type];
18646 		prog->expected_attach_type = tgt_prog->expected_attach_type;
18647 	}
18648 
18649 	/* store info about the attachment target that will be used later */
18650 	prog->aux->attach_func_proto = tgt_info.tgt_type;
18651 	prog->aux->attach_func_name = tgt_info.tgt_name;
18652 	prog->aux->mod = tgt_info.tgt_mod;
18653 
18654 	if (tgt_prog) {
18655 		prog->aux->saved_dst_prog_type = tgt_prog->type;
18656 		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
18657 	}
18658 
18659 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
18660 		prog->aux->attach_btf_trace = true;
18661 		return 0;
18662 	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
18663 		if (!bpf_iter_prog_supported(prog))
18664 			return -EINVAL;
18665 		return 0;
18666 	}
18667 
18668 	if (prog->type == BPF_PROG_TYPE_LSM) {
18669 		ret = bpf_lsm_verify_prog(&env->log, prog);
18670 		if (ret < 0)
18671 			return ret;
18672 	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
18673 		   btf_id_set_contains(&btf_id_deny, btf_id)) {
18674 		return -EINVAL;
18675 	}
18676 
18677 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
18678 	tr = bpf_trampoline_get(key, &tgt_info);
18679 	if (!tr)
18680 		return -ENOMEM;
18681 
18682 	prog->aux->dst_trampoline = tr;
18683 	return 0;
18684 }
18685 
18686 struct btf *bpf_get_btf_vmlinux(void)
18687 {
18688 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
18689 		mutex_lock(&bpf_verifier_lock);
18690 		if (!btf_vmlinux)
18691 			btf_vmlinux = btf_parse_vmlinux();
18692 		mutex_unlock(&bpf_verifier_lock);
18693 	}
18694 	return btf_vmlinux;
18695 }
18696 
18697 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
18698 {
18699 	u64 start_time = ktime_get_ns();
18700 	struct bpf_verifier_env *env;
18701 	int i, len, ret = -EINVAL, err;
18702 	u32 log_true_size;
18703 	bool is_priv;
18704 
18705 	/* no program is valid */
18706 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
18707 		return -EINVAL;
18708 
18709 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
18710 	 * allocate/free it every time bpf_check() is called
18711 	 */
18712 	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
18713 	if (!env)
18714 		return -ENOMEM;
18715 
18716 	len = (*prog)->len;
18717 	env->insn_aux_data =
18718 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
18719 	ret = -ENOMEM;
18720 	if (!env->insn_aux_data)
18721 		goto err_free_env;
18722 	for (i = 0; i < len; i++)
18723 		env->insn_aux_data[i].orig_idx = i;
18724 	env->prog = *prog;
18725 	env->ops = bpf_verifier_ops[env->prog->type];
18726 	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
18727 	is_priv = bpf_capable();
18728 
18729 	bpf_get_btf_vmlinux();
18730 
18731 	/* grab the mutex to protect few globals used by verifier */
18732 	if (!is_priv)
18733 		mutex_lock(&bpf_verifier_lock);
18734 
18735 	/* user could have requested verbose verifier output
18736 	 * and supplied buffer to store the verification trace
18737 	 */
18738 	ret = bpf_vlog_init(&env->log, attr->log_level,
18739 			    (char __user *) (unsigned long) attr->log_buf,
18740 			    attr->log_size);
18741 	if (ret)
18742 		goto err_unlock;
18743 
18744 	mark_verifier_state_clean(env);
18745 
18746 	if (IS_ERR(btf_vmlinux)) {
18747 		/* Either gcc or pahole or kernel are broken. */
18748 		verbose(env, "in-kernel BTF is malformed\n");
18749 		ret = PTR_ERR(btf_vmlinux);
18750 		goto skip_full_check;
18751 	}
18752 
18753 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
18754 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
18755 		env->strict_alignment = true;
18756 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
18757 		env->strict_alignment = false;
18758 
18759 	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
18760 	env->allow_uninit_stack = bpf_allow_uninit_stack();
18761 	env->bypass_spec_v1 = bpf_bypass_spec_v1();
18762 	env->bypass_spec_v4 = bpf_bypass_spec_v4();
18763 	env->bpf_capable = bpf_capable();
18764 
18765 	if (is_priv)
18766 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
18767 
18768 	env->explored_states = kvcalloc(state_htab_size(env),
18769 				       sizeof(struct bpf_verifier_state_list *),
18770 				       GFP_USER);
18771 	ret = -ENOMEM;
18772 	if (!env->explored_states)
18773 		goto skip_full_check;
18774 
18775 	ret = add_subprog_and_kfunc(env);
18776 	if (ret < 0)
18777 		goto skip_full_check;
18778 
18779 	ret = check_subprogs(env);
18780 	if (ret < 0)
18781 		goto skip_full_check;
18782 
18783 	ret = check_btf_info(env, attr, uattr);
18784 	if (ret < 0)
18785 		goto skip_full_check;
18786 
18787 	ret = check_attach_btf_id(env);
18788 	if (ret)
18789 		goto skip_full_check;
18790 
18791 	ret = resolve_pseudo_ldimm64(env);
18792 	if (ret < 0)
18793 		goto skip_full_check;
18794 
18795 	if (bpf_prog_is_offloaded(env->prog->aux)) {
18796 		ret = bpf_prog_offload_verifier_prep(env->prog);
18797 		if (ret)
18798 			goto skip_full_check;
18799 	}
18800 
18801 	ret = check_cfg(env);
18802 	if (ret < 0)
18803 		goto skip_full_check;
18804 
18805 	ret = do_check_subprogs(env);
18806 	ret = ret ?: do_check_main(env);
18807 
18808 	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
18809 		ret = bpf_prog_offload_finalize(env);
18810 
18811 skip_full_check:
18812 	kvfree(env->explored_states);
18813 
18814 	if (ret == 0)
18815 		ret = check_max_stack_depth(env);
18816 
18817 	/* instruction rewrites happen after this point */
18818 	if (ret == 0)
18819 		ret = optimize_bpf_loop(env);
18820 
18821 	if (is_priv) {
18822 		if (ret == 0)
18823 			opt_hard_wire_dead_code_branches(env);
18824 		if (ret == 0)
18825 			ret = opt_remove_dead_code(env);
18826 		if (ret == 0)
18827 			ret = opt_remove_nops(env);
18828 	} else {
18829 		if (ret == 0)
18830 			sanitize_dead_code(env);
18831 	}
18832 
18833 	if (ret == 0)
18834 		/* program is valid, convert *(u32*)(ctx + off) accesses */
18835 		ret = convert_ctx_accesses(env);
18836 
18837 	if (ret == 0)
18838 		ret = do_misc_fixups(env);
18839 
18840 	/* do 32-bit optimization after insn patching has done so those patched
18841 	 * insns could be handled correctly.
18842 	 */
18843 	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
18844 		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
18845 		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
18846 								     : false;
18847 	}
18848 
18849 	if (ret == 0)
18850 		ret = fixup_call_args(env);
18851 
18852 	env->verification_time = ktime_get_ns() - start_time;
18853 	print_verification_stats(env);
18854 	env->prog->aux->verified_insns = env->insn_processed;
18855 
18856 	/* preserve original error even if log finalization is successful */
18857 	err = bpf_vlog_finalize(&env->log, &log_true_size);
18858 	if (err)
18859 		ret = err;
18860 
18861 	if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
18862 	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
18863 				  &log_true_size, sizeof(log_true_size))) {
18864 		ret = -EFAULT;
18865 		goto err_release_maps;
18866 	}
18867 
18868 	if (ret)
18869 		goto err_release_maps;
18870 
18871 	if (env->used_map_cnt) {
18872 		/* if program passed verifier, update used_maps in bpf_prog_info */
18873 		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
18874 							  sizeof(env->used_maps[0]),
18875 							  GFP_KERNEL);
18876 
18877 		if (!env->prog->aux->used_maps) {
18878 			ret = -ENOMEM;
18879 			goto err_release_maps;
18880 		}
18881 
18882 		memcpy(env->prog->aux->used_maps, env->used_maps,
18883 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
18884 		env->prog->aux->used_map_cnt = env->used_map_cnt;
18885 	}
18886 	if (env->used_btf_cnt) {
18887 		/* if program passed verifier, update used_btfs in bpf_prog_aux */
18888 		env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
18889 							  sizeof(env->used_btfs[0]),
18890 							  GFP_KERNEL);
18891 		if (!env->prog->aux->used_btfs) {
18892 			ret = -ENOMEM;
18893 			goto err_release_maps;
18894 		}
18895 
18896 		memcpy(env->prog->aux->used_btfs, env->used_btfs,
18897 		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
18898 		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
18899 	}
18900 	if (env->used_map_cnt || env->used_btf_cnt) {
18901 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
18902 		 * bpf_ld_imm64 instructions
18903 		 */
18904 		convert_pseudo_ld_imm64(env);
18905 	}
18906 
18907 	adjust_btf_func(env);
18908 
18909 err_release_maps:
18910 	if (!env->prog->aux->used_maps)
18911 		/* if we didn't copy map pointers into bpf_prog_info, release
18912 		 * them now. Otherwise free_used_maps() will release them.
18913 		 */
18914 		release_maps(env);
18915 	if (!env->prog->aux->used_btfs)
18916 		release_btfs(env);
18917 
18918 	/* extension progs temporarily inherit the attach_type of their targets
18919 	   for verification purposes, so set it back to zero before returning
18920 	 */
18921 	if (env->prog->type == BPF_PROG_TYPE_EXT)
18922 		env->prog->expected_attach_type = 0;
18923 
18924 	*prog = env->prog;
18925 err_unlock:
18926 	if (!is_priv)
18927 		mutex_unlock(&bpf_verifier_lock);
18928 	vfree(env->insn_aux_data);
18929 err_free_env:
18930 	kfree(env);
18931 	return ret;
18932 }
18933