xref: /openbmc/linux/kernel/bpf/verifier.c (revision 9cf0666f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/kernel.h>
8 #include <linux/types.h>
9 #include <linux/slab.h>
10 #include <linux/bpf.h>
11 #include <linux/btf.h>
12 #include <linux/bpf_verifier.h>
13 #include <linux/filter.h>
14 #include <net/netlink.h>
15 #include <linux/file.h>
16 #include <linux/vmalloc.h>
17 #include <linux/stringify.h>
18 #include <linux/bsearch.h>
19 #include <linux/sort.h>
20 #include <linux/perf_event.h>
21 #include <linux/ctype.h>
22 #include <linux/error-injection.h>
23 #include <linux/bpf_lsm.h>
24 #include <linux/btf_ids.h>
25 
26 #include "disasm.h"
27 
28 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
29 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
30 	[_id] = & _name ## _verifier_ops,
31 #define BPF_MAP_TYPE(_id, _ops)
32 #define BPF_LINK_TYPE(_id, _name)
33 #include <linux/bpf_types.h>
34 #undef BPF_PROG_TYPE
35 #undef BPF_MAP_TYPE
36 #undef BPF_LINK_TYPE
37 };
38 
39 /* bpf_check() is a static code analyzer that walks the eBPF program
40  * instruction by instruction and updates the register/stack state.
41  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
42  *
43  * The first pass is a depth-first search to check that the program is a DAG.
44  * It rejects the following programs:
45  * - larger than BPF_MAXINSNS insns
46  * - a loop is present (detected via a back-edge)
47  * - unreachable insns exist (shouldn't be a forest; program = one function)
48  * - out of bounds or malformed jumps
49  * The second pass descends all possible paths from the 1st insn.
50  * Since it's analyzing all paths through the program, the length of the
51  * analysis is limited to 64k insn, which may be hit even if the total number
52  * of insns is less than 4K but there are too many branches that change stack/regs.
53  * The number of 'branches to be analyzed' is limited to 1k.
54  *
55  * On entry to each instruction, each register has a type, and the instruction
56  * changes the types of the registers depending on instruction semantics.
57  * If the instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then the type of R5 is
58  * copied to R1.
59  *
60  * All registers are 64-bit.
61  * R0 - return value register
62  * R1-R5 - argument passing registers
63  * R6-R9 - callee saved registers
64  * R10 - read-only frame pointer
65  *
66  * At the start of BPF program the register R1 contains a pointer to bpf_context
67  * and has type PTR_TO_CTX.
68  *
69  * The verifier tracks arithmetic operations on pointers. For example:
70  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
71  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
72  * The 1st insn copies R10 (which has type FRAME_PTR) into R1,
73  * and the 2nd arithmetic instruction is pattern matched to recognize
74  * that it wants to construct a pointer to some element within the stack.
75  * So after the 2nd insn, the register R1 has type PTR_TO_STACK
76  * (and the -20 constant is saved for further stack bounds checking),
77  * meaning that this reg is a pointer to the stack plus a known immediate constant.
78  *
79  * Most of the time the registers have SCALAR_VALUE type, which
80  * means the register has some value, but it's not a valid pointer
81  * (e.g. pointer plus pointer becomes SCALAR_VALUE type).
82  *
83  * When the verifier sees load or store instructions, the type of the base
84  * register can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET.
85  * These are four pointer types recognized by the check_mem_access() function.
86  *
87  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
88  * and the range of [ptr, ptr + map's value_size) is accessible.
89  *
90  * Registers used to pass values to function calls are checked against
91  * the function's argument constraints.
92  *
93  * ARG_PTR_TO_MAP_KEY is one such argument constraint.
94  * It means that the register type passed to this function must be
95  * PTR_TO_STACK and it will be used inside the function as a
96  * 'pointer to map element key'.
97  *
98  * For example the argument constraints for bpf_map_lookup_elem():
99  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
100  *   .arg1_type = ARG_CONST_MAP_PTR,
101  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
102  *
103  * ret_type says that this function returns 'pointer to map elem value or null'.
104  * The function expects the 1st argument to be a const pointer to 'struct bpf_map'
105  * and the 2nd argument to be a pointer to stack, which will be used inside
106  * the helper function as a pointer to the map element key.
107  *
108  * On the kernel side the helper function looks like:
109  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
110  * {
111  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
112  *    void *key = (void *) (unsigned long) r2;
113  *    void *value;
114  *
115  *    here kernel can access 'key' and 'map' pointers safely, knowing that
116  *    [key, key + map->key_size) bytes are valid and were initialized on
117  *    the stack of eBPF program.
118  * }
119  *
120  * Corresponding eBPF program may look like:
121  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
122  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
123  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
124  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
125  * Here the verifier looks at the prototype of map_lookup_elem() and sees:
126  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok.
127  * Now the verifier knows that this map has a key of R1->map_ptr->key_size bytes.
128  *
129  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far.
130  * Now the verifier checks that [R2, R2 + map's key_size) is within stack limits
131  * and was initialized prior to this call.
132  * If that's ok, the verifier allows this BPF_CALL insn and looks at
133  * .ret_type, which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
134  * R0->type = PTR_TO_MAP_VALUE_OR_NULL, which means the bpf_map_lookup_elem()
135  * function returns either a pointer to the map value or NULL.
136  *
137  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
138  * insn, the register holding that pointer in the true branch changes state to
139  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
140  * branch. See check_cond_jmp_op().
141  *
142  * After the call R0 is set to the return type of the function and registers R1-R5
143  * are set to NOT_INIT to indicate that they are no longer readable.
144  *
145  * The following reference types represent a potential reference to a kernel
146  * resource which, after first being allocated, must be checked and freed by
147  * the BPF program:
148  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
149  *
150  * When the verifier sees a helper call return a reference type, it allocates a
151  * pointer id for the reference and stores it in the current function state.
152  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
153  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
154  * passes through a NULL-check conditional. For the branch wherein the state is
155  * changed to CONST_IMM, the verifier releases the reference.
156  *
157  * For each helper function that allocates a reference, such as
158  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
159  * bpf_sk_release(). When a reference type passes into the release function,
160  * the verifier also releases the reference. If any unchecked or unreleased
161  * reference remains at the end of the program, the verifier rejects it.
162  */
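
/* As an illustration of the reference tracking described above (a sketch
 * only; the argument setup for bpf_sk_lookup_tcp() is elided), an
 * acquire/release pair may look like:
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *                                       // R0 is PTR_TO_SOCKET_OR_NULL, ref id recorded
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), // fall-through: R0 becomes PTR_TO_SOCKET
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *                                       // reference released; leaking it is rejected
 *    BPF_EXIT_INSN()
 */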
163 
164 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
165 struct bpf_verifier_stack_elem {
166 	/* verifier state is 'st'
167 	 * before processing instruction 'insn_idx'
168 	 * and after processing instruction 'prev_insn_idx'
169 	 */
170 	struct bpf_verifier_state st;
171 	int insn_idx;
172 	int prev_insn_idx;
173 	struct bpf_verifier_stack_elem *next;
174 	/* length of verifier log at the time this state was pushed on stack */
175 	u32 log_pos;
176 };
177 
178 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
179 #define BPF_COMPLEXITY_LIMIT_STATES	64
180 
181 #define BPF_MAP_KEY_POISON	(1ULL << 63)
182 #define BPF_MAP_KEY_SEEN	(1ULL << 62)
183 
184 #define BPF_MAP_PTR_UNPRIV	1UL
185 #define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
186 					  POISON_POINTER_DELTA))
187 #define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
188 
189 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
190 {
191 	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
192 }
193 
194 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
195 {
196 	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
197 }
198 
199 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
200 			      const struct bpf_map *map, bool unpriv)
201 {
202 	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
203 	unpriv |= bpf_map_ptr_unpriv(aux);
204 	aux->map_ptr_state = (unsigned long)map |
205 			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
206 }
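
/* Illustrative example of the encoding above: map pointers are at least
 * word-aligned, so bit 0 is free.  Storing a map pointer such as
 * 0xffff888012345000 (address made up for illustration) with unpriv == true
 * yields a map_ptr_state of 0xffff888012345001, and BPF_MAP_PTR() masks
 * bit 0 off again to recover the original pointer.
 */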
207 
208 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
209 {
210 	return aux->map_key_state & BPF_MAP_KEY_POISON;
211 }
212 
213 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
214 {
215 	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
216 }
217 
218 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
219 {
220 	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
221 }
222 
223 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
224 {
225 	bool poisoned = bpf_map_key_poisoned(aux);
226 
227 	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
228 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
229 }
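
/* Illustrative example of the key-state encoding: the constant key value
 * lives in the low 62 bits, with BPF_MAP_KEY_SEEN and BPF_MAP_KEY_POISON
 * in bits 62 and 63.  E.g. storing the constant key 42 for the first time
 * produces (42 | BPF_MAP_KEY_SEEN), and bpf_map_key_immediate() masks the
 * two flag bits off again to return 42.
 */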
230 
231 static bool bpf_pseudo_call(const struct bpf_insn *insn)
232 {
233 	return insn->code == (BPF_JMP | BPF_CALL) &&
234 	       insn->src_reg == BPF_PSEUDO_CALL;
235 }
236 
237 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
238 {
239 	return insn->code == (BPF_JMP | BPF_CALL) &&
240 	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
241 }
242 
243 struct bpf_call_arg_meta {
244 	struct bpf_map *map_ptr;
245 	bool raw_mode;
246 	bool pkt_access;
247 	int regno;
248 	int access_size;
249 	int mem_size;
250 	u64 msize_max_value;
251 	int ref_obj_id;
252 	int map_uid;
253 	int func_id;
254 	struct btf *btf;
255 	u32 btf_id;
256 	struct btf *ret_btf;
257 	u32 ret_btf_id;
258 	u32 subprogno;
259 };
260 
261 struct btf *btf_vmlinux;
262 
263 static DEFINE_MUTEX(bpf_verifier_lock);
264 
265 static const struct bpf_line_info *
266 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
267 {
268 	const struct bpf_line_info *linfo;
269 	const struct bpf_prog *prog;
270 	u32 i, nr_linfo;
271 
272 	prog = env->prog;
273 	nr_linfo = prog->aux->nr_linfo;
274 
275 	if (!nr_linfo || insn_off >= prog->len)
276 		return NULL;
277 
278 	linfo = prog->aux->linfo;
279 	for (i = 1; i < nr_linfo; i++)
280 		if (insn_off < linfo[i].insn_off)
281 			break;
282 
283 	return &linfo[i - 1];
284 }
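
/* Worked example (illustrative): with line info records starting at insns
 * {0, 5, 9} and insn_off == 7, the loop above breaks at i == 2 and
 * &linfo[1] (the record starting at insn 5) is returned, i.e. the last
 * record at or before the requested instruction.
 */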
285 
286 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
287 		       va_list args)
288 {
289 	unsigned int n;
290 
291 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
292 
293 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
294 		  "verifier log line truncated - local buffer too short\n");
295 
296 	n = min(log->len_total - log->len_used - 1, n);
297 	log->kbuf[n] = '\0';
298 
299 	if (log->level == BPF_LOG_KERNEL) {
300 		pr_err("BPF:%s\n", log->kbuf);
301 		return;
302 	}
303 	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
304 		log->len_used += n;
305 	else
306 		log->ubuf = NULL;
307 }
308 
309 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
310 {
311 	char zero = 0;
312 
313 	if (!bpf_verifier_log_needed(log))
314 		return;
315 
316 	log->len_used = new_pos;
317 	if (put_user(zero, log->ubuf + new_pos))
318 		log->ubuf = NULL;
319 }
320 
321 /* log_level controls the verbosity level of the eBPF verifier.
322  * bpf_verifier_log_write() is used to dump the verification trace to the log,
323  * so the user can figure out what's wrong with the program.
324  */
325 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
326 					   const char *fmt, ...)
327 {
328 	va_list args;
329 
330 	if (!bpf_verifier_log_needed(&env->log))
331 		return;
332 
333 	va_start(args, fmt);
334 	bpf_verifier_vlog(&env->log, fmt, args);
335 	va_end(args);
336 }
337 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
338 
339 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
340 {
341 	struct bpf_verifier_env *env = private_data;
342 	va_list args;
343 
344 	if (!bpf_verifier_log_needed(&env->log))
345 		return;
346 
347 	va_start(args, fmt);
348 	bpf_verifier_vlog(&env->log, fmt, args);
349 	va_end(args);
350 }
351 
352 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
353 			    const char *fmt, ...)
354 {
355 	va_list args;
356 
357 	if (!bpf_verifier_log_needed(log))
358 		return;
359 
360 	va_start(args, fmt);
361 	bpf_verifier_vlog(log, fmt, args);
362 	va_end(args);
363 }
364 
365 static const char *ltrim(const char *s)
366 {
367 	while (isspace(*s))
368 		s++;
369 
370 	return s;
371 }
372 
373 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
374 					 u32 insn_off,
375 					 const char *prefix_fmt, ...)
376 {
377 	const struct bpf_line_info *linfo;
378 
379 	if (!bpf_verifier_log_needed(&env->log))
380 		return;
381 
382 	linfo = find_linfo(env, insn_off);
383 	if (!linfo || linfo == env->prev_linfo)
384 		return;
385 
386 	if (prefix_fmt) {
387 		va_list args;
388 
389 		va_start(args, prefix_fmt);
390 		bpf_verifier_vlog(&env->log, prefix_fmt, args);
391 		va_end(args);
392 	}
393 
394 	verbose(env, "%s\n",
395 		ltrim(btf_name_by_offset(env->prog->aux->btf,
396 					 linfo->line_off)));
397 
398 	env->prev_linfo = linfo;
399 }
400 
401 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
402 				   struct bpf_reg_state *reg,
403 				   struct tnum *range, const char *ctx,
404 				   const char *reg_name)
405 {
406 	char tn_buf[48];
407 
408 	verbose(env, "At %s the register %s ", ctx, reg_name);
409 	if (!tnum_is_unknown(reg->var_off)) {
410 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
411 		verbose(env, "has value %s", tn_buf);
412 	} else {
413 		verbose(env, "has unknown scalar value");
414 	}
415 	tnum_strn(tn_buf, sizeof(tn_buf), *range);
416 	verbose(env, " should have been in %s\n", tn_buf);
417 }
418 
419 static bool type_is_pkt_pointer(enum bpf_reg_type type)
420 {
421 	return type == PTR_TO_PACKET ||
422 	       type == PTR_TO_PACKET_META;
423 }
424 
425 static bool type_is_sk_pointer(enum bpf_reg_type type)
426 {
427 	return type == PTR_TO_SOCKET ||
428 		type == PTR_TO_SOCK_COMMON ||
429 		type == PTR_TO_TCP_SOCK ||
430 		type == PTR_TO_XDP_SOCK;
431 }
432 
433 static bool reg_type_not_null(enum bpf_reg_type type)
434 {
435 	return type == PTR_TO_SOCKET ||
436 		type == PTR_TO_TCP_SOCK ||
437 		type == PTR_TO_MAP_VALUE ||
438 		type == PTR_TO_MAP_KEY ||
439 		type == PTR_TO_SOCK_COMMON;
440 }
441 
442 static bool reg_type_may_be_null(enum bpf_reg_type type)
443 {
444 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
445 	       type == PTR_TO_SOCKET_OR_NULL ||
446 	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
447 	       type == PTR_TO_TCP_SOCK_OR_NULL ||
448 	       type == PTR_TO_BTF_ID_OR_NULL ||
449 	       type == PTR_TO_MEM_OR_NULL ||
450 	       type == PTR_TO_RDONLY_BUF_OR_NULL ||
451 	       type == PTR_TO_RDWR_BUF_OR_NULL;
452 }
453 
454 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
455 {
456 	return reg->type == PTR_TO_MAP_VALUE &&
457 		map_value_has_spin_lock(reg->map_ptr);
458 }
459 
460 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
461 {
462 	return type == PTR_TO_SOCKET ||
463 		type == PTR_TO_SOCKET_OR_NULL ||
464 		type == PTR_TO_TCP_SOCK ||
465 		type == PTR_TO_TCP_SOCK_OR_NULL ||
466 		type == PTR_TO_MEM ||
467 		type == PTR_TO_MEM_OR_NULL;
468 }
469 
470 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
471 {
472 	return type == ARG_PTR_TO_SOCK_COMMON;
473 }
474 
475 static bool arg_type_may_be_null(enum bpf_arg_type type)
476 {
477 	return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
478 	       type == ARG_PTR_TO_MEM_OR_NULL ||
479 	       type == ARG_PTR_TO_CTX_OR_NULL ||
480 	       type == ARG_PTR_TO_SOCKET_OR_NULL ||
481 	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
482 	       type == ARG_PTR_TO_STACK_OR_NULL;
483 }
484 
485 /* Determine whether the function releases some resources allocated by another
486  * function call. The first reference type argument will be assumed to be
487  * released by release_reference().
488  */
489 static bool is_release_function(enum bpf_func_id func_id)
490 {
491 	return func_id == BPF_FUNC_sk_release ||
492 	       func_id == BPF_FUNC_ringbuf_submit ||
493 	       func_id == BPF_FUNC_ringbuf_discard;
494 }
495 
496 static bool may_be_acquire_function(enum bpf_func_id func_id)
497 {
498 	return func_id == BPF_FUNC_sk_lookup_tcp ||
499 		func_id == BPF_FUNC_sk_lookup_udp ||
500 		func_id == BPF_FUNC_skc_lookup_tcp ||
501 		func_id == BPF_FUNC_map_lookup_elem ||
502 		func_id == BPF_FUNC_ringbuf_reserve;
503 }
504 
505 static bool is_acquire_function(enum bpf_func_id func_id,
506 				const struct bpf_map *map)
507 {
508 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
509 
510 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
511 	    func_id == BPF_FUNC_sk_lookup_udp ||
512 	    func_id == BPF_FUNC_skc_lookup_tcp ||
513 	    func_id == BPF_FUNC_ringbuf_reserve)
514 		return true;
515 
516 	if (func_id == BPF_FUNC_map_lookup_elem &&
517 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
518 	     map_type == BPF_MAP_TYPE_SOCKHASH))
519 		return true;
520 
521 	return false;
522 }
523 
524 static bool is_ptr_cast_function(enum bpf_func_id func_id)
525 {
526 	return func_id == BPF_FUNC_tcp_sock ||
527 		func_id == BPF_FUNC_sk_fullsock ||
528 		func_id == BPF_FUNC_skc_to_tcp_sock ||
529 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
530 		func_id == BPF_FUNC_skc_to_udp6_sock ||
531 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
532 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
533 }
534 
535 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
536 {
537 	return BPF_CLASS(insn->code) == BPF_STX &&
538 	       BPF_MODE(insn->code) == BPF_ATOMIC &&
539 	       insn->imm == BPF_CMPXCHG;
540 }
541 
542 /* string representation of 'enum bpf_reg_type' */
543 static const char * const reg_type_str[] = {
544 	[NOT_INIT]		= "?",
545 	[SCALAR_VALUE]		= "inv",
546 	[PTR_TO_CTX]		= "ctx",
547 	[CONST_PTR_TO_MAP]	= "map_ptr",
548 	[PTR_TO_MAP_VALUE]	= "map_value",
549 	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
550 	[PTR_TO_STACK]		= "fp",
551 	[PTR_TO_PACKET]		= "pkt",
552 	[PTR_TO_PACKET_META]	= "pkt_meta",
553 	[PTR_TO_PACKET_END]	= "pkt_end",
554 	[PTR_TO_FLOW_KEYS]	= "flow_keys",
555 	[PTR_TO_SOCKET]		= "sock",
556 	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
557 	[PTR_TO_SOCK_COMMON]	= "sock_common",
558 	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
559 	[PTR_TO_TCP_SOCK]	= "tcp_sock",
560 	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
561 	[PTR_TO_TP_BUFFER]	= "tp_buffer",
562 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
563 	[PTR_TO_BTF_ID]		= "ptr_",
564 	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
565 	[PTR_TO_PERCPU_BTF_ID]	= "percpu_ptr_",
566 	[PTR_TO_MEM]		= "mem",
567 	[PTR_TO_MEM_OR_NULL]	= "mem_or_null",
568 	[PTR_TO_RDONLY_BUF]	= "rdonly_buf",
569 	[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
570 	[PTR_TO_RDWR_BUF]	= "rdwr_buf",
571 	[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
572 	[PTR_TO_FUNC]		= "func",
573 	[PTR_TO_MAP_KEY]	= "map_key",
574 };
575 
576 static char slot_type_char[] = {
577 	[STACK_INVALID]	= '?',
578 	[STACK_SPILL]	= 'r',
579 	[STACK_MISC]	= 'm',
580 	[STACK_ZERO]	= '0',
581 };
582 
583 static void print_liveness(struct bpf_verifier_env *env,
584 			   enum bpf_reg_liveness live)
585 {
586 	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
587 		verbose(env, "_");
588 	if (live & REG_LIVE_READ)
589 		verbose(env, "r");
590 	if (live & REG_LIVE_WRITTEN)
591 		verbose(env, "w");
592 	if (live & REG_LIVE_DONE)
593 		verbose(env, "D");
594 }
595 
596 static struct bpf_func_state *func(struct bpf_verifier_env *env,
597 				   const struct bpf_reg_state *reg)
598 {
599 	struct bpf_verifier_state *cur = env->cur_state;
600 
601 	return cur->frame[reg->frameno];
602 }
603 
604 static const char *kernel_type_name(const struct btf* btf, u32 id)
605 {
606 	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
607 }
608 
609 /* The reg state of a pointer or a bounded scalar was saved when
610  * it was spilled to the stack.
611  */
612 static bool is_spilled_reg(const struct bpf_stack_state *stack)
613 {
614 	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
615 }
616 
617 static void scrub_spilled_slot(u8 *stype)
618 {
619 	if (*stype != STACK_INVALID)
620 		*stype = STACK_MISC;
621 }
622 
623 static void print_verifier_state(struct bpf_verifier_env *env,
624 				 const struct bpf_func_state *state)
625 {
626 	const struct bpf_reg_state *reg;
627 	enum bpf_reg_type t;
628 	int i;
629 
630 	if (state->frameno)
631 		verbose(env, " frame%d:", state->frameno);
632 	for (i = 0; i < MAX_BPF_REG; i++) {
633 		reg = &state->regs[i];
634 		t = reg->type;
635 		if (t == NOT_INIT)
636 			continue;
637 		verbose(env, " R%d", i);
638 		print_liveness(env, reg->live);
639 		verbose(env, "=%s", reg_type_str[t]);
640 		if (t == SCALAR_VALUE && reg->precise)
641 			verbose(env, "P");
642 		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
643 		    tnum_is_const(reg->var_off)) {
644 			/* reg->off should be 0 for SCALAR_VALUE */
645 			verbose(env, "%lld", reg->var_off.value + reg->off);
646 		} else {
647 			if (t == PTR_TO_BTF_ID ||
648 			    t == PTR_TO_BTF_ID_OR_NULL ||
649 			    t == PTR_TO_PERCPU_BTF_ID)
650 				verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
651 			verbose(env, "(id=%d", reg->id);
652 			if (reg_type_may_be_refcounted_or_null(t))
653 				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
654 			if (t != SCALAR_VALUE)
655 				verbose(env, ",off=%d", reg->off);
656 			if (type_is_pkt_pointer(t))
657 				verbose(env, ",r=%d", reg->range);
658 			else if (t == CONST_PTR_TO_MAP ||
659 				 t == PTR_TO_MAP_KEY ||
660 				 t == PTR_TO_MAP_VALUE ||
661 				 t == PTR_TO_MAP_VALUE_OR_NULL)
662 				verbose(env, ",ks=%d,vs=%d",
663 					reg->map_ptr->key_size,
664 					reg->map_ptr->value_size);
665 			if (tnum_is_const(reg->var_off)) {
666 				/* Typically an immediate SCALAR_VALUE, but
667 				 * could be a pointer whose offset is too big
668 				 * for reg->off
669 				 */
670 				verbose(env, ",imm=%llx", reg->var_off.value);
671 			} else {
672 				if (reg->smin_value != reg->umin_value &&
673 				    reg->smin_value != S64_MIN)
674 					verbose(env, ",smin_value=%lld",
675 						(long long)reg->smin_value);
676 				if (reg->smax_value != reg->umax_value &&
677 				    reg->smax_value != S64_MAX)
678 					verbose(env, ",smax_value=%lld",
679 						(long long)reg->smax_value);
680 				if (reg->umin_value != 0)
681 					verbose(env, ",umin_value=%llu",
682 						(unsigned long long)reg->umin_value);
683 				if (reg->umax_value != U64_MAX)
684 					verbose(env, ",umax_value=%llu",
685 						(unsigned long long)reg->umax_value);
686 				if (!tnum_is_unknown(reg->var_off)) {
687 					char tn_buf[48];
688 
689 					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
690 					verbose(env, ",var_off=%s", tn_buf);
691 				}
692 				if (reg->s32_min_value != reg->smin_value &&
693 				    reg->s32_min_value != S32_MIN)
694 					verbose(env, ",s32_min_value=%d",
695 						(int)(reg->s32_min_value));
696 				if (reg->s32_max_value != reg->smax_value &&
697 				    reg->s32_max_value != S32_MAX)
698 					verbose(env, ",s32_max_value=%d",
699 						(int)(reg->s32_max_value));
700 				if (reg->u32_min_value != reg->umin_value &&
701 				    reg->u32_min_value != U32_MIN)
702 					verbose(env, ",u32_min_value=%d",
703 						(int)(reg->u32_min_value));
704 				if (reg->u32_max_value != reg->umax_value &&
705 				    reg->u32_max_value != U32_MAX)
706 					verbose(env, ",u32_max_value=%d",
707 						(int)(reg->u32_max_value));
708 			}
709 			verbose(env, ")");
710 		}
711 	}
712 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
713 		char types_buf[BPF_REG_SIZE + 1];
714 		bool valid = false;
715 		int j;
716 
717 		for (j = 0; j < BPF_REG_SIZE; j++) {
718 			if (state->stack[i].slot_type[j] != STACK_INVALID)
719 				valid = true;
720 			types_buf[j] = slot_type_char[
721 					state->stack[i].slot_type[j]];
722 		}
723 		types_buf[BPF_REG_SIZE] = 0;
724 		if (!valid)
725 			continue;
726 		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
727 		print_liveness(env, state->stack[i].spilled_ptr.live);
728 		if (is_spilled_reg(&state->stack[i])) {
729 			reg = &state->stack[i].spilled_ptr;
730 			t = reg->type;
731 			verbose(env, "=%s", reg_type_str[t]);
732 			if (t == SCALAR_VALUE && reg->precise)
733 				verbose(env, "P");
734 			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
735 				verbose(env, "%lld", reg->var_off.value + reg->off);
736 		} else {
737 			verbose(env, "=%s", types_buf);
738 		}
739 	}
740 	if (state->acquired_refs && state->refs[0].id) {
741 		verbose(env, " refs=%d", state->refs[0].id);
742 		for (i = 1; i < state->acquired_refs; i++)
743 			if (state->refs[i].id)
744 				verbose(env, ",%d", state->refs[i].id);
745 	}
746 	if (state->in_callback_fn)
747 		verbose(env, " cb");
748 	if (state->in_async_callback_fn)
749 		verbose(env, " async_cb");
750 	verbose(env, "\n");
751 }
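
/* For reference, a state line produced by print_verifier_state() in this
 * version typically looks something like (values are illustrative):
 *   R0_w=inv1 R1=ctx(id=0,off=0,imm=0) R10=fp0 fp-8=mmmmmmmm
 * i.e. the register name, its liveness marks (_ r w D), the reg_type_str[]
 * name, and then either a constant value or the (id, off, bounds, var_off)
 * details.
 */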
752 
753 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
754  * small to hold src. This is different from krealloc since we don't want to preserve
755  * the contents of dst.
756  *
757  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
758  * not be allocated.
759  */
760 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
761 {
762 	size_t bytes;
763 
764 	if (ZERO_OR_NULL_PTR(src))
765 		goto out;
766 
767 	if (unlikely(check_mul_overflow(n, size, &bytes)))
768 		return NULL;
769 
770 	if (ksize(dst) < bytes) {
771 		kfree(dst);
772 		dst = kmalloc_track_caller(bytes, flags);
773 		if (!dst)
774 			return NULL;
775 	}
776 
777 	memcpy(dst, src, bytes);
778 out:
779 	return dst ? dst : ZERO_SIZE_PTR;
780 }
781 
782 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
783  * small to hold new_n items. new items are zeroed out if the array grows.
784  *
785  * Contrary to krealloc_array, does not free arr if new_n is zero.
786  */
787 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
788 {
789 	if (!new_n || old_n == new_n)
790 		goto out;
791 
792 	arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
793 	if (!arr)
794 		return NULL;
795 
796 	if (new_n > old_n)
797 		memset(arr + old_n * size, 0, (new_n - old_n) * size);
798 
799 out:
800 	return arr ? arr : ZERO_SIZE_PTR;
801 }
802 
803 static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
804 {
805 	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
806 			       sizeof(struct bpf_reference_state), GFP_KERNEL);
807 	if (!dst->refs)
808 		return -ENOMEM;
809 
810 	dst->acquired_refs = src->acquired_refs;
811 	return 0;
812 }
813 
814 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
815 {
816 	size_t n = src->allocated_stack / BPF_REG_SIZE;
817 
818 	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
819 				GFP_KERNEL);
820 	if (!dst->stack)
821 		return -ENOMEM;
822 
823 	dst->allocated_stack = src->allocated_stack;
824 	return 0;
825 }
826 
827 static int resize_reference_state(struct bpf_func_state *state, size_t n)
828 {
829 	state->refs = realloc_array(state->refs, state->acquired_refs, n,
830 				    sizeof(struct bpf_reference_state));
831 	if (!state->refs)
832 		return -ENOMEM;
833 
834 	state->acquired_refs = n;
835 	return 0;
836 }
837 
838 static int grow_stack_state(struct bpf_func_state *state, int size)
839 {
840 	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
841 
842 	if (old_n >= n)
843 		return 0;
844 
845 	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
846 	if (!state->stack)
847 		return -ENOMEM;
848 
849 	state->allocated_stack = size;
850 	return 0;
851 }
852 
853 /* Acquire a pointer id from the env and update the state->refs to include
854  * this new pointer reference.
855  * On success, returns a valid pointer id to associate with the register.
856  * On failure, returns a negative errno.
857  */
858 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
859 {
860 	struct bpf_func_state *state = cur_func(env);
861 	int new_ofs = state->acquired_refs;
862 	int id, err;
863 
864 	err = resize_reference_state(state, state->acquired_refs + 1);
865 	if (err)
866 		return err;
867 	id = ++env->id_gen;
868 	state->refs[new_ofs].id = id;
869 	state->refs[new_ofs].insn_idx = insn_idx;
870 
871 	return id;
872 }
873 
874 /* release function corresponding to acquire_reference_state(). Idempotent. */
875 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
876 {
877 	int i, last_idx;
878 
879 	last_idx = state->acquired_refs - 1;
880 	for (i = 0; i < state->acquired_refs; i++) {
881 		if (state->refs[i].id == ptr_id) {
882 			if (last_idx && i != last_idx)
883 				memcpy(&state->refs[i], &state->refs[last_idx],
884 				       sizeof(*state->refs));
885 			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
886 			state->acquired_refs--;
887 			return 0;
888 		}
889 	}
890 	return -EINVAL;
891 }
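
/* Worked example (illustrative): with acquired_refs == 3 and ref ids
 * {3, 5, 7}, releasing ptr_id 3 copies the last entry over slot 0 and
 * clears the last slot, leaving {7, 5} with acquired_refs == 2.  Releasing
 * an id that was never acquired (or was already released) returns -EINVAL.
 */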
892 
893 static void free_func_state(struct bpf_func_state *state)
894 {
895 	if (!state)
896 		return;
897 	kfree(state->refs);
898 	kfree(state->stack);
899 	kfree(state);
900 }
901 
902 static void clear_jmp_history(struct bpf_verifier_state *state)
903 {
904 	kfree(state->jmp_history);
905 	state->jmp_history = NULL;
906 	state->jmp_history_cnt = 0;
907 }
908 
909 static void free_verifier_state(struct bpf_verifier_state *state,
910 				bool free_self)
911 {
912 	int i;
913 
914 	for (i = 0; i <= state->curframe; i++) {
915 		free_func_state(state->frame[i]);
916 		state->frame[i] = NULL;
917 	}
918 	clear_jmp_history(state);
919 	if (free_self)
920 		kfree(state);
921 }
922 
923 /* copy verifier state from src to dst growing dst stack space
924  * when necessary to accommodate larger src stack
925  */
926 static int copy_func_state(struct bpf_func_state *dst,
927 			   const struct bpf_func_state *src)
928 {
929 	int err;
930 
931 	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
932 	err = copy_reference_state(dst, src);
933 	if (err)
934 		return err;
935 	return copy_stack_state(dst, src);
936 }
937 
938 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
939 			       const struct bpf_verifier_state *src)
940 {
941 	struct bpf_func_state *dst;
942 	int i, err;
943 
944 	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
945 					    src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
946 					    GFP_USER);
947 	if (!dst_state->jmp_history)
948 		return -ENOMEM;
949 	dst_state->jmp_history_cnt = src->jmp_history_cnt;
950 
951 	/* if dst has more stack frames than src, free them */
952 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
953 		free_func_state(dst_state->frame[i]);
954 		dst_state->frame[i] = NULL;
955 	}
956 	dst_state->speculative = src->speculative;
957 	dst_state->curframe = src->curframe;
958 	dst_state->active_spin_lock = src->active_spin_lock;
959 	dst_state->branches = src->branches;
960 	dst_state->parent = src->parent;
961 	dst_state->first_insn_idx = src->first_insn_idx;
962 	dst_state->last_insn_idx = src->last_insn_idx;
963 	for (i = 0; i <= src->curframe; i++) {
964 		dst = dst_state->frame[i];
965 		if (!dst) {
966 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
967 			if (!dst)
968 				return -ENOMEM;
969 			dst_state->frame[i] = dst;
970 		}
971 		err = copy_func_state(dst, src->frame[i]);
972 		if (err)
973 			return err;
974 	}
975 	return 0;
976 }
977 
978 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
979 {
980 	while (st) {
981 		u32 br = --st->branches;
982 
983 		/* WARN_ON(br > 1) technically makes sense here,
984 		 * but see comment in push_stack(), hence:
985 		 */
986 		WARN_ONCE((int)br < 0,
987 			  "BUG update_branch_counts:branches_to_explore=%d\n",
988 			  br);
989 		if (br)
990 			break;
991 		st = st->parent;
992 	}
993 }
994 
995 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
996 		     int *insn_idx, bool pop_log)
997 {
998 	struct bpf_verifier_state *cur = env->cur_state;
999 	struct bpf_verifier_stack_elem *elem, *head = env->head;
1000 	int err;
1001 
1002 	if (env->head == NULL)
1003 		return -ENOENT;
1004 
1005 	if (cur) {
1006 		err = copy_verifier_state(cur, &head->st);
1007 		if (err)
1008 			return err;
1009 	}
1010 	if (pop_log)
1011 		bpf_vlog_reset(&env->log, head->log_pos);
1012 	if (insn_idx)
1013 		*insn_idx = head->insn_idx;
1014 	if (prev_insn_idx)
1015 		*prev_insn_idx = head->prev_insn_idx;
1016 	elem = head->next;
1017 	free_verifier_state(&head->st, false);
1018 	kfree(head);
1019 	env->head = elem;
1020 	env->stack_size--;
1021 	return 0;
1022 }
1023 
1024 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1025 					     int insn_idx, int prev_insn_idx,
1026 					     bool speculative)
1027 {
1028 	struct bpf_verifier_state *cur = env->cur_state;
1029 	struct bpf_verifier_stack_elem *elem;
1030 	int err;
1031 
1032 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1033 	if (!elem)
1034 		goto err;
1035 
1036 	elem->insn_idx = insn_idx;
1037 	elem->prev_insn_idx = prev_insn_idx;
1038 	elem->next = env->head;
1039 	elem->log_pos = env->log.len_used;
1040 	env->head = elem;
1041 	env->stack_size++;
1042 	err = copy_verifier_state(&elem->st, cur);
1043 	if (err)
1044 		goto err;
1045 	elem->st.speculative |= speculative;
1046 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1047 		verbose(env, "The sequence of %d jumps is too complex.\n",
1048 			env->stack_size);
1049 		goto err;
1050 	}
1051 	if (elem->st.parent) {
1052 		++elem->st.parent->branches;
1053 		/* WARN_ON(branches > 2) technically makes sense here,
1054 		 * but
1055 		 * 1. speculative states will bump 'branches' for non-branch
1056 		 * instructions
1057 		 * 2. is_state_visited() heuristics may decide not to create
1058 		 * a new state for a sequence of branches and all such current
1059 		 * and cloned states will be pointing to a single parent state
1060 		 * which might have large 'branches' count.
1061 		 */
1062 	}
1063 	return &elem->st;
1064 err:
1065 	free_verifier_state(env->cur_state, true);
1066 	env->cur_state = NULL;
1067 	/* pop all elements and return */
1068 	while (!pop_stack(env, NULL, NULL, false));
1069 	return NULL;
1070 }
1071 
1072 #define CALLER_SAVED_REGS 6
1073 static const int caller_saved[CALLER_SAVED_REGS] = {
1074 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1075 };
1076 
1077 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1078 				struct bpf_reg_state *reg);
1079 
1080 /* This helper doesn't clear reg->id */
1081 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1082 {
1083 	reg->var_off = tnum_const(imm);
1084 	reg->smin_value = (s64)imm;
1085 	reg->smax_value = (s64)imm;
1086 	reg->umin_value = imm;
1087 	reg->umax_value = imm;
1088 
1089 	reg->s32_min_value = (s32)imm;
1090 	reg->s32_max_value = (s32)imm;
1091 	reg->u32_min_value = (u32)imm;
1092 	reg->u32_max_value = (u32)imm;
1093 }
1094 
1095 /* Mark the unknown part of a register (variable offset or scalar value) as
1096  * known to have the value @imm.
1097  */
1098 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1099 {
1100 	/* Clear id, off, and union(map_ptr, range) */
1101 	memset(((u8 *)reg) + sizeof(reg->type), 0,
1102 	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1103 	___mark_reg_known(reg, imm);
1104 }
1105 
1106 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1107 {
1108 	reg->var_off = tnum_const_subreg(reg->var_off, imm);
1109 	reg->s32_min_value = (s32)imm;
1110 	reg->s32_max_value = (s32)imm;
1111 	reg->u32_min_value = (u32)imm;
1112 	reg->u32_max_value = (u32)imm;
1113 }
1114 
1115 /* Mark the 'variable offset' part of a register as zero.  This should be
1116  * used only on registers holding a pointer type.
1117  */
1118 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1119 {
1120 	__mark_reg_known(reg, 0);
1121 }
1122 
1123 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1124 {
1125 	__mark_reg_known(reg, 0);
1126 	reg->type = SCALAR_VALUE;
1127 }
1128 
1129 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1130 				struct bpf_reg_state *regs, u32 regno)
1131 {
1132 	if (WARN_ON(regno >= MAX_BPF_REG)) {
1133 		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1134 		/* Something bad happened, let's kill all regs */
1135 		for (regno = 0; regno < MAX_BPF_REG; regno++)
1136 			__mark_reg_not_init(env, regs + regno);
1137 		return;
1138 	}
1139 	__mark_reg_known_zero(regs + regno);
1140 }
1141 
1142 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1143 {
1144 	switch (reg->type) {
1145 	case PTR_TO_MAP_VALUE_OR_NULL: {
1146 		const struct bpf_map *map = reg->map_ptr;
1147 
1148 		if (map->inner_map_meta) {
1149 			reg->type = CONST_PTR_TO_MAP;
1150 			reg->map_ptr = map->inner_map_meta;
1151 			/* transfer reg's id, which is unique for every map_lookup_elem,
1152 			 * as the UID of the inner map.
1153 			 */
1154 			reg->map_uid = reg->id;
1155 		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1156 			reg->type = PTR_TO_XDP_SOCK;
1157 		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1158 			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1159 			reg->type = PTR_TO_SOCKET;
1160 		} else {
1161 			reg->type = PTR_TO_MAP_VALUE;
1162 		}
1163 		break;
1164 	}
1165 	case PTR_TO_SOCKET_OR_NULL:
1166 		reg->type = PTR_TO_SOCKET;
1167 		break;
1168 	case PTR_TO_SOCK_COMMON_OR_NULL:
1169 		reg->type = PTR_TO_SOCK_COMMON;
1170 		break;
1171 	case PTR_TO_TCP_SOCK_OR_NULL:
1172 		reg->type = PTR_TO_TCP_SOCK;
1173 		break;
1174 	case PTR_TO_BTF_ID_OR_NULL:
1175 		reg->type = PTR_TO_BTF_ID;
1176 		break;
1177 	case PTR_TO_MEM_OR_NULL:
1178 		reg->type = PTR_TO_MEM;
1179 		break;
1180 	case PTR_TO_RDONLY_BUF_OR_NULL:
1181 		reg->type = PTR_TO_RDONLY_BUF;
1182 		break;
1183 	case PTR_TO_RDWR_BUF_OR_NULL:
1184 		reg->type = PTR_TO_RDWR_BUF;
1185 		break;
1186 	default:
1187 		WARN_ONCE(1, "unknown nullable register type");
1188 	}
1189 }
1190 
1191 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1192 {
1193 	return type_is_pkt_pointer(reg->type);
1194 }
1195 
1196 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1197 {
1198 	return reg_is_pkt_pointer(reg) ||
1199 	       reg->type == PTR_TO_PACKET_END;
1200 }
1201 
1202 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1203 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1204 				    enum bpf_reg_type which)
1205 {
1206 	/* The register can already have a range from prior markings.
1207 	 * This is fine as long as it hasn't been advanced from its
1208 	 * origin.
1209 	 */
1210 	return reg->type == which &&
1211 	       reg->id == 0 &&
1212 	       reg->off == 0 &&
1213 	       tnum_equals_const(reg->var_off, 0);
1214 }
1215 
1216 /* Reset the min/max bounds of a register */
1217 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1218 {
1219 	reg->smin_value = S64_MIN;
1220 	reg->smax_value = S64_MAX;
1221 	reg->umin_value = 0;
1222 	reg->umax_value = U64_MAX;
1223 
1224 	reg->s32_min_value = S32_MIN;
1225 	reg->s32_max_value = S32_MAX;
1226 	reg->u32_min_value = 0;
1227 	reg->u32_max_value = U32_MAX;
1228 }
1229 
1230 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1231 {
1232 	reg->smin_value = S64_MIN;
1233 	reg->smax_value = S64_MAX;
1234 	reg->umin_value = 0;
1235 	reg->umax_value = U64_MAX;
1236 }
1237 
1238 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1239 {
1240 	reg->s32_min_value = S32_MIN;
1241 	reg->s32_max_value = S32_MAX;
1242 	reg->u32_min_value = 0;
1243 	reg->u32_max_value = U32_MAX;
1244 }
1245 
1246 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1247 {
1248 	struct tnum var32_off = tnum_subreg(reg->var_off);
1249 
1250 	/* min signed is max(sign bit) | min(other bits) */
1251 	reg->s32_min_value = max_t(s32, reg->s32_min_value,
1252 			var32_off.value | (var32_off.mask & S32_MIN));
1253 	/* max signed is min(sign bit) | max(other bits) */
1254 	reg->s32_max_value = min_t(s32, reg->s32_max_value,
1255 			var32_off.value | (var32_off.mask & S32_MAX));
1256 	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1257 	reg->u32_max_value = min(reg->u32_max_value,
1258 				 (u32)(var32_off.value | var32_off.mask));
1259 }
1260 
1261 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1262 {
1263 	/* min signed is max(sign bit) | min(other bits) */
1264 	reg->smin_value = max_t(s64, reg->smin_value,
1265 				reg->var_off.value | (reg->var_off.mask & S64_MIN));
1266 	/* max signed is min(sign bit) | max(other bits) */
1267 	reg->smax_value = min_t(s64, reg->smax_value,
1268 				reg->var_off.value | (reg->var_off.mask & S64_MAX));
1269 	reg->umin_value = max(reg->umin_value, reg->var_off.value);
1270 	reg->umax_value = min(reg->umax_value,
1271 			      reg->var_off.value | reg->var_off.mask);
1272 }
1273 
1274 static void __update_reg_bounds(struct bpf_reg_state *reg)
1275 {
1276 	__update_reg32_bounds(reg);
1277 	__update_reg64_bounds(reg);
1278 }
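
/* Worked example (illustrative) of deriving bounds from the tnum: starting
 * from an unbounded register whose var_off is (value = 0x0, mask = 0x3),
 * i.e. only the two low bits are unknown, __update_reg64_bounds() clamps
 * umin/smin to at least 0x0 (value | (mask & S64_MIN)) and umax/smax to at
 * most 0x3 (value | mask), so the register is known to be in [0, 3].
 */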
1279 
1280 /* Uses signed min/max values to inform unsigned, and vice-versa */
1281 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1282 {
1283 	/* Learn sign from signed bounds.
1284 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
1285 	 * are the same, so combine.  This works even in the negative case, e.g.
1286 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1287 	 */
1288 	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1289 		reg->s32_min_value = reg->u32_min_value =
1290 			max_t(u32, reg->s32_min_value, reg->u32_min_value);
1291 		reg->s32_max_value = reg->u32_max_value =
1292 			min_t(u32, reg->s32_max_value, reg->u32_max_value);
1293 		return;
1294 	}
1295 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
1296 	 * boundary, so we must be careful.
1297 	 */
1298 	if ((s32)reg->u32_max_value >= 0) {
1299 		/* Positive.  We can't learn anything from the smin, but smax
1300 		 * is positive, hence safe.
1301 		 */
1302 		reg->s32_min_value = reg->u32_min_value;
1303 		reg->s32_max_value = reg->u32_max_value =
1304 			min_t(u32, reg->s32_max_value, reg->u32_max_value);
1305 	} else if ((s32)reg->u32_min_value < 0) {
1306 		/* Negative.  We can't learn anything from the smax, but smin
1307 		 * is negative, hence safe.
1308 		 */
1309 		reg->s32_min_value = reg->u32_min_value =
1310 			max_t(u32, reg->s32_min_value, reg->u32_min_value);
1311 		reg->s32_max_value = reg->u32_max_value;
1312 	}
1313 }
1314 
1315 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1316 {
1317 	/* Learn sign from signed bounds.
1318 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
1319 	 * are the same, so combine.  This works even in the negative case, e.g.
1320 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1321 	 */
1322 	if (reg->smin_value >= 0 || reg->smax_value < 0) {
1323 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1324 							  reg->umin_value);
1325 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1326 							  reg->umax_value);
1327 		return;
1328 	}
1329 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
1330 	 * boundary, so we must be careful.
1331 	 */
1332 	if ((s64)reg->umax_value >= 0) {
1333 		/* Positive.  We can't learn anything from the smin, but smax
1334 		 * is positive, hence safe.
1335 		 */
1336 		reg->smin_value = reg->umin_value;
1337 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1338 							  reg->umax_value);
1339 	} else if ((s64)reg->umin_value < 0) {
1340 		/* Negative.  We can't learn anything from the smax, but smin
1341 		 * is negative, hence safe.
1342 		 */
1343 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1344 							  reg->umin_value);
1345 		reg->smax_value = reg->umax_value;
1346 	}
1347 }
1348 
1349 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1350 {
1351 	__reg32_deduce_bounds(reg);
1352 	__reg64_deduce_bounds(reg);
1353 }
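
/* Worked example (illustrative) of learning sign from unsigned bounds: if
 * a previously sign-unknown 64-bit register has umin_value == 0 and
 * umax_value == 100, then (s64)umax_value >= 0, so the positive branch in
 * __reg64_deduce_bounds() applies: smin_value is raised to 0 and
 * smax_value is lowered to 100, i.e. the value is known to be non-negative.
 */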
1354 
1355 /* Attempts to improve var_off based on unsigned min/max information */
1356 static void __reg_bound_offset(struct bpf_reg_state *reg)
1357 {
1358 	struct tnum var64_off = tnum_intersect(reg->var_off,
1359 					       tnum_range(reg->umin_value,
1360 							  reg->umax_value));
1361 	struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1362 						tnum_range(reg->u32_min_value,
1363 							   reg->u32_max_value));
1364 
1365 	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1366 }
1367 
1368 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1369 {
1370 	reg->umin_value = reg->u32_min_value;
1371 	reg->umax_value = reg->u32_max_value;
1372 	/* Attempt to pull the 32-bit signed bounds into the 64-bit bounds,
1373 	 * but they must be positive; otherwise set worst-case bounds
1374 	 * and refine them later from the tnum.
1375 	 */
1376 	if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0)
1377 		reg->smax_value = reg->s32_max_value;
1378 	else
1379 		reg->smax_value = U32_MAX;
1380 	if (reg->s32_min_value >= 0)
1381 		reg->smin_value = reg->s32_min_value;
1382 	else
1383 		reg->smin_value = 0;
1384 }
1385 
1386 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1387 {
1388 	/* Special case when the 64-bit register has its upper 32 bits
1389 	 * zeroed. Typically happens after a zext or <<32, >>32 sequence,
1390 	 * allowing us to use the 32-bit bounds directly.
1391 	 */
1392 	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1393 		__reg_assign_32_into_64(reg);
1394 	} else {
1395 		/* Otherwise the best we can do is push the lower 32-bit known and
1396 		 * unknown bits into the register (var_off set from the jmp logic)
1397 		 * and then learn as much as possible from the 64-bit tnum's
1398 		 * known and unknown bits. The previous smin/smax bounds are
1399 		 * invalid here because of the jmp32 compare, so mark them unknown
1400 		 * so that they do not impact the tnum bounds calculation.
1401 		 */
1402 		__mark_reg64_unbounded(reg);
1403 		__update_reg_bounds(reg);
1404 	}
1405 
1406 	/* Intersecting with the old var_off might have improved our bounds
1407 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1408 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
1409 	 */
1410 	__reg_deduce_bounds(reg);
1411 	__reg_bound_offset(reg);
1412 	__update_reg_bounds(reg);
1413 }
1414 
1415 static bool __reg64_bound_s32(s64 a)
1416 {
1417 	return a >= S32_MIN && a <= S32_MAX;
1418 }
1419 
1420 static bool __reg64_bound_u32(u64 a)
1421 {
1422 	return a >= U32_MIN && a <= U32_MAX;
1423 }
1424 
1425 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1426 {
1427 	__mark_reg32_unbounded(reg);
1428 
1429 	if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1430 		reg->s32_min_value = (s32)reg->smin_value;
1431 		reg->s32_max_value = (s32)reg->smax_value;
1432 	}
1433 	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1434 		reg->u32_min_value = (u32)reg->umin_value;
1435 		reg->u32_max_value = (u32)reg->umax_value;
1436 	}
1437 
1438 	/* Intersecting with the old var_off might have improved our bounds
1439 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1440 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
1441 	 */
1442 	__reg_deduce_bounds(reg);
1443 	__reg_bound_offset(reg);
1444 	__update_reg_bounds(reg);
1445 }
1446 
1447 /* Mark a register as having a completely unknown (scalar) value. */
1448 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1449 			       struct bpf_reg_state *reg)
1450 {
1451 	/*
1452 	 * Clear type, id, off, and union(map_ptr, range) and
1453 	 * padding between 'type' and union
1454 	 */
1455 	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1456 	reg->type = SCALAR_VALUE;
1457 	reg->var_off = tnum_unknown;
1458 	reg->frameno = 0;
1459 	reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1460 	__mark_reg_unbounded(reg);
1461 }
1462 
1463 static void mark_reg_unknown(struct bpf_verifier_env *env,
1464 			     struct bpf_reg_state *regs, u32 regno)
1465 {
1466 	if (WARN_ON(regno >= MAX_BPF_REG)) {
1467 		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1468 		/* Something bad happened, let's kill all regs except FP */
1469 		for (regno = 0; regno < BPF_REG_FP; regno++)
1470 			__mark_reg_not_init(env, regs + regno);
1471 		return;
1472 	}
1473 	__mark_reg_unknown(env, regs + regno);
1474 }
1475 
1476 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1477 				struct bpf_reg_state *reg)
1478 {
1479 	__mark_reg_unknown(env, reg);
1480 	reg->type = NOT_INIT;
1481 }
1482 
1483 static void mark_reg_not_init(struct bpf_verifier_env *env,
1484 			      struct bpf_reg_state *regs, u32 regno)
1485 {
1486 	if (WARN_ON(regno >= MAX_BPF_REG)) {
1487 		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1488 		/* Something bad happened, let's kill all regs except FP */
1489 		for (regno = 0; regno < BPF_REG_FP; regno++)
1490 			__mark_reg_not_init(env, regs + regno);
1491 		return;
1492 	}
1493 	__mark_reg_not_init(env, regs + regno);
1494 }
1495 
1496 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1497 			    struct bpf_reg_state *regs, u32 regno,
1498 			    enum bpf_reg_type reg_type,
1499 			    struct btf *btf, u32 btf_id)
1500 {
1501 	if (reg_type == SCALAR_VALUE) {
1502 		mark_reg_unknown(env, regs, regno);
1503 		return;
1504 	}
1505 	mark_reg_known_zero(env, regs, regno);
1506 	regs[regno].type = PTR_TO_BTF_ID;
1507 	regs[regno].btf = btf;
1508 	regs[regno].btf_id = btf_id;
1509 }
1510 
1511 #define DEF_NOT_SUBREG	(0)
1512 static void init_reg_state(struct bpf_verifier_env *env,
1513 			   struct bpf_func_state *state)
1514 {
1515 	struct bpf_reg_state *regs = state->regs;
1516 	int i;
1517 
1518 	for (i = 0; i < MAX_BPF_REG; i++) {
1519 		mark_reg_not_init(env, regs, i);
1520 		regs[i].live = REG_LIVE_NONE;
1521 		regs[i].parent = NULL;
1522 		regs[i].subreg_def = DEF_NOT_SUBREG;
1523 	}
1524 
1525 	/* frame pointer */
1526 	regs[BPF_REG_FP].type = PTR_TO_STACK;
1527 	mark_reg_known_zero(env, regs, BPF_REG_FP);
1528 	regs[BPF_REG_FP].frameno = state->frameno;
1529 }
1530 
1531 #define BPF_MAIN_FUNC (-1)
1532 static void init_func_state(struct bpf_verifier_env *env,
1533 			    struct bpf_func_state *state,
1534 			    int callsite, int frameno, int subprogno)
1535 {
1536 	state->callsite = callsite;
1537 	state->frameno = frameno;
1538 	state->subprogno = subprogno;
1539 	init_reg_state(env, state);
1540 }
1541 
1542 /* Similar to push_stack(), but for async callbacks */
1543 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
1544 						int insn_idx, int prev_insn_idx,
1545 						int subprog)
1546 {
1547 	struct bpf_verifier_stack_elem *elem;
1548 	struct bpf_func_state *frame;
1549 
1550 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1551 	if (!elem)
1552 		goto err;
1553 
1554 	elem->insn_idx = insn_idx;
1555 	elem->prev_insn_idx = prev_insn_idx;
1556 	elem->next = env->head;
1557 	elem->log_pos = env->log.len_used;
1558 	env->head = elem;
1559 	env->stack_size++;
1560 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1561 		verbose(env,
1562 			"The sequence of %d jumps is too complex for async cb.\n",
1563 			env->stack_size);
1564 		goto err;
1565 	}
1566 	/* Unlike push_stack(), do not copy_verifier_state().
1567 	 * The caller state doesn't matter.
1568 	 * This is an async callback; it starts with a fresh stack.
1569 	 * Initialize it similarly to do_check_common().
1570 	 */
1571 	elem->st.branches = 1;
1572 	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1573 	if (!frame)
1574 		goto err;
1575 	init_func_state(env, frame,
1576 			BPF_MAIN_FUNC /* callsite */,
1577 			0 /* frameno within this callchain */,
1578 			subprog /* subprog number within this prog */);
1579 	elem->st.frame[0] = frame;
1580 	return &elem->st;
1581 err:
1582 	free_verifier_state(env->cur_state, true);
1583 	env->cur_state = NULL;
1584 	/* pop all elements and return */
1585 	while (!pop_stack(env, NULL, NULL, false));
1586 	return NULL;
1587 }
1588 
1589 
1590 enum reg_arg_type {
1591 	SRC_OP,		/* register is used as source operand */
1592 	DST_OP,		/* register is used as destination operand */
1593 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
1594 };
1595 
1596 static int cmp_subprogs(const void *a, const void *b)
1597 {
1598 	return ((struct bpf_subprog_info *)a)->start -
1599 	       ((struct bpf_subprog_info *)b)->start;
1600 }
1601 
1602 static int find_subprog(struct bpf_verifier_env *env, int off)
1603 {
1604 	struct bpf_subprog_info *p;
1605 
1606 	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1607 		    sizeof(env->subprog_info[0]), cmp_subprogs);
1608 	if (!p)
1609 		return -ENOENT;
1610 	return p - env->subprog_info;
1611 
1612 }
1613 
1614 static int add_subprog(struct bpf_verifier_env *env, int off)
1615 {
1616 	int insn_cnt = env->prog->len;
1617 	int ret;
1618 
1619 	if (off >= insn_cnt || off < 0) {
1620 		verbose(env, "call to invalid destination\n");
1621 		return -EINVAL;
1622 	}
1623 	ret = find_subprog(env, off);
1624 	if (ret >= 0)
1625 		return ret;
1626 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1627 		verbose(env, "too many subprograms\n");
1628 		return -E2BIG;
1629 	}
1630 	/* determine subprog starts. The end is one before the next starts */
1631 	env->subprog_info[env->subprog_cnt++].start = off;
1632 	sort(env->subprog_info, env->subprog_cnt,
1633 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1634 	return env->subprog_cnt - 1;
1635 }
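
/* Illustrative example: for a program with subprograms starting at insns
 * 0, 12 and 30, subprog_info is kept sorted by start as {0, 12, 30}, so
 * find_subprog(env, 12) does a bsearch() and returns index 1, while
 * find_subprog(env, 13) returns -ENOENT because only exact start offsets
 * match.
 */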
1636 
1637 #define MAX_KFUNC_DESCS 256
1638 #define MAX_KFUNC_BTFS	256
1639 
1640 struct bpf_kfunc_desc {
1641 	struct btf_func_model func_model;
1642 	u32 func_id;
1643 	s32 imm;
1644 	u16 offset;
1645 };
1646 
1647 struct bpf_kfunc_btf {
1648 	struct btf *btf;
1649 	struct module *module;
1650 	u16 offset;
1651 };
1652 
1653 struct bpf_kfunc_desc_tab {
1654 	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
1655 	u32 nr_descs;
1656 };
1657 
1658 struct bpf_kfunc_btf_tab {
1659 	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
1660 	u32 nr_descs;
1661 };
1662 
1663 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
1664 {
1665 	const struct bpf_kfunc_desc *d0 = a;
1666 	const struct bpf_kfunc_desc *d1 = b;
1667 
1668 	/* func_id is not greater than BTF_MAX_TYPE */
1669 	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
1670 }
1671 
1672 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
1673 {
1674 	const struct bpf_kfunc_btf *d0 = a;
1675 	const struct bpf_kfunc_btf *d1 = b;
1676 
1677 	return d0->offset - d1->offset;
1678 }
1679 
1680 static const struct bpf_kfunc_desc *
1681 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
1682 {
1683 	struct bpf_kfunc_desc desc = {
1684 		.func_id = func_id,
1685 		.offset = offset,
1686 	};
1687 	struct bpf_kfunc_desc_tab *tab;
1688 
1689 	tab = prog->aux->kfunc_tab;
1690 	return bsearch(&desc, tab->descs, tab->nr_descs,
1691 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
1692 }
1693 
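/* Look up, and on first use cache, the module BTF referenced by 'offset',
 * which indexes the fd_array supplied at program load time. The table is
 * kept sorted by offset so repeated lookups can use bsearch().
 */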
1694 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
1695 					 s16 offset, struct module **btf_modp)
1696 {
1697 	struct bpf_kfunc_btf kf_btf = { .offset = offset };
1698 	struct bpf_kfunc_btf_tab *tab;
1699 	struct bpf_kfunc_btf *b;
1700 	struct module *mod;
1701 	struct btf *btf;
1702 	int btf_fd;
1703 
1704 	tab = env->prog->aux->kfunc_btf_tab;
1705 	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
1706 		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
1707 	if (!b) {
1708 		if (tab->nr_descs == MAX_KFUNC_BTFS) {
1709 			verbose(env, "too many different module BTFs\n");
1710 			return ERR_PTR(-E2BIG);
1711 		}
1712 
1713 		if (bpfptr_is_null(env->fd_array)) {
1714 			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
1715 			return ERR_PTR(-EPROTO);
1716 		}
1717 
1718 		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
1719 					    offset * sizeof(btf_fd),
1720 					    sizeof(btf_fd)))
1721 			return ERR_PTR(-EFAULT);
1722 
1723 		btf = btf_get_by_fd(btf_fd);
1724 		if (IS_ERR(btf)) {
1725 			verbose(env, "invalid module BTF fd specified\n");
1726 			return btf;
1727 		}
1728 
1729 		if (!btf_is_module(btf)) {
1730 			verbose(env, "BTF fd for kfunc is not a module BTF\n");
1731 			btf_put(btf);
1732 			return ERR_PTR(-EINVAL);
1733 		}
1734 
1735 		mod = btf_try_get_module(btf);
1736 		if (!mod) {
1737 			btf_put(btf);
1738 			return ERR_PTR(-ENXIO);
1739 		}
1740 
1741 		b = &tab->descs[tab->nr_descs++];
1742 		b->btf = btf;
1743 		b->module = mod;
1744 		b->offset = offset;
1745 
1746 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1747 		     kfunc_btf_cmp_by_off, NULL);
1748 	}
1749 	if (btf_modp)
1750 		*btf_modp = b->module;
1751 	return b->btf;
1752 }
1753 
1754 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
1755 {
1756 	if (!tab)
1757 		return;
1758 
1759 	while (tab->nr_descs--) {
1760 		module_put(tab->descs[tab->nr_descs].module);
1761 		btf_put(tab->descs[tab->nr_descs].btf);
1762 	}
1763 	kfree(tab);
1764 }
1765 
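/* Resolve which BTF a kfunc call refers to: offset == 0 means vmlinux BTF,
 * a positive offset selects a module BTF through the fd_array.
 */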
1766 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
1767 				       u32 func_id, s16 offset,
1768 				       struct module **btf_modp)
1769 {
1770 	if (offset) {
1771 		if (offset < 0) {
1772 			/* In the future, this could be allowed to raise the limit
1773 			 * of the fd index into fd_array, interpreted as a u16.
1774 			 */
1775 			verbose(env, "negative offset disallowed for kernel module function call\n");
1776 			return ERR_PTR(-EINVAL);
1777 		}
1778 
1779 		return __find_kfunc_desc_btf(env, offset, btf_modp);
1780 	}
1781 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
1782 }
1783 
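/* Record a kfunc call target: check that kernel function calls are supported
 * for this program, resolve func_id in the right BTF, look up the function's
 * address and distill its prototype into tab->descs[] for later use by the
 * JIT.
 */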
1784 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
1785 {
1786 	const struct btf_type *func, *func_proto;
1787 	struct bpf_kfunc_btf_tab *btf_tab;
1788 	struct bpf_kfunc_desc_tab *tab;
1789 	struct bpf_prog_aux *prog_aux;
1790 	struct bpf_kfunc_desc *desc;
1791 	const char *func_name;
1792 	struct btf *desc_btf;
1793 	unsigned long addr;
1794 	int err;
1795 
1796 	prog_aux = env->prog->aux;
1797 	tab = prog_aux->kfunc_tab;
1798 	btf_tab = prog_aux->kfunc_btf_tab;
1799 	if (!tab) {
1800 		if (!btf_vmlinux) {
1801 			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
1802 			return -ENOTSUPP;
1803 		}
1804 
1805 		if (!env->prog->jit_requested) {
1806 			verbose(env, "JIT is required for calling kernel function\n");
1807 			return -ENOTSUPP;
1808 		}
1809 
1810 		if (!bpf_jit_supports_kfunc_call()) {
1811 			verbose(env, "JIT does not support calling kernel function\n");
1812 			return -ENOTSUPP;
1813 		}
1814 
1815 		if (!env->prog->gpl_compatible) {
1816 			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
1817 			return -EINVAL;
1818 		}
1819 
1820 		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
1821 		if (!tab)
1822 			return -ENOMEM;
1823 		prog_aux->kfunc_tab = tab;
1824 	}
1825 
1826 	/* func_id == 0 is always invalid, but instead of returning an error, be
1827 	 * conservative and wait until the dead code elimination pass before
1828 	 * returning an error, so that BPF programs loaded from userspace may
1829 	 * contain such invalid calls as long as they get pruned out. It is also
1830 	 * required that offset be untouched for such calls.
1831 	 */
1832 	if (!func_id && !offset)
1833 		return 0;
1834 
1835 	if (!btf_tab && offset) {
1836 		btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
1837 		if (!btf_tab)
1838 			return -ENOMEM;
1839 		prog_aux->kfunc_btf_tab = btf_tab;
1840 	}
1841 
1842 	desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
1843 	if (IS_ERR(desc_btf)) {
1844 		verbose(env, "failed to find BTF for kernel function\n");
1845 		return PTR_ERR(desc_btf);
1846 	}
1847 
1848 	if (find_kfunc_desc(env->prog, func_id, offset))
1849 		return 0;
1850 
1851 	if (tab->nr_descs == MAX_KFUNC_DESCS) {
1852 		verbose(env, "too many different kernel function calls\n");
1853 		return -E2BIG;
1854 	}
1855 
1856 	func = btf_type_by_id(desc_btf, func_id);
1857 	if (!func || !btf_type_is_func(func)) {
1858 		verbose(env, "kernel btf_id %u is not a function\n",
1859 			func_id);
1860 		return -EINVAL;
1861 	}
1862 	func_proto = btf_type_by_id(desc_btf, func->type);
1863 	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
1864 		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
1865 			func_id);
1866 		return -EINVAL;
1867 	}
1868 
1869 	func_name = btf_name_by_offset(desc_btf, func->name_off);
1870 	addr = kallsyms_lookup_name(func_name);
1871 	if (!addr) {
1872 		verbose(env, "cannot find address for kernel function %s\n",
1873 			func_name);
1874 		return -EINVAL;
1875 	}
1876 
1877 	desc = &tab->descs[tab->nr_descs++];
1878 	desc->func_id = func_id;
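	/* Encode the address as an offset from __bpf_call_base, the same
	 * representation the interpreter and JITs use for call immediates.
	 */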
1879 	desc->imm = BPF_CALL_IMM(addr);
1880 	desc->offset = offset;
1881 	err = btf_distill_func_proto(&env->log, desc_btf,
1882 				     func_proto, func_name,
1883 				     &desc->func_model);
1884 	if (!err)
1885 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1886 		     kfunc_desc_cmp_by_id_off, NULL);
1887 	return err;
1888 }
1889 
1890 static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
1891 {
1892 	const struct bpf_kfunc_desc *d0 = a;
1893 	const struct bpf_kfunc_desc *d1 = b;
1894 
1895 	if (d0->imm > d1->imm)
1896 		return 1;
1897 	else if (d0->imm < d1->imm)
1898 		return -1;
1899 	return 0;
1900 }
1901 
1902 static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
1903 {
1904 	struct bpf_kfunc_desc_tab *tab;
1905 
1906 	tab = prog->aux->kfunc_tab;
1907 	if (!tab)
1908 		return;
1909 
1910 	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1911 	     kfunc_desc_cmp_by_imm, NULL);
1912 }
1913 
1914 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
1915 {
1916 	return !!prog->aux->kfunc_tab;
1917 }
1918 
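/* Used by the JIT to recover the function model for a kfunc call site.
 * Assumes sort_kfunc_descs_by_imm() has already re-sorted the table by the
 * call's immediate, which is what the bsearch below keys on.
 */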
1919 const struct btf_func_model *
1920 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
1921 			 const struct bpf_insn *insn)
1922 {
1923 	const struct bpf_kfunc_desc desc = {
1924 		.imm = insn->imm,
1925 	};
1926 	const struct bpf_kfunc_desc *res;
1927 	struct bpf_kfunc_desc_tab *tab;
1928 
1929 	tab = prog->aux->kfunc_tab;
1930 	res = bsearch(&desc, tab->descs, tab->nr_descs,
1931 		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
1932 
1933 	return res ? &res->func_model : NULL;
1934 }
1935 
1936 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
1937 {
1938 	struct bpf_subprog_info *subprog = env->subprog_info;
1939 	struct bpf_insn *insn = env->prog->insnsi;
1940 	int i, ret, insn_cnt = env->prog->len;
1941 
1942 	/* Add entry function. */
1943 	ret = add_subprog(env, 0);
1944 	if (ret)
1945 		return ret;
1946 
1947 	for (i = 0; i < insn_cnt; i++, insn++) {
1948 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
1949 		    !bpf_pseudo_kfunc_call(insn))
1950 			continue;
1951 
1952 		if (!env->bpf_capable) {
1953 			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
1954 			return -EPERM;
1955 		}
1956 
1957 		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
1958 			ret = add_subprog(env, i + insn->imm + 1);
1959 		else
1960 			ret = add_kfunc_call(env, insn->imm, insn->off);
1961 
1962 		if (ret < 0)
1963 			return ret;
1964 	}
1965 
1966 	/* Add a fake 'exit' subprog which could simplify subprog iteration
1967 	 * logic. 'subprog_cnt' should not be increased.
1968 	 */
1969 	subprog[env->subprog_cnt].start = insn_cnt;
1970 
1971 	if (env->log.level & BPF_LOG_LEVEL2)
1972 		for (i = 0; i < env->subprog_cnt; i++)
1973 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
1974 
1975 	return 0;
1976 }
1977 
1978 static int check_subprogs(struct bpf_verifier_env *env)
1979 {
1980 	int i, subprog_start, subprog_end, off, cur_subprog = 0;
1981 	struct bpf_subprog_info *subprog = env->subprog_info;
1982 	struct bpf_insn *insn = env->prog->insnsi;
1983 	int insn_cnt = env->prog->len;
1984 
1985 	/* now check that all jumps are within the same subprog */
1986 	subprog_start = subprog[cur_subprog].start;
1987 	subprog_end = subprog[cur_subprog + 1].start;
1988 	for (i = 0; i < insn_cnt; i++) {
1989 		u8 code = insn[i].code;
1990 
1991 		if (code == (BPF_JMP | BPF_CALL) &&
1992 		    insn[i].imm == BPF_FUNC_tail_call &&
1993 		    insn[i].src_reg != BPF_PSEUDO_CALL)
1994 			subprog[cur_subprog].has_tail_call = true;
1995 		if (BPF_CLASS(code) == BPF_LD &&
1996 		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
1997 			subprog[cur_subprog].has_ld_abs = true;
1998 		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
1999 			goto next;
2000 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2001 			goto next;
2002 		off = i + insn[i].off + 1;
2003 		if (off < subprog_start || off >= subprog_end) {
2004 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
2005 			return -EINVAL;
2006 		}
2007 next:
2008 		if (i == subprog_end - 1) {
2009 			/* to avoid fall-through from one subprog into another
2010 			 * the last insn of the subprog should be either exit
2011 			 * or unconditional jump back
2012 			 */
2013 			if (code != (BPF_JMP | BPF_EXIT) &&
2014 			    code != (BPF_JMP | BPF_JA)) {
2015 				verbose(env, "last insn is not an exit or jmp\n");
2016 				return -EINVAL;
2017 			}
2018 			subprog_start = subprog_end;
2019 			cur_subprog++;
2020 			if (cur_subprog < env->subprog_cnt)
2021 				subprog_end = subprog[cur_subprog + 1].start;
2022 		}
2023 	}
2024 	return 0;
2025 }
2026 
2027 /* Parentage chain of this register (or stack slot) should take care of all
2028  * issues like callee-saved registers, stack slot allocation time, etc.
2029  */
2030 static int mark_reg_read(struct bpf_verifier_env *env,
2031 			 const struct bpf_reg_state *state,
2032 			 struct bpf_reg_state *parent, u8 flag)
2033 {
2034 	bool writes = parent == state->parent; /* Observe write marks */
2035 	int cnt = 0;
2036 
2037 	while (parent) {
2038 		/* if read wasn't screened by an earlier write ... */
2039 		if (writes && state->live & REG_LIVE_WRITTEN)
2040 			break;
2041 		if (parent->live & REG_LIVE_DONE) {
2042 			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2043 				reg_type_str[parent->type],
2044 				parent->var_off.value, parent->off);
2045 			return -EFAULT;
2046 		}
2047 		/* The first condition is more likely to be true than the
2048 		 * second, so check it first.
2049 		 */
2050 		if ((parent->live & REG_LIVE_READ) == flag ||
2051 		    parent->live & REG_LIVE_READ64)
2052 			/* The parentage chain never changes and
2053 			 * this parent was already marked as LIVE_READ.
2054 			 * There is no need to keep walking the chain again and
2055 			 * keep re-marking all parents as LIVE_READ.
2056 			 * This case happens when the same register is read
2057 			 * multiple times without writes into it in-between.
2058 			 * Also, if parent has the stronger REG_LIVE_READ64 set,
2059 			 * then no need to set the weak REG_LIVE_READ32.
2060 			 */
2061 			break;
2062 		/* ... then we depend on parent's value */
2063 		parent->live |= flag;
2064 		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2065 		if (flag == REG_LIVE_READ64)
2066 			parent->live &= ~REG_LIVE_READ32;
2067 		state = parent;
2068 		parent = state->parent;
2069 		writes = true;
2070 		cnt++;
2071 	}
2072 
2073 	if (env->longest_mark_read_walk < cnt)
2074 		env->longest_mark_read_walk = cnt;
2075 	return 0;
2076 }
2077 
2078 /* This function is supposed to be used by the following 32-bit optimization
2079  * code only. It returns TRUE if the source or destination register operates
2080  * on 64 bits; otherwise it returns FALSE.
2081  */
2082 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2083 		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2084 {
2085 	u8 code, class, op;
2086 
2087 	code = insn->code;
2088 	class = BPF_CLASS(code);
2089 	op = BPF_OP(code);
2090 	if (class == BPF_JMP) {
2091 		/* BPF_EXIT for "main" will reach here. Return TRUE
2092 		 * conservatively.
2093 		 */
2094 		if (op == BPF_EXIT)
2095 			return true;
2096 		if (op == BPF_CALL) {
2097 			/* BPF to BPF call will reach here because of marking
2098 			 * caller saved clobber with DST_OP_NO_MARK for which we
2099 			 * don't care about the register def because they are
2100 			 * already marked as NOT_INIT.
2101 			 */
2102 			if (insn->src_reg == BPF_PSEUDO_CALL)
2103 				return false;
2104 			/* Helper call will reach here because of arg type
2105 			 * check, conservatively return TRUE.
2106 			 */
2107 			if (t == SRC_OP)
2108 				return true;
2109 
2110 			return false;
2111 		}
2112 	}
2113 
2114 	if (class == BPF_ALU64 || class == BPF_JMP ||
2115 	    /* BPF_END always uses the BPF_ALU class. */
2116 	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2117 		return true;
2118 
2119 	if (class == BPF_ALU || class == BPF_JMP32)
2120 		return false;
2121 
2122 	if (class == BPF_LDX) {
2123 		if (t != SRC_OP)
2124 			return BPF_SIZE(code) == BPF_DW;
2125 		/* LDX source must be ptr. */
2126 		return true;
2127 	}
2128 
2129 	if (class == BPF_STX) {
2130 		/* BPF_STX (including atomic variants) has multiple source
2131 		 * operands, one of which is a ptr. Check whether the caller is
2132 		 * asking about it.
2133 		 */
2134 		if (t == SRC_OP && reg->type != SCALAR_VALUE)
2135 			return true;
2136 		return BPF_SIZE(code) == BPF_DW;
2137 	}
2138 
2139 	if (class == BPF_LD) {
2140 		u8 mode = BPF_MODE(code);
2141 
2142 		/* LD_IMM64 */
2143 		if (mode == BPF_IMM)
2144 			return true;
2145 
2146 		/* Both LD_IND and LD_ABS return 32-bit data. */
2147 		if (t != SRC_OP)
2148 			return false;
2149 
2150 		/* Implicit ctx ptr. */
2151 		if (regno == BPF_REG_6)
2152 			return true;
2153 
2154 		/* Explicit source could be any width. */
2155 		return true;
2156 	}
2157 
2158 	if (class == BPF_ST)
2159 		/* The only source register for BPF_ST is a ptr. */
2160 		return true;
2161 
2162 	/* Conservatively return true at default. */
2163 	return true;
2164 }
2165 
2166 /* Return the regno defined by the insn, or -1. */
2167 static int insn_def_regno(const struct bpf_insn *insn)
2168 {
2169 	switch (BPF_CLASS(insn->code)) {
2170 	case BPF_JMP:
2171 	case BPF_JMP32:
2172 	case BPF_ST:
2173 		return -1;
2174 	case BPF_STX:
2175 		if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2176 		    (insn->imm & BPF_FETCH)) {
2177 			if (insn->imm == BPF_CMPXCHG)
2178 				return BPF_REG_0;
2179 			else
2180 				return insn->src_reg;
2181 		} else {
2182 			return -1;
2183 		}
2184 	default:
2185 		return insn->dst_reg;
2186 	}
2187 }
2188 
2189 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
2190 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2191 {
2192 	int dst_reg = insn_def_regno(insn);
2193 
2194 	if (dst_reg == -1)
2195 		return false;
2196 
2197 	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
2198 }
2199 
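/* 'subreg_def' records (insn index + 1) of the instruction that last defined
 * only the low 32 bits of this register. Flag that instruction so the zext
 * patching pass can insert an explicit zero-extension if the JIT needs one,
 * then treat the register as fully 64-bit defined from here on.
 */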
2200 static void mark_insn_zext(struct bpf_verifier_env *env,
2201 			   struct bpf_reg_state *reg)
2202 {
2203 	s32 def_idx = reg->subreg_def;
2204 
2205 	if (def_idx == DEF_NOT_SUBREG)
2206 		return;
2207 
2208 	env->insn_aux_data[def_idx - 1].zext_dst = true;
2209 	/* The dst will be zero extended, so won't be sub-register anymore. */
2210 	reg->subreg_def = DEF_NOT_SUBREG;
2211 }
2212 
2213 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
2214 			 enum reg_arg_type t)
2215 {
2216 	struct bpf_verifier_state *vstate = env->cur_state;
2217 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2218 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
2219 	struct bpf_reg_state *reg, *regs = state->regs;
2220 	bool rw64;
2221 
2222 	if (regno >= MAX_BPF_REG) {
2223 		verbose(env, "R%d is invalid\n", regno);
2224 		return -EINVAL;
2225 	}
2226 
2227 	reg = &regs[regno];
2228 	rw64 = is_reg64(env, insn, regno, reg, t);
2229 	if (t == SRC_OP) {
2230 		/* check whether register used as source operand can be read */
2231 		if (reg->type == NOT_INIT) {
2232 			verbose(env, "R%d !read_ok\n", regno);
2233 			return -EACCES;
2234 		}
2235 		/* We don't need to worry about FP liveness because it's read-only */
2236 		if (regno == BPF_REG_FP)
2237 			return 0;
2238 
2239 		if (rw64)
2240 			mark_insn_zext(env, reg);
2241 
2242 		return mark_reg_read(env, reg, reg->parent,
2243 				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
2244 	} else {
2245 		/* check whether register used as dest operand can be written to */
2246 		if (regno == BPF_REG_FP) {
2247 			verbose(env, "frame pointer is read only\n");
2248 			return -EACCES;
2249 		}
2250 		reg->live |= REG_LIVE_WRITTEN;
2251 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
2252 		if (t == DST_OP)
2253 			mark_reg_unknown(env, regs, regno);
2254 	}
2255 	return 0;
2256 }
2257 
2258 /* for any branch, call, exit record the history of jmps in the given state */
2259 static int push_jmp_history(struct bpf_verifier_env *env,
2260 			    struct bpf_verifier_state *cur)
2261 {
2262 	u32 cnt = cur->jmp_history_cnt;
2263 	struct bpf_idx_pair *p;
2264 
2265 	cnt++;
2266 	p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
2267 	if (!p)
2268 		return -ENOMEM;
2269 	p[cnt - 1].idx = env->insn_idx;
2270 	p[cnt - 1].prev_idx = env->prev_insn_idx;
2271 	cur->jmp_history = p;
2272 	cur->jmp_history_cnt = cnt;
2273 	return 0;
2274 }
2275 
2276 /* Backtrack one insn at a time. If idx is not at the top of recorded
2277  * history then previous instruction came from straight line execution.
2278  */
2279 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2280 			     u32 *history)
2281 {
2282 	u32 cnt = *history;
2283 
2284 	if (cnt && st->jmp_history[cnt - 1].idx == i) {
2285 		i = st->jmp_history[cnt - 1].prev_idx;
2286 		(*history)--;
2287 	} else {
2288 		i--;
2289 	}
2290 	return i;
2291 }
2292 
2293 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2294 {
2295 	const struct btf_type *func;
2296 	struct btf *desc_btf;
2297 
2298 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2299 		return NULL;
2300 
2301 	desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
2302 	if (IS_ERR(desc_btf))
2303 		return "<error>";
2304 
2305 	func = btf_type_by_id(desc_btf, insn->imm);
2306 	return btf_name_by_offset(desc_btf, func->name_off);
2307 }
2308 
2309 /* For a given verifier state backtrack_insn() is called from the last insn to
2310  * the first insn. Its purpose is to compute a bitmask of registers and
2311  * stack slots that need precision in the parent verifier state.
2312  */
2313 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2314 			  u32 *reg_mask, u64 *stack_mask)
2315 {
2316 	const struct bpf_insn_cbs cbs = {
2317 		.cb_call	= disasm_kfunc_name,
2318 		.cb_print	= verbose,
2319 		.private_data	= env,
2320 	};
2321 	struct bpf_insn *insn = env->prog->insnsi + idx;
2322 	u8 class = BPF_CLASS(insn->code);
2323 	u8 opcode = BPF_OP(insn->code);
2324 	u8 mode = BPF_MODE(insn->code);
2325 	u32 dreg = 1u << insn->dst_reg;
2326 	u32 sreg = 1u << insn->src_reg;
2327 	u32 spi;
2328 
2329 	if (insn->code == 0)
2330 		return 0;
2331 	if (env->log.level & BPF_LOG_LEVEL) {
2332 		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2333 		verbose(env, "%d: ", idx);
2334 		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2335 	}
2336 
2337 	if (class == BPF_ALU || class == BPF_ALU64) {
2338 		if (!(*reg_mask & dreg))
2339 			return 0;
2340 		if (opcode == BPF_MOV) {
2341 			if (BPF_SRC(insn->code) == BPF_X) {
2342 				/* dreg = sreg
2343 				 * dreg needs precision after this insn
2344 				 * sreg needs precision before this insn
2345 				 */
2346 				*reg_mask &= ~dreg;
2347 				*reg_mask |= sreg;
2348 			} else {
2349 				/* dreg = K
2350 				 * dreg needs precision after this insn.
2351 				 * Corresponding register is already marked
2352 				 * as precise=true in this verifier state.
2353 				 * No further markings in parent are necessary
2354 				 */
2355 				*reg_mask &= ~dreg;
2356 			}
2357 		} else {
2358 			if (BPF_SRC(insn->code) == BPF_X) {
2359 				/* dreg += sreg
2360 				 * both dreg and sreg need precision
2361 				 * before this insn
2362 				 */
2363 				*reg_mask |= sreg;
2364 			} /* else dreg += K
2365 			   * dreg still needs precision before this insn
2366 			   */
2367 		}
2368 	} else if (class == BPF_LDX) {
2369 		if (!(*reg_mask & dreg))
2370 			return 0;
2371 		*reg_mask &= ~dreg;
2372 
2373 		/* scalars can only be spilled into stack w/o losing precision.
2374 		 * Load from any other memory can be zero extended.
2375 		 * The desire to keep that precision is already indicated
2376 		 * by 'precise' mark in corresponding register of this state.
2377 		 * No further tracking necessary.
2378 		 */
2379 		if (insn->src_reg != BPF_REG_FP)
2380 			return 0;
2381 		if (BPF_SIZE(insn->code) != BPF_DW)
2382 			return 0;
2383 
2384 		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
2385 		 * that [fp - off] slot contains scalar that needs to be
2386 		 * tracked with precision
2387 		 */
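		/* e.g. off == -8 maps to spi == 0, the first 8-byte slot below fp */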
2388 		spi = (-insn->off - 1) / BPF_REG_SIZE;
2389 		if (spi >= 64) {
2390 			verbose(env, "BUG spi %d\n", spi);
2391 			WARN_ONCE(1, "verifier backtracking bug");
2392 			return -EFAULT;
2393 		}
2394 		*stack_mask |= 1ull << spi;
2395 	} else if (class == BPF_STX || class == BPF_ST) {
2396 		if (*reg_mask & dreg)
2397 			/* stx & st shouldn't be using _scalar_ dst_reg
2398 			 * to access memory. It means backtracking
2399 			 * encountered a case of pointer subtraction.
2400 			 */
2401 			return -ENOTSUPP;
2402 		/* scalars can only be spilled into stack */
2403 		if (insn->dst_reg != BPF_REG_FP)
2404 			return 0;
2405 		if (BPF_SIZE(insn->code) != BPF_DW)
2406 			return 0;
2407 		spi = (-insn->off - 1) / BPF_REG_SIZE;
2408 		if (spi >= 64) {
2409 			verbose(env, "BUG spi %d\n", spi);
2410 			WARN_ONCE(1, "verifier backtracking bug");
2411 			return -EFAULT;
2412 		}
2413 		if (!(*stack_mask & (1ull << spi)))
2414 			return 0;
2415 		*stack_mask &= ~(1ull << spi);
2416 		if (class == BPF_STX)
2417 			*reg_mask |= sreg;
2418 	} else if (class == BPF_JMP || class == BPF_JMP32) {
2419 		if (opcode == BPF_CALL) {
2420 			if (insn->src_reg == BPF_PSEUDO_CALL)
2421 				return -ENOTSUPP;
2422 			/* regular helper call sets R0 */
2423 			*reg_mask &= ~1;
2424 			if (*reg_mask & 0x3f) {
2425 				/* if backtracking was looking for registers R1-R5
2426 				 * they should have been found already.
2427 				 */
2428 				verbose(env, "BUG regs %x\n", *reg_mask);
2429 				WARN_ONCE(1, "verifier backtracking bug");
2430 				return -EFAULT;
2431 			}
2432 		} else if (opcode == BPF_EXIT) {
2433 			return -ENOTSUPP;
2434 		}
2435 	} else if (class == BPF_LD) {
2436 		if (!(*reg_mask & dreg))
2437 			return 0;
2438 		*reg_mask &= ~dreg;
2439 		/* It's ld_imm64 or ld_abs or ld_ind.
2440 		 * For ld_imm64 no further tracking of precision
2441 		 * into parent is necessary
2442 		 */
2443 		if (mode == BPF_IND || mode == BPF_ABS)
2444 			/* to be analyzed */
2445 			return -ENOTSUPP;
2446 	}
2447 	return 0;
2448 }
2449 
2450 /* the scalar precision tracking algorithm:
2451  * . at the start all registers have precise=false.
2452  * . scalar ranges are tracked as normal through alu and jmp insns.
2453  * . once precise value of the scalar register is used in:
2454  *   .  ptr + scalar alu
2455  *   . if (scalar cond K|scalar)
2456  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
2457  *   backtrack through the verifier states and mark all registers and
2458  *   stack slots with spilled constants that fed into these scalar
2459  *   registers as precise.
2460  * . during state pruning two registers (or spilled stack slots)
2461  *   are equivalent if both are not precise.
2462  *
2463  * Note the verifier cannot simply walk register parentage chain,
2464  * since many different registers and stack slots could have been
2465  * used to compute single precise scalar.
2466  *
2467  * The approach of starting with precise=true for all registers and then
2468  * backtracking to mark a register as not precise when the verifier detects
2469  * that the program doesn't care about the specific value (e.g., when a helper
2470  * takes a register as an ARG_ANYTHING parameter) is not safe.
2471  *
2472  * It's ok to walk single parentage chain of the verifier states.
2473  * It's possible that this backtracking will go all the way till 1st insn.
2474  * All other branches will be explored for needing precision later.
2475  *
2476  * The backtracking needs to deal with cases like:
2477  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
2478  * r9 -= r8
2479  * r5 = r9
2480  * if r5 > 0x79f goto pc+7
2481  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
2482  * r5 += 1
2483  * ...
2484  * call bpf_perf_event_output#25
2485  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
2486  *
2487  * and this case:
2488  * r6 = 1
2489  * call foo // uses callee's r6 inside to compute r0
2490  * r0 += r6
2491  * if r0 == 0 goto
2492  *
2493  * to track above reg_mask/stack_mask needs to be independent for each frame.
2494  *
2495  * Also if parent's curframe > frame where backtracking started,
2496  * the verifier needs to mark registers in both frames, otherwise callees
2497  * may incorrectly prune callers. This is similar to
2498  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
2499  *
2500  * For now backtracking falls back into conservative marking.
2501  */
2502 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
2503 				     struct bpf_verifier_state *st)
2504 {
2505 	struct bpf_func_state *func;
2506 	struct bpf_reg_state *reg;
2507 	int i, j;
2508 
2509 	/* big hammer: mark all scalars precise in this path.
2510 	 * pop_stack may still get !precise scalars.
2511 	 */
2512 	for (; st; st = st->parent)
2513 		for (i = 0; i <= st->curframe; i++) {
2514 			func = st->frame[i];
2515 			for (j = 0; j < BPF_REG_FP; j++) {
2516 				reg = &func->regs[j];
2517 				if (reg->type != SCALAR_VALUE)
2518 					continue;
2519 				reg->precise = true;
2520 			}
2521 			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2522 				if (!is_spilled_reg(&func->stack[j]))
2523 					continue;
2524 				reg = &func->stack[j].spilled_ptr;
2525 				if (reg->type != SCALAR_VALUE)
2526 					continue;
2527 				reg->precise = true;
2528 			}
2529 		}
2530 }
2531 
2532 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2533 				  int spi)
2534 {
2535 	struct bpf_verifier_state *st = env->cur_state;
2536 	int first_idx = st->first_insn_idx;
2537 	int last_idx = env->insn_idx;
2538 	struct bpf_func_state *func;
2539 	struct bpf_reg_state *reg;
2540 	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2541 	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2542 	bool skip_first = true;
2543 	bool new_marks = false;
2544 	int i, err;
2545 
2546 	if (!env->bpf_capable)
2547 		return 0;
2548 
2549 	func = st->frame[st->curframe];
2550 	if (regno >= 0) {
2551 		reg = &func->regs[regno];
2552 		if (reg->type != SCALAR_VALUE) {
2553 			WARN_ONCE(1, "backtracing misuse");
2554 			return -EFAULT;
2555 		}
2556 		if (!reg->precise)
2557 			new_marks = true;
2558 		else
2559 			reg_mask = 0;
2560 		reg->precise = true;
2561 	}
2562 
2563 	while (spi >= 0) {
2564 		if (!is_spilled_reg(&func->stack[spi])) {
2565 			stack_mask = 0;
2566 			break;
2567 		}
2568 		reg = &func->stack[spi].spilled_ptr;
2569 		if (reg->type != SCALAR_VALUE) {
2570 			stack_mask = 0;
2571 			break;
2572 		}
2573 		if (!reg->precise)
2574 			new_marks = true;
2575 		else
2576 			stack_mask = 0;
2577 		reg->precise = true;
2578 		break;
2579 	}
2580 
2581 	if (!new_marks)
2582 		return 0;
2583 	if (!reg_mask && !stack_mask)
2584 		return 0;
2585 	for (;;) {
2586 		DECLARE_BITMAP(mask, 64);
2587 		u32 history = st->jmp_history_cnt;
2588 
2589 		if (env->log.level & BPF_LOG_LEVEL)
2590 			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2591 		for (i = last_idx;;) {
2592 			if (skip_first) {
2593 				err = 0;
2594 				skip_first = false;
2595 			} else {
2596 				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2597 			}
2598 			if (err == -ENOTSUPP) {
2599 				mark_all_scalars_precise(env, st);
2600 				return 0;
2601 			} else if (err) {
2602 				return err;
2603 			}
2604 			if (!reg_mask && !stack_mask)
2605 				/* Found assignment(s) into tracked register in this state.
2606 				 * Since this state is already marked, just return.
2607 				 * Nothing to be tracked further in the parent state.
2608 				 */
2609 				return 0;
2610 			if (i == first_idx)
2611 				break;
2612 			i = get_prev_insn_idx(st, i, &history);
2613 			if (i >= env->prog->len) {
2614 				/* This can happen if backtracking reached insn 0
2615 				 * and there are still reg_mask or stack_mask
2616 				 * to backtrack.
2617 				 * It means the backtracking missed the spot where
2618 				 * particular register was initialized with a constant.
2619 				 */
2620 				verbose(env, "BUG backtracking idx %d\n", i);
2621 				WARN_ONCE(1, "verifier backtracking bug");
2622 				return -EFAULT;
2623 			}
2624 		}
2625 		st = st->parent;
2626 		if (!st)
2627 			break;
2628 
2629 		new_marks = false;
2630 		func = st->frame[st->curframe];
2631 		bitmap_from_u64(mask, reg_mask);
2632 		for_each_set_bit(i, mask, 32) {
2633 			reg = &func->regs[i];
2634 			if (reg->type != SCALAR_VALUE) {
2635 				reg_mask &= ~(1u << i);
2636 				continue;
2637 			}
2638 			if (!reg->precise)
2639 				new_marks = true;
2640 			reg->precise = true;
2641 		}
2642 
2643 		bitmap_from_u64(mask, stack_mask);
2644 		for_each_set_bit(i, mask, 64) {
2645 			if (i >= func->allocated_stack / BPF_REG_SIZE) {
2646 				/* the sequence of instructions:
2647 				 * 2: (bf) r3 = r10
2648 				 * 3: (7b) *(u64 *)(r3 -8) = r0
2649 				 * 4: (79) r4 = *(u64 *)(r10 -8)
2650 				 * doesn't contain jmps. It's backtracked
2651 				 * as a single block.
2652 				 * During backtracking insn 3 is not recognized as
2653 				 * stack access, so at the end of backtracking
2654 				 * stack slot fp-8 is still marked in stack_mask.
2655 				 * However the parent state may not have accessed
2656 				 * fp-8 and it's "unallocated" stack space.
2657 				 * In such case fallback to conservative.
2658 				 */
2659 				mark_all_scalars_precise(env, st);
2660 				return 0;
2661 			}
2662 
2663 			if (!is_spilled_reg(&func->stack[i])) {
2664 				stack_mask &= ~(1ull << i);
2665 				continue;
2666 			}
2667 			reg = &func->stack[i].spilled_ptr;
2668 			if (reg->type != SCALAR_VALUE) {
2669 				stack_mask &= ~(1ull << i);
2670 				continue;
2671 			}
2672 			if (!reg->precise)
2673 				new_marks = true;
2674 			reg->precise = true;
2675 		}
2676 		if (env->log.level & BPF_LOG_LEVEL) {
2677 			print_verifier_state(env, func);
2678 			verbose(env, "parent %s regs=%x stack=%llx marks\n",
2679 				new_marks ? "didn't have" : "already had",
2680 				reg_mask, stack_mask);
2681 		}
2682 
2683 		if (!reg_mask && !stack_mask)
2684 			break;
2685 		if (!new_marks)
2686 			break;
2687 
2688 		last_idx = st->last_insn_idx;
2689 		first_idx = st->first_insn_idx;
2690 	}
2691 	return 0;
2692 }
2693 
2694 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2695 {
2696 	return __mark_chain_precision(env, regno, -1);
2697 }
2698 
2699 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2700 {
2701 	return __mark_chain_precision(env, -1, spi);
2702 }
2703 
2704 static bool is_spillable_regtype(enum bpf_reg_type type)
2705 {
2706 	switch (type) {
2707 	case PTR_TO_MAP_VALUE:
2708 	case PTR_TO_MAP_VALUE_OR_NULL:
2709 	case PTR_TO_STACK:
2710 	case PTR_TO_CTX:
2711 	case PTR_TO_PACKET:
2712 	case PTR_TO_PACKET_META:
2713 	case PTR_TO_PACKET_END:
2714 	case PTR_TO_FLOW_KEYS:
2715 	case CONST_PTR_TO_MAP:
2716 	case PTR_TO_SOCKET:
2717 	case PTR_TO_SOCKET_OR_NULL:
2718 	case PTR_TO_SOCK_COMMON:
2719 	case PTR_TO_SOCK_COMMON_OR_NULL:
2720 	case PTR_TO_TCP_SOCK:
2721 	case PTR_TO_TCP_SOCK_OR_NULL:
2722 	case PTR_TO_XDP_SOCK:
2723 	case PTR_TO_BTF_ID:
2724 	case PTR_TO_BTF_ID_OR_NULL:
2725 	case PTR_TO_RDONLY_BUF:
2726 	case PTR_TO_RDONLY_BUF_OR_NULL:
2727 	case PTR_TO_RDWR_BUF:
2728 	case PTR_TO_RDWR_BUF_OR_NULL:
2729 	case PTR_TO_PERCPU_BTF_ID:
2730 	case PTR_TO_MEM:
2731 	case PTR_TO_MEM_OR_NULL:
2732 	case PTR_TO_FUNC:
2733 	case PTR_TO_MAP_KEY:
2734 		return true;
2735 	default:
2736 		return false;
2737 	}
2738 }
2739 
2740 /* Does this register contain a constant zero? */
2741 static bool register_is_null(struct bpf_reg_state *reg)
2742 {
2743 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2744 }
2745 
2746 static bool register_is_const(struct bpf_reg_state *reg)
2747 {
2748 	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2749 }
2750 
2751 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2752 {
2753 	return tnum_is_unknown(reg->var_off) &&
2754 	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2755 	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2756 	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2757 	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2758 }
2759 
2760 static bool register_is_bounded(struct bpf_reg_state *reg)
2761 {
2762 	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2763 }
2764 
2765 static bool __is_pointer_value(bool allow_ptr_leaks,
2766 			       const struct bpf_reg_state *reg)
2767 {
2768 	if (allow_ptr_leaks)
2769 		return false;
2770 
2771 	return reg->type != SCALAR_VALUE;
2772 }
2773 
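/* Record a (possibly narrow) register spill: copy the register state into the
 * slot, mark the written bytes as STACK_SPILL and scrub the remaining lower
 * bytes of a narrow spill so they are not treated as part of the spill.
 */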
2774 static void save_register_state(struct bpf_func_state *state,
2775 				int spi, struct bpf_reg_state *reg,
2776 				int size)
2777 {
2778 	int i;
2779 
2780 	state->stack[spi].spilled_ptr = *reg;
2781 	if (size == BPF_REG_SIZE)
2782 		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2783 
2784 	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
2785 		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
2786 
2787 	/* size < 8 bytes spill */
2788 	for (; i; i--)
2789 		scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
2790 }
2791 
2792 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2793  * stack boundary and alignment are checked in check_mem_access()
2794  */
2795 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2796 				       /* stack frame we're writing to */
2797 				       struct bpf_func_state *state,
2798 				       int off, int size, int value_regno,
2799 				       int insn_idx)
2800 {
2801 	struct bpf_func_state *cur; /* state of the current function */
2802 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2803 	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
2804 	struct bpf_reg_state *reg = NULL;
2805 
2806 	err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
2807 	if (err)
2808 		return err;
2809 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2810 	 * so it's aligned access and [off, off + size) are within stack limits
2811 	 */
2812 	if (!env->allow_ptr_leaks &&
2813 	    state->stack[spi].slot_type[0] == STACK_SPILL &&
2814 	    size != BPF_REG_SIZE) {
2815 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
2816 		return -EACCES;
2817 	}
2818 
2819 	cur = env->cur_state->frame[env->cur_state->curframe];
2820 	if (value_regno >= 0)
2821 		reg = &cur->regs[value_regno];
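	/* Spectre v4: if a spillable pointer is written, or the slot still has
	 * uninitialized bytes, flag this store so a later rewrite pass can
	 * sanitize it against speculative store bypass.
	 */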
2822 	if (!env->bypass_spec_v4) {
2823 		bool sanitize = reg && is_spillable_regtype(reg->type);
2824 
2825 		for (i = 0; i < size; i++) {
2826 			if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2827 				sanitize = true;
2828 				break;
2829 			}
2830 		}
2831 
2832 		if (sanitize)
2833 			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2834 	}
2835 
2836 	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
2837 	    !register_is_null(reg) && env->bpf_capable) {
2838 		if (dst_reg != BPF_REG_FP) {
2839 			/* The backtracking logic can only recognize an explicit
2840 			 * stack slot address like [fp - 8]. A spill of a scalar
2841 			 * via a different register has to be conservative.
2842 			 * Backtrack from here and mark as precise all registers
2843 			 * that contributed to 'reg' being a constant.
2844 			 */
2845 			err = mark_chain_precision(env, value_regno);
2846 			if (err)
2847 				return err;
2848 		}
2849 		save_register_state(state, spi, reg, size);
2850 	} else if (reg && is_spillable_regtype(reg->type)) {
2851 		/* register containing pointer is being spilled into stack */
2852 		if (size != BPF_REG_SIZE) {
2853 			verbose_linfo(env, insn_idx, "; ");
2854 			verbose(env, "invalid size of register spill\n");
2855 			return -EACCES;
2856 		}
2857 		if (state != cur && reg->type == PTR_TO_STACK) {
2858 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2859 			return -EINVAL;
2860 		}
2861 		save_register_state(state, spi, reg, size);
2862 	} else {
2863 		u8 type = STACK_MISC;
2864 
2865 		/* regular write of data into stack destroys any spilled ptr */
2866 		state->stack[spi].spilled_ptr.type = NOT_INIT;
2867 		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2868 		if (is_spilled_reg(&state->stack[spi]))
2869 			for (i = 0; i < BPF_REG_SIZE; i++)
2870 				scrub_spilled_slot(&state->stack[spi].slot_type[i]);
2871 
2872 		/* only mark the slot as written if all 8 bytes were written,
2873 		 * otherwise read propagation may incorrectly stop too soon
2874 		 * when stack slots are partially written.
2875 		 * This heuristic means that read propagation will be
2876 		 * conservative, since it will add reg_live_read marks
2877 		 * to stack slots all the way to the first state when a program
2878 		 * writes+reads less than 8 bytes.
2879 		 */
2880 		if (size == BPF_REG_SIZE)
2881 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2882 
2883 		/* when we zero initialize stack slots mark them as such */
2884 		if (reg && register_is_null(reg)) {
2885 			/* backtracking doesn't work for STACK_ZERO yet. */
2886 			err = mark_chain_precision(env, value_regno);
2887 			if (err)
2888 				return err;
2889 			type = STACK_ZERO;
2890 		}
2891 
2892 		/* Mark slots affected by this stack write. */
2893 		for (i = 0; i < size; i++)
2894 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
2895 				type;
2896 	}
2897 	return 0;
2898 }
2899 
2900 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2901  * known to contain a variable offset.
2902  * This function checks whether the write is permitted and conservatively
2903  * tracks the effects of the write, considering that each stack slot in the
2904  * dynamic range is potentially written to.
2905  *
2906  * 'off' includes 'regno->off'.
2907  * 'value_regno' can be -1, meaning that an unknown value is being written to
2908  * the stack.
2909  *
2910  * Spilled pointers in range are not marked as written because we don't know
2911  * what's going to be actually written. This means that read propagation for
2912  * future reads cannot be terminated by this write.
2913  *
2914  * For privileged programs, uninitialized stack slots are considered
2915  * initialized by this write (even though we don't know exactly what offsets
2916  * are going to be written to). The idea is that we don't want the verifier to
2917  * reject future reads that access slots written to through variable offsets.
2918  */
2919 static int check_stack_write_var_off(struct bpf_verifier_env *env,
2920 				     /* func where register points to */
2921 				     struct bpf_func_state *state,
2922 				     int ptr_regno, int off, int size,
2923 				     int value_regno, int insn_idx)
2924 {
2925 	struct bpf_func_state *cur; /* state of the current function */
2926 	int min_off, max_off;
2927 	int i, err;
2928 	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2929 	bool writing_zero = false;
2930 	/* set if the fact that we're writing a zero is used to let any
2931 	 * stack slots remain STACK_ZERO
2932 	 */
2933 	bool zero_used = false;
2934 
2935 	cur = env->cur_state->frame[env->cur_state->curframe];
2936 	ptr_reg = &cur->regs[ptr_regno];
2937 	min_off = ptr_reg->smin_value + off;
2938 	max_off = ptr_reg->smax_value + off + size;
2939 	if (value_regno >= 0)
2940 		value_reg = &cur->regs[value_regno];
2941 	if (value_reg && register_is_null(value_reg))
2942 		writing_zero = true;
2943 
2944 	err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
2945 	if (err)
2946 		return err;
2947 
2948 
2949 	/* Variable offset writes destroy any spilled pointers in range. */
2950 	for (i = min_off; i < max_off; i++) {
2951 		u8 new_type, *stype;
2952 		int slot, spi;
2953 
2954 		slot = -i - 1;
2955 		spi = slot / BPF_REG_SIZE;
2956 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2957 
2958 		if (!env->allow_ptr_leaks
2959 				&& *stype != NOT_INIT
2960 				&& *stype != SCALAR_VALUE) {
2961 			/* Reject the write if there are spilled pointers in
2962 			 * range. If we didn't reject here, the ptr status
2963 			 * would be erased below (even though not all slots are
2964 			 * actually overwritten), possibly opening the door to
2965 			 * leaks.
2966 			 */
2967 			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
2968 				insn_idx, i);
2969 			return -EINVAL;
2970 		}
2971 
2972 		/* Erase all spilled pointers. */
2973 		state->stack[spi].spilled_ptr.type = NOT_INIT;
2974 
2975 		/* Update the slot type. */
2976 		new_type = STACK_MISC;
2977 		if (writing_zero && *stype == STACK_ZERO) {
2978 			new_type = STACK_ZERO;
2979 			zero_used = true;
2980 		}
2981 		/* If the slot is STACK_INVALID, we check whether it's OK to
2982 		 * pretend that it will be initialized by this write. The slot
2983 		 * might not actually be written to, and so if we mark it as
2984 		 * initialized future reads might leak uninitialized memory.
2985 		 * For privileged programs, we will accept such reads to slots
2986 		 * that may or may not be written because, if we were to reject
2987 		 * them, the error would be too confusing.
2988 		 */
2989 		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
2990 			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
2991 					insn_idx, i);
2992 			return -EINVAL;
2993 		}
2994 		*stype = new_type;
2995 	}
2996 	if (zero_used) {
2997 		/* backtracking doesn't work for STACK_ZERO yet. */
2998 		err = mark_chain_precision(env, value_regno);
2999 		if (err)
3000 			return err;
3001 	}
3002 	return 0;
3003 }
3004 
3005 /* When register 'dst_regno' is assigned some values from stack[min_off,
3006  * max_off), we set the register's type according to the types of the
3007  * respective stack slots. If all the stack values are known to be zeros, then
3008  * so is the destination reg. Otherwise, the register is considered to be
3009  * SCALAR. This function does not deal with register filling; the caller must
3010  * ensure that all spilled registers in the stack range have been marked as
3011  * read.
3012  */
3013 static void mark_reg_stack_read(struct bpf_verifier_env *env,
3014 				/* func where src register points to */
3015 				struct bpf_func_state *ptr_state,
3016 				int min_off, int max_off, int dst_regno)
3017 {
3018 	struct bpf_verifier_state *vstate = env->cur_state;
3019 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3020 	int i, slot, spi;
3021 	u8 *stype;
3022 	int zeros = 0;
3023 
3024 	for (i = min_off; i < max_off; i++) {
3025 		slot = -i - 1;
3026 		spi = slot / BPF_REG_SIZE;
3027 		stype = ptr_state->stack[spi].slot_type;
3028 		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3029 			break;
3030 		zeros++;
3031 	}
3032 	if (zeros == max_off - min_off) {
3033 		/* any access_size read into register is zero extended,
3034 		 * so the whole register == const_zero
3035 		 */
3036 		__mark_reg_const_zero(&state->regs[dst_regno]);
3037 		/* backtracking doesn't support STACK_ZERO yet,
3038 		 * so mark it precise here, so that later
3039 		 * backtracking can stop here.
3040 		 * Backtracking may not need this if this register
3041 		 * doesn't participate in pointer adjustment.
3042 		 * Forward propagation of precise flag is not
3043 		 * necessary either. This mark is only to stop
3044 		 * backtracking. Any register that contributed
3045 		 * to const 0 was marked precise before spill.
3046 		 */
3047 		state->regs[dst_regno].precise = true;
3048 	} else {
3049 		/* have read misc data from the stack */
3050 		mark_reg_unknown(env, state->regs, dst_regno);
3051 	}
3052 	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3053 }
3054 
3055 /* Read the stack at 'off' and put the results into the register indicated by
3056  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3057  * spilled reg.
3058  *
3059  * 'dst_regno' can be -1, meaning that the read value is not going to a
3060  * register.
3061  *
3062  * The access is assumed to be within the current stack bounds.
3063  */
3064 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3065 				      /* func where src register points to */
3066 				      struct bpf_func_state *reg_state,
3067 				      int off, int size, int dst_regno)
3068 {
3069 	struct bpf_verifier_state *vstate = env->cur_state;
3070 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3071 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3072 	struct bpf_reg_state *reg;
3073 	u8 *stype, type;
3074 
3075 	stype = reg_state->stack[spi].slot_type;
3076 	reg = &reg_state->stack[spi].spilled_ptr;
3077 
3078 	if (is_spilled_reg(&reg_state->stack[spi])) {
3079 		u8 spill_size = 1;
3080 
3081 		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
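		/* Count contiguous STACK_SPILL bytes from the top of the slot
		 * down to recover how wide the original spill was.
		 */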
3082 			spill_size++;
3083 
3084 		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3085 			if (reg->type != SCALAR_VALUE) {
3086 				verbose_linfo(env, env->insn_idx, "; ");
3087 				verbose(env, "invalid size of register fill\n");
3088 				return -EACCES;
3089 			}
3090 
3091 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3092 			if (dst_regno < 0)
3093 				return 0;
3094 
3095 			if (!(off % BPF_REG_SIZE) && size == spill_size) {
3096 				/* The earlier check_reg_arg() has decided the
3097 				 * subreg_def for this insn.  Save it first.
3098 				 */
3099 				s32 subreg_def = state->regs[dst_regno].subreg_def;
3100 
3101 				state->regs[dst_regno] = *reg;
3102 				state->regs[dst_regno].subreg_def = subreg_def;
3103 			} else {
3104 				for (i = 0; i < size; i++) {
3105 					type = stype[(slot - i) % BPF_REG_SIZE];
3106 					if (type == STACK_SPILL)
3107 						continue;
3108 					if (type == STACK_MISC)
3109 						continue;
3110 					verbose(env, "invalid read from stack off %d+%d size %d\n",
3111 						off, i, size);
3112 					return -EACCES;
3113 				}
3114 				mark_reg_unknown(env, state->regs, dst_regno);
3115 			}
3116 			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3117 			return 0;
3118 		}
3119 
3120 		if (dst_regno >= 0) {
3121 			/* restore register state from stack */
3122 			state->regs[dst_regno] = *reg;
3123 			/* mark reg as written since spilled pointer state likely
3124 			 * has its liveness marks cleared by is_state_visited()
3125 			 * which resets stack/reg liveness for state transitions
3126 			 */
3127 			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3128 		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3129 			/* If dst_regno==-1, the caller is asking us whether
3130 			 * it is acceptable to use this value as a SCALAR_VALUE
3131 			 * (e.g. for XADD).
3132 			 * We must not allow unprivileged callers to do that
3133 			 * with spilled pointers.
3134 			 */
3135 			verbose(env, "leaking pointer from stack off %d\n",
3136 				off);
3137 			return -EACCES;
3138 		}
3139 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3140 	} else {
3141 		for (i = 0; i < size; i++) {
3142 			type = stype[(slot - i) % BPF_REG_SIZE];
3143 			if (type == STACK_MISC)
3144 				continue;
3145 			if (type == STACK_ZERO)
3146 				continue;
3147 			verbose(env, "invalid read from stack off %d+%d size %d\n",
3148 				off, i, size);
3149 			return -EACCES;
3150 		}
3151 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3152 		if (dst_regno >= 0)
3153 			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3154 	}
3155 	return 0;
3156 }
3157 
3158 enum stack_access_src {
3159 	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
3160 	ACCESS_HELPER = 2,  /* the access is performed by a helper */
3161 };
3162 
3163 static int check_stack_range_initialized(struct bpf_verifier_env *env,
3164 					 int regno, int off, int access_size,
3165 					 bool zero_size_allowed,
3166 					 enum stack_access_src type,
3167 					 struct bpf_call_arg_meta *meta);
3168 
3169 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3170 {
3171 	return cur_regs(env) + regno;
3172 }
3173 
3174 /* Read the stack at 'ptr_regno + off' and put the result into the register
3175  * 'dst_regno'.
3176  * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
3177  * but not its variable offset.
3178  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3179  *
3180  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3181  * filling registers (i.e. reads of spilled register cannot be detected when
3182  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3183  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3184  * offset; for a fixed offset check_stack_read_fixed_off should be used
3185  * instead.
3186  */
3187 static int check_stack_read_var_off(struct bpf_verifier_env *env,
3188 				    int ptr_regno, int off, int size, int dst_regno)
3189 {
3190 	/* The state of the source register. */
3191 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3192 	struct bpf_func_state *ptr_state = func(env, reg);
3193 	int err;
3194 	int min_off, max_off;
3195 
3196 	/* Note that we pass a NULL meta, so raw access will not be permitted.
3197 	 */
3198 	err = check_stack_range_initialized(env, ptr_regno, off, size,
3199 					    false, ACCESS_DIRECT, NULL);
3200 	if (err)
3201 		return err;
3202 
3203 	min_off = reg->smin_value + off;
3204 	max_off = reg->smax_value + off;
3205 	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3206 	return 0;
3207 }
3208 
3209 /* check_stack_read dispatches to check_stack_read_fixed_off or
3210  * check_stack_read_var_off.
3211  *
3212  * The caller must ensure that the offset falls within the allocated stack
3213  * bounds.
3214  *
3215  * 'dst_regno' is a register which will receive the value from the stack. It
3216  * can be -1, meaning that the read value is not going to a register.
3217  */
3218 static int check_stack_read(struct bpf_verifier_env *env,
3219 			    int ptr_regno, int off, int size,
3220 			    int dst_regno)
3221 {
3222 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3223 	struct bpf_func_state *state = func(env, reg);
3224 	int err;
3225 	/* Some accesses are only permitted with a static offset. */
3226 	bool var_off = !tnum_is_const(reg->var_off);
3227 
3228 	/* The offset is required to be static when reads don't go to a
3229 	 * register, in order to not leak pointers (see
3230 	 * check_stack_read_fixed_off).
3231 	 */
3232 	if (dst_regno < 0 && var_off) {
3233 		char tn_buf[48];
3234 
3235 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3236 		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3237 			tn_buf, off, size);
3238 		return -EACCES;
3239 	}
3240 	/* Variable offset is prohibited for unprivileged mode for simplicity
3241 	 * since it requires corresponding support in Spectre masking for stack
3242 	 * ALU. See also retrieve_ptr_limit().
3243 	 */
3244 	if (!env->bypass_spec_v1 && var_off) {
3245 		char tn_buf[48];
3246 
3247 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3248 		verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
3249 				ptr_regno, tn_buf);
3250 		return -EACCES;
3251 	}
3252 
3253 	if (!var_off) {
3254 		off += reg->var_off.value;
3255 		err = check_stack_read_fixed_off(env, state, off, size,
3256 						 dst_regno);
3257 	} else {
3258 		/* Variable offset stack reads need more conservative handling
3259 		 * than fixed offset ones. Note that dst_regno >= 0 on this
3260 		 * branch.
3261 		 */
3262 		err = check_stack_read_var_off(env, ptr_regno, off, size,
3263 					       dst_regno);
3264 	}
3265 	return err;
3266 }
3267 
3268 
3269 /* check_stack_write dispatches to check_stack_write_fixed_off or
3270  * check_stack_write_var_off.
3271  *
3272  * 'ptr_regno' is the register used as a pointer into the stack.
3273  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
3274  * 'value_regno' is the register whose value we're writing to the stack. It can
3275  * be -1, meaning that we're not writing from a register.
3276  *
3277  * The caller must ensure that the offset falls within the maximum stack size.
3278  */
3279 static int check_stack_write(struct bpf_verifier_env *env,
3280 			     int ptr_regno, int off, int size,
3281 			     int value_regno, int insn_idx)
3282 {
3283 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3284 	struct bpf_func_state *state = func(env, reg);
3285 	int err;
3286 
3287 	if (tnum_is_const(reg->var_off)) {
3288 		off += reg->var_off.value;
3289 		err = check_stack_write_fixed_off(env, state, off, size,
3290 						  value_regno, insn_idx);
3291 	} else {
3292 		/* Variable offset stack writes need more conservative handling
3293 		 * than fixed offset ones.
3294 		 */
3295 		err = check_stack_write_var_off(env, state,
3296 						ptr_regno, off, size,
3297 						value_regno, insn_idx);
3298 	}
3299 	return err;
3300 }
3301 
3302 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
3303 				 int off, int size, enum bpf_access_type type)
3304 {
3305 	struct bpf_reg_state *regs = cur_regs(env);
3306 	struct bpf_map *map = regs[regno].map_ptr;
3307 	u32 cap = bpf_map_flags_to_cap(map);
3308 
3309 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
3310 		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
3311 			map->value_size, off, size);
3312 		return -EACCES;
3313 	}
3314 
3315 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
3316 		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
3317 			map->value_size, off, size);
3318 		return -EACCES;
3319 	}
3320 
3321 	return 0;
3322 }
3323 
3324 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
3325 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
3326 			      int off, int size, u32 mem_size,
3327 			      bool zero_size_allowed)
3328 {
3329 	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
3330 	struct bpf_reg_state *reg;
3331 
3332 	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
3333 		return 0;
3334 
3335 	reg = &cur_regs(env)[regno];
3336 	switch (reg->type) {
3337 	case PTR_TO_MAP_KEY:
3338 		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
3339 			mem_size, off, size);
3340 		break;
3341 	case PTR_TO_MAP_VALUE:
3342 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
3343 			mem_size, off, size);
3344 		break;
3345 	case PTR_TO_PACKET:
3346 	case PTR_TO_PACKET_META:
3347 	case PTR_TO_PACKET_END:
3348 		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
3349 			off, size, regno, reg->id, off, mem_size);
3350 		break;
3351 	case PTR_TO_MEM:
3352 	default:
3353 		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
3354 			mem_size, off, size);
3355 	}
3356 
3357 	return -EACCES;
3358 }
3359 
3360 /* check read/write into a memory region with possible variable offset */
3361 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
3362 				   int off, int size, u32 mem_size,
3363 				   bool zero_size_allowed)
3364 {
3365 	struct bpf_verifier_state *vstate = env->cur_state;
3366 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3367 	struct bpf_reg_state *reg = &state->regs[regno];
3368 	int err;
3369 
3370 	/* We may have adjusted the register pointing to the memory region, so we
3371 	 * need to try adding each of min_value and max_value to off
3372 	 * to make sure our theoretical access will be safe.
3373 	 */
3374 	if (env->log.level & BPF_LOG_LEVEL)
3375 		print_verifier_state(env, state);
3376 
3377 	/* The minimum value is only important with signed
3378 	 * comparisons where we can't assume the floor of a
3379 	 * value is 0.  If we are using signed variables for our
3380 	 * indexes we need to make sure that whatever we use
3381 	 * will have a set floor within our range.
3382 	 */
3383 	if (reg->smin_value < 0 &&
3384 	    (reg->smin_value == S64_MIN ||
3385 	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
3386 	      reg->smin_value + off < 0)) {
3387 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3388 			regno);
3389 		return -EACCES;
3390 	}
3391 	err = __check_mem_access(env, regno, reg->smin_value + off, size,
3392 				 mem_size, zero_size_allowed);
3393 	if (err) {
3394 		verbose(env, "R%d min value is outside of the allowed memory range\n",
3395 			regno);
3396 		return err;
3397 	}
3398 
3399 	/* If we haven't set a max value then we need to bail since we can't be
3400 	 * sure we won't do bad things.
3401 	 * If reg->umax_value + off could overflow, treat that as unbounded too.
3402 	 */
3403 	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
3404 		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
3405 			regno);
3406 		return -EACCES;
3407 	}
3408 	err = __check_mem_access(env, regno, reg->umax_value + off, size,
3409 				 mem_size, zero_size_allowed);
3410 	if (err) {
3411 		verbose(env, "R%d max value is outside of the allowed memory range\n",
3412 			regno);
3413 		return err;
3414 	}
3415 
3416 	return 0;
3417 }
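
/* Illustrative example (not kernel code): a bounded map-value access that
 * passes both probes above. 'my_map', 'key', 'idx' and 'get_index' are
 * hypothetical names in a BPF C program; value_size is assumed to be 64.
 *
 *	unsigned int idx = get_index();	// hypothetical source of an unknown scalar
 *	char *val = bpf_map_lookup_elem(&my_map, &key);
 *	if (!val)
 *		return 0;
 *	idx &= 63;			// smin >= 0, umax = 63
 *	return val[idx];		// off + umax stays within value_size
 */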
3418 
3419 /* check read/write into a map element with possible variable offset */
3420 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
3421 			    int off, int size, bool zero_size_allowed)
3422 {
3423 	struct bpf_verifier_state *vstate = env->cur_state;
3424 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3425 	struct bpf_reg_state *reg = &state->regs[regno];
3426 	struct bpf_map *map = reg->map_ptr;
3427 	int err;
3428 
3429 	err = check_mem_region_access(env, regno, off, size, map->value_size,
3430 				      zero_size_allowed);
3431 	if (err)
3432 		return err;
3433 
3434 	if (map_value_has_spin_lock(map)) {
3435 		u32 lock = map->spin_lock_off;
3436 
3437 		/* if any part of struct bpf_spin_lock can be touched by
3438 		 * load/store, reject this program.
3439 		 * To check that [x1, x2) overlaps with [y1, y2)
3440 		 * it is sufficient to check x1 < y2 && y1 < x2.
3441 		 */
3442 		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
3443 		     lock < reg->umax_value + off + size) {
3444 			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
3445 			return -EACCES;
3446 		}
3447 	}
3448 	if (map_value_has_timer(map)) {
3449 		u32 t = map->timer_off;
3450 
3451 		if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
3452 		     t < reg->umax_value + off + size) {
3453 			verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
3454 			return -EACCES;
3455 		}
3456 	}
3457 	return err;
3458 }
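
/* Illustrative layout (not kernel code): with a map value defined as below,
 * regular loads and stores may touch 'counter', but any access that overlaps
 * the 'lock' bytes is rejected by the overlap check above; only the
 * bpf_spin_lock()/bpf_spin_unlock() helpers may operate on it. The same rule
 * applies to 'struct bpf_timer' fields. 'struct my_value' is a made-up name.
 *
 *	struct my_value {
 *		struct bpf_spin_lock lock;
 *		int counter;
 *	};
 */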
3459 
3460 #define MAX_PACKET_OFF 0xffff
3461 
3462 static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
3463 {
3464 	return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
3465 }
3466 
3467 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3468 				       const struct bpf_call_arg_meta *meta,
3469 				       enum bpf_access_type t)
3470 {
3471 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
3472 
3473 	switch (prog_type) {
3474 	/* Program types with only direct read access go here! */
3475 	case BPF_PROG_TYPE_LWT_IN:
3476 	case BPF_PROG_TYPE_LWT_OUT:
3477 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
3478 	case BPF_PROG_TYPE_SK_REUSEPORT:
3479 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
3480 	case BPF_PROG_TYPE_CGROUP_SKB:
3481 		if (t == BPF_WRITE)
3482 			return false;
3483 		fallthrough;
3484 
3485 	/* Program types with direct read + write access go here! */
3486 	case BPF_PROG_TYPE_SCHED_CLS:
3487 	case BPF_PROG_TYPE_SCHED_ACT:
3488 	case BPF_PROG_TYPE_XDP:
3489 	case BPF_PROG_TYPE_LWT_XMIT:
3490 	case BPF_PROG_TYPE_SK_SKB:
3491 	case BPF_PROG_TYPE_SK_MSG:
3492 		if (meta)
3493 			return meta->pkt_access;
3494 
3495 		env->seen_direct_write = true;
3496 		return true;
3497 
3498 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3499 		if (t == BPF_WRITE)
3500 			env->seen_direct_write = true;
3501 
3502 		return true;
3503 
3504 	default:
3505 		return false;
3506 	}
3507 }
3508 
3509 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
3510 			       int size, bool zero_size_allowed)
3511 {
3512 	struct bpf_reg_state *regs = cur_regs(env);
3513 	struct bpf_reg_state *reg = &regs[regno];
3514 	int err;
3515 
3516 	/* We may have added a variable offset to the packet pointer; but any
3517 	 * reg->range we have comes after that.  We are only checking the fixed
3518 	 * offset.
3519 	 */
3520 
3521 	/* We don't allow negative numbers, because we aren't tracking enough
3522 	 * detail to prove they're safe.
3523 	 */
3524 	if (reg->smin_value < 0) {
3525 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3526 			regno);
3527 		return -EACCES;
3528 	}
3529 
3530 	err = reg->range < 0 ? -EINVAL :
3531 	      __check_mem_access(env, regno, off, size, reg->range,
3532 				 zero_size_allowed);
3533 	if (err) {
3534 		verbose(env, "R%d offset is outside of the packet\n", regno);
3535 		return err;
3536 	}
3537 
3538 	/* __check_mem_access has made sure "off + size - 1" is within u16.
3539 	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
3540 	 * otherwise find_good_pkt_pointers would have refused to set range info
3541 	 * and __check_mem_access would have rejected this pkt access.
3542 	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
3543 	 */
3544 	env->prog->aux->max_pkt_offset =
3545 		max_t(u32, env->prog->aux->max_pkt_offset,
3546 		      off + reg->umax_value + size - 1);
3547 
3548 	return err;
3549 }
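
/* Illustrative sketch (not kernel code) of the user-visible pattern that
 * satisfies check_packet_access(): the comparison lets
 * find_good_pkt_pointers() assign a non-zero reg->range to 'data', after
 * which fixed-offset loads inside that range are accepted. Assumes an XDP
 * program with 'struct xdp_md *ctx'.
 *
 *	void *data     = (void *)(long)ctx->data;
 *	void *data_end = (void *)(long)ctx->data_end;
 *	struct ethhdr *eth = data;
 *
 *	if (data + sizeof(*eth) > data_end)
 *		return XDP_DROP;			// no range info -> access rejected
 *	if (eth->h_proto == bpf_htons(ETH_P_IP))	// within reg->range
 *		return XDP_PASS;
 *	return XDP_DROP;
 */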
3550 
3551 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
3552 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
3553 			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
3554 			    struct btf **btf, u32 *btf_id)
3555 {
3556 	struct bpf_insn_access_aux info = {
3557 		.reg_type = *reg_type,
3558 		.log = &env->log,
3559 	};
3560 
3561 	if (env->ops->is_valid_access &&
3562 	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
3563 		/* A non-zero info.ctx_field_size indicates that this field is a
3564 		 * candidate for later verifier transformation to load the whole
3565 		 * field and then apply a mask when accessed with a narrower
3566 		 * access than actual ctx access size. A zero info.ctx_field_size
3567 		 * will only allow for whole field access and rejects any other
3568 		 * type of narrower access.
3569 		 */
3570 		*reg_type = info.reg_type;
3571 
3572 		if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) {
3573 			*btf = info.btf;
3574 			*btf_id = info.btf_id;
3575 		} else {
3576 			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3577 		}
3578 		/* remember the offset of last byte accessed in ctx */
3579 		if (env->prog->aux->max_ctx_offset < off + size)
3580 			env->prog->aux->max_ctx_offset = off + size;
3581 		return 0;
3582 	}
3583 
3584 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3585 	return -EACCES;
3586 }
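
/* Illustrative example (not kernel code): a narrow ctx access, e.g. for a
 * BPF_PROG_TYPE_SCHED_CLS program. The one-byte load of the four-byte 'mark'
 * field is accepted; is_valid_access() reports ctx_field_size = 4 and the
 * verifier later converts the access into a full-width load plus a mask.
 *
 *	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
 *		    offsetof(struct __sk_buff, mark)),
 *	BPF_EXIT_INSN(),
 */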
3587 
3588 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3589 				  int size)
3590 {
3591 	if (size < 0 || off < 0 ||
3592 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
3593 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
3594 			off, size);
3595 		return -EACCES;
3596 	}
3597 	return 0;
3598 }
3599 
3600 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3601 			     u32 regno, int off, int size,
3602 			     enum bpf_access_type t)
3603 {
3604 	struct bpf_reg_state *regs = cur_regs(env);
3605 	struct bpf_reg_state *reg = &regs[regno];
3606 	struct bpf_insn_access_aux info = {};
3607 	bool valid;
3608 
3609 	if (reg->smin_value < 0) {
3610 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3611 			regno);
3612 		return -EACCES;
3613 	}
3614 
3615 	switch (reg->type) {
3616 	case PTR_TO_SOCK_COMMON:
3617 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3618 		break;
3619 	case PTR_TO_SOCKET:
3620 		valid = bpf_sock_is_valid_access(off, size, t, &info);
3621 		break;
3622 	case PTR_TO_TCP_SOCK:
3623 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3624 		break;
3625 	case PTR_TO_XDP_SOCK:
3626 		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3627 		break;
3628 	default:
3629 		valid = false;
3630 	}
3631 
3632 
3633 	if (valid) {
3634 		env->insn_aux_data[insn_idx].ctx_field_size =
3635 			info.ctx_field_size;
3636 		return 0;
3637 	}
3638 
3639 	verbose(env, "R%d invalid %s access off=%d size=%d\n",
3640 		regno, reg_type_str[reg->type], off, size);
3641 
3642 	return -EACCES;
3643 }
3644 
3645 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3646 {
3647 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3648 }
3649 
3650 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3651 {
3652 	const struct bpf_reg_state *reg = reg_state(env, regno);
3653 
3654 	return reg->type == PTR_TO_CTX;
3655 }
3656 
3657 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3658 {
3659 	const struct bpf_reg_state *reg = reg_state(env, regno);
3660 
3661 	return type_is_sk_pointer(reg->type);
3662 }
3663 
3664 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3665 {
3666 	const struct bpf_reg_state *reg = reg_state(env, regno);
3667 
3668 	return type_is_pkt_pointer(reg->type);
3669 }
3670 
3671 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3672 {
3673 	const struct bpf_reg_state *reg = reg_state(env, regno);
3674 
3675 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3676 	return reg->type == PTR_TO_FLOW_KEYS;
3677 }
3678 
3679 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
3680 				   const struct bpf_reg_state *reg,
3681 				   int off, int size, bool strict)
3682 {
3683 	struct tnum reg_off;
3684 	int ip_align;
3685 
3686 	/* Byte size accesses are always allowed. */
3687 	if (!strict || size == 1)
3688 		return 0;
3689 
3690 	/* For platforms that do not have a Kconfig enabling
3691 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3692 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
3693 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3694 	 * to this code only in strict mode where we want to emulate
3695 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
3696 	 * unconditional IP align value of '2'.
3697 	 */
3698 	ip_align = 2;
3699 
3700 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3701 	if (!tnum_is_aligned(reg_off, size)) {
3702 		char tn_buf[48];
3703 
3704 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3705 		verbose(env,
3706 			"misaligned packet access off %d+%s+%d+%d size %d\n",
3707 			ip_align, tn_buf, reg->off, off, size);
3708 		return -EACCES;
3709 	}
3710 
3711 	return 0;
3712 }
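
/* Worked example (illustrative only): with the emulated NET_IP_ALIGN of 2
 * and reg->off == 0, a 4-byte load at packet offset 12 checks the alignment
 * of 2 + 0 + 12 = 14, which is not a multiple of 4 and is rejected in strict
 * mode, while a 2-byte load at the same offset (14 % 2 == 0) is accepted.
 */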
3713 
3714 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
3715 				       const struct bpf_reg_state *reg,
3716 				       const char *pointer_desc,
3717 				       int off, int size, bool strict)
3718 {
3719 	struct tnum reg_off;
3720 
3721 	/* Byte size accesses are always allowed. */
3722 	if (!strict || size == 1)
3723 		return 0;
3724 
3725 	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3726 	if (!tnum_is_aligned(reg_off, size)) {
3727 		char tn_buf[48];
3728 
3729 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3730 		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
3731 			pointer_desc, tn_buf, reg->off, off, size);
3732 		return -EACCES;
3733 	}
3734 
3735 	return 0;
3736 }
3737 
3738 static int check_ptr_alignment(struct bpf_verifier_env *env,
3739 			       const struct bpf_reg_state *reg, int off,
3740 			       int size, bool strict_alignment_once)
3741 {
3742 	bool strict = env->strict_alignment || strict_alignment_once;
3743 	const char *pointer_desc = "";
3744 
3745 	switch (reg->type) {
3746 	case PTR_TO_PACKET:
3747 	case PTR_TO_PACKET_META:
3748 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
3749 		 * right in front, treat it the very same way.
3750 		 */
3751 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
3752 	case PTR_TO_FLOW_KEYS:
3753 		pointer_desc = "flow keys ";
3754 		break;
3755 	case PTR_TO_MAP_KEY:
3756 		pointer_desc = "key ";
3757 		break;
3758 	case PTR_TO_MAP_VALUE:
3759 		pointer_desc = "value ";
3760 		break;
3761 	case PTR_TO_CTX:
3762 		pointer_desc = "context ";
3763 		break;
3764 	case PTR_TO_STACK:
3765 		pointer_desc = "stack ";
3766 		/* The stack spill tracking logic in check_stack_write_fixed_off()
3767 		 * and check_stack_read_fixed_off() relies on stack accesses being
3768 		 * aligned.
3769 		 */
3770 		strict = true;
3771 		break;
3772 	case PTR_TO_SOCKET:
3773 		pointer_desc = "sock ";
3774 		break;
3775 	case PTR_TO_SOCK_COMMON:
3776 		pointer_desc = "sock_common ";
3777 		break;
3778 	case PTR_TO_TCP_SOCK:
3779 		pointer_desc = "tcp_sock ";
3780 		break;
3781 	case PTR_TO_XDP_SOCK:
3782 		pointer_desc = "xdp_sock ";
3783 		break;
3784 	default:
3785 		break;
3786 	}
3787 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
3788 					   strict);
3789 }
3790 
3791 static int update_stack_depth(struct bpf_verifier_env *env,
3792 			      const struct bpf_func_state *func,
3793 			      int off)
3794 {
3795 	u16 stack = env->subprog_info[func->subprogno].stack_depth;
3796 
3797 	if (stack >= -off)
3798 		return 0;
3799 
3800 	/* update known max for given subprogram */
3801 	env->subprog_info[func->subprogno].stack_depth = -off;
3802 	return 0;
3803 }
3804 
3805 /* starting from the main bpf function, walk all instructions of the function
3806  * and recursively walk all callees that the given function can call.
3807  * Ignore jump and exit insns.
3808  * Since recursion is prevented by check_cfg() this algorithm
3809  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
3810  */
3811 static int check_max_stack_depth(struct bpf_verifier_env *env)
3812 {
3813 	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3814 	struct bpf_subprog_info *subprog = env->subprog_info;
3815 	struct bpf_insn *insn = env->prog->insnsi;
3816 	bool tail_call_reachable = false;
3817 	int ret_insn[MAX_CALL_FRAMES];
3818 	int ret_prog[MAX_CALL_FRAMES];
3819 	int j;
3820 
3821 process_func:
3822 	/* protect against potential stack overflow that might happen when
3823 	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3824 	 * depth for such a case down to 256 so that the worst case scenario
3825 	 * would result in 8k of stack (32, which is the tail call limit,
3826 	 * times 256 = 8k).
3827 	 *
3828 	 * To get the idea what might happen, see an example:
3829 	 * func1 -> sub rsp, 128
3830 	 *  subfunc1 -> sub rsp, 256
3831 	 *  tailcall1 -> add rsp, 256
3832 	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3833 	 *   subfunc2 -> sub rsp, 64
3834 	 *   subfunc22 -> sub rsp, 128
3835 	 *   tailcall2 -> add rsp, 128
3836 	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3837 	 *
3838 	 * tailcall will unwind the current stack frame but it will not get rid
3839 	 * of caller's stack as shown on the example above.
3840 	 */
3841 	if (idx && subprog[idx].has_tail_call && depth >= 256) {
3842 		verbose(env,
3843 			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3844 			depth);
3845 		return -EACCES;
3846 	}
3847 	/* round up to 32 bytes, since this is the granularity
3848 	 * of the interpreter stack size
3849 	 */
3850 	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3851 	if (depth > MAX_BPF_STACK) {
3852 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
3853 			frame + 1, depth);
3854 		return -EACCES;
3855 	}
3856 continue_func:
3857 	subprog_end = subprog[idx + 1].start;
3858 	for (; i < subprog_end; i++) {
3859 		int next_insn;
3860 
3861 		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
3862 			continue;
3863 		/* remember insn and function to return to */
3864 		ret_insn[frame] = i + 1;
3865 		ret_prog[frame] = idx;
3866 
3867 		/* find the callee */
3868 		next_insn = i + insn[i].imm + 1;
3869 		idx = find_subprog(env, next_insn);
3870 		if (idx < 0) {
3871 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3872 				  next_insn);
3873 			return -EFAULT;
3874 		}
3875 		if (subprog[idx].is_async_cb) {
3876 			if (subprog[idx].has_tail_call) {
3877 				verbose(env, "verifier bug. subprog has tail_call and async cb\n");
3878 				return -EFAULT;
3879 			}
3880 			/* async callbacks don't increase bpf prog stack size */
3881 			continue;
3882 		}
3883 		i = next_insn;
3884 
3885 		if (subprog[idx].has_tail_call)
3886 			tail_call_reachable = true;
3887 
3888 		frame++;
3889 		if (frame >= MAX_CALL_FRAMES) {
3890 			verbose(env, "the call stack of %d frames is too deep !\n",
3891 				frame);
3892 			return -E2BIG;
3893 		}
3894 		goto process_func;
3895 	}
3896 	/* if tail call got detected across bpf2bpf calls then mark each of the
3897 	 * currently present subprog frames as tail call reachable subprogs;
3898 	 * this info will be utilized by JIT so that we will be preserving the
3899 	 * tail call counter throughout bpf2bpf calls combined with tailcalls
3900 	 */
3901 	if (tail_call_reachable)
3902 		for (j = 0; j < frame; j++)
3903 			subprog[ret_prog[j]].tail_call_reachable = true;
3904 	if (subprog[0].tail_call_reachable)
3905 		env->prog->aux->tail_call_reachable = true;
3906 
3907 	/* end of for() loop means the last insn of the 'subprog'
3908 	 * was reached. Doesn't matter whether it was JA or EXIT
3909 	 */
3910 	if (frame == 0)
3911 		return 0;
3912 	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3913 	frame--;
3914 	i = ret_insn[frame];
3915 	idx = ret_prog[frame];
3916 	goto continue_func;
3917 }
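
/* Worked example (illustrative only): a main program using 40 bytes of stack
 * that calls one subprog using 24 bytes is accounted above as
 * round_up(40, 32) + round_up(24, 32) = 64 + 32 = 96 bytes, well under
 * MAX_BPF_STACK (512). A call chain whose rounded frames sum past 512 bytes
 * is rejected with the "combined stack size ... Too large" error.
 */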
3918 
3919 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
3920 static int get_callee_stack_depth(struct bpf_verifier_env *env,
3921 				  const struct bpf_insn *insn, int idx)
3922 {
3923 	int start = idx + insn->imm + 1, subprog;
3924 
3925 	subprog = find_subprog(env, start);
3926 	if (subprog < 0) {
3927 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3928 			  start);
3929 		return -EFAULT;
3930 	}
3931 	return env->subprog_info[subprog].stack_depth;
3932 }
3933 #endif
3934 
3935 int check_ctx_reg(struct bpf_verifier_env *env,
3936 		  const struct bpf_reg_state *reg, int regno)
3937 {
3938 	/* Access to ctx or passing it to a helper is only allowed in
3939 	 * its original, unmodified form.
3940 	 */
3941 
3942 	if (reg->off) {
3943 		verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
3944 			regno, reg->off);
3945 		return -EACCES;
3946 	}
3947 
3948 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3949 		char tn_buf[48];
3950 
3951 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3952 		verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
3953 		return -EACCES;
3954 	}
3955 
3956 	return 0;
3957 }
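
/* Illustrative sketch (not part of the verifier): the classic rejection case
 * for this check. R1 starts as PTR_TO_CTX; after the constant add it carries
 * reg->off == 4, so the load is refused with "dereference of modified ctx
 * ptr".
 *
 *	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
 *	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),	// rejected here
 *	BPF_EXIT_INSN(),
 */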
3958 
3959 static int __check_buffer_access(struct bpf_verifier_env *env,
3960 				 const char *buf_info,
3961 				 const struct bpf_reg_state *reg,
3962 				 int regno, int off, int size)
3963 {
3964 	if (off < 0) {
3965 		verbose(env,
3966 			"R%d invalid %s buffer access: off=%d, size=%d\n",
3967 			regno, buf_info, off, size);
3968 		return -EACCES;
3969 	}
3970 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3971 		char tn_buf[48];
3972 
3973 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3974 		verbose(env,
3975 			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
3976 			regno, off, tn_buf);
3977 		return -EACCES;
3978 	}
3979 
3980 	return 0;
3981 }
3982 
3983 static int check_tp_buffer_access(struct bpf_verifier_env *env,
3984 				  const struct bpf_reg_state *reg,
3985 				  int regno, int off, int size)
3986 {
3987 	int err;
3988 
3989 	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
3990 	if (err)
3991 		return err;
3992 
3993 	if (off + size > env->prog->aux->max_tp_access)
3994 		env->prog->aux->max_tp_access = off + size;
3995 
3996 	return 0;
3997 }
3998 
3999 static int check_buffer_access(struct bpf_verifier_env *env,
4000 			       const struct bpf_reg_state *reg,
4001 			       int regno, int off, int size,
4002 			       bool zero_size_allowed,
4003 			       const char *buf_info,
4004 			       u32 *max_access)
4005 {
4006 	int err;
4007 
4008 	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
4009 	if (err)
4010 		return err;
4011 
4012 	if (off + size > *max_access)
4013 		*max_access = off + size;
4014 
4015 	return 0;
4016 }
4017 
4018 /* BPF architecture zero extends alu32 ops into 64-bit registers */
4019 static void zext_32_to_64(struct bpf_reg_state *reg)
4020 {
4021 	reg->var_off = tnum_subreg(reg->var_off);
4022 	__reg_assign_32_into_64(reg);
4023 }
4024 
4025 /* truncate register to smaller size (in bytes)
4026  * must be called with size < BPF_REG_SIZE
4027  */
4028 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
4029 {
4030 	u64 mask;
4031 
4032 	/* clear high bits in bit representation */
4033 	reg->var_off = tnum_cast(reg->var_off, size);
4034 
4035 	/* fix arithmetic bounds */
4036 	mask = ((u64)1 << (size * 8)) - 1;
4037 	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
4038 		reg->umin_value &= mask;
4039 		reg->umax_value &= mask;
4040 	} else {
4041 		reg->umin_value = 0;
4042 		reg->umax_value = mask;
4043 	}
4044 	reg->smin_value = reg->umin_value;
4045 	reg->smax_value = reg->umax_value;
4046 
4047 	/* If the size is smaller than the 32-bit register, the 32-bit register
4048 	 * values are also truncated, so we push the 64-bit bounds into the
4049 	 * 32-bit bounds. Sizes < 32 bits were already truncated above.
4050 	 */
4051 	if (size >= 4)
4052 		return;
4053 	__reg_combine_64_into_32(reg);
4054 }
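
/* Worked example (illustrative only): truncating a register with u64 bounds
 * [0x1f0, 0x30f] to one byte uses mask = 0xff. The bounds disagree outside
 * the mask (0x100 vs 0x300), so the result becomes the full [0, 0xff] range
 * instead of the inverted pair 0xf0/0x0f that naive masking would produce.
 */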
4055 
4056 static bool bpf_map_is_rdonly(const struct bpf_map *map)
4057 {
4058 	return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
4059 }
4060 
4061 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
4062 {
4063 	void *ptr;
4064 	u64 addr;
4065 	int err;
4066 
4067 	err = map->ops->map_direct_value_addr(map, &addr, off);
4068 	if (err)
4069 		return err;
4070 	ptr = (void *)(long)addr + off;
4071 
4072 	switch (size) {
4073 	case sizeof(u8):
4074 		*val = (u64)*(u8 *)ptr;
4075 		break;
4076 	case sizeof(u16):
4077 		*val = (u64)*(u16 *)ptr;
4078 		break;
4079 	case sizeof(u32):
4080 		*val = (u64)*(u32 *)ptr;
4081 		break;
4082 	case sizeof(u64):
4083 		*val = *(u64 *)ptr;
4084 		break;
4085 	default:
4086 		return -EINVAL;
4087 	}
4088 	return 0;
4089 }
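
/* Illustrative sketch (not kernel code) of the user-space side that makes the
 * direct-read path usable: an array map created read-only for programs,
 * populated, then frozen so bpf_map_is_rdonly() is true. libbpf helper names
 * are used for brevity, and 'opts' is assumed to carry
 * .map_flags = BPF_F_RDONLY_PROG.
 *
 *	int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 64, 1, &opts);
 *	bpf_map_update_elem(fd, &key, value, BPF_ANY);
 *	bpf_map_freeze(fd);	// no further user-space writes; map->frozen
 *
 * Loads from such a map value at a constant offset are then tracked as known
 * scalars by the PTR_TO_MAP_VALUE handling in check_mem_access() below.
 */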
4090 
4091 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
4092 				   struct bpf_reg_state *regs,
4093 				   int regno, int off, int size,
4094 				   enum bpf_access_type atype,
4095 				   int value_regno)
4096 {
4097 	struct bpf_reg_state *reg = regs + regno;
4098 	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
4099 	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
4100 	u32 btf_id;
4101 	int ret;
4102 
4103 	if (off < 0) {
4104 		verbose(env,
4105 			"R%d is ptr_%s invalid negative access: off=%d\n",
4106 			regno, tname, off);
4107 		return -EACCES;
4108 	}
4109 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4110 		char tn_buf[48];
4111 
4112 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4113 		verbose(env,
4114 			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
4115 			regno, tname, off, tn_buf);
4116 		return -EACCES;
4117 	}
4118 
4119 	if (env->ops->btf_struct_access) {
4120 		ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
4121 						  off, size, atype, &btf_id);
4122 	} else {
4123 		if (atype != BPF_READ) {
4124 			verbose(env, "only read is supported\n");
4125 			return -EACCES;
4126 		}
4127 
4128 		ret = btf_struct_access(&env->log, reg->btf, t, off, size,
4129 					atype, &btf_id);
4130 	}
4131 
4132 	if (ret < 0)
4133 		return ret;
4134 
4135 	if (atype == BPF_READ && value_regno >= 0)
4136 		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id);
4137 
4138 	return 0;
4139 }
4140 
4141 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
4142 				   struct bpf_reg_state *regs,
4143 				   int regno, int off, int size,
4144 				   enum bpf_access_type atype,
4145 				   int value_regno)
4146 {
4147 	struct bpf_reg_state *reg = regs + regno;
4148 	struct bpf_map *map = reg->map_ptr;
4149 	const struct btf_type *t;
4150 	const char *tname;
4151 	u32 btf_id;
4152 	int ret;
4153 
4154 	if (!btf_vmlinux) {
4155 		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
4156 		return -ENOTSUPP;
4157 	}
4158 
4159 	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
4160 		verbose(env, "map_ptr access not supported for map type %d\n",
4161 			map->map_type);
4162 		return -ENOTSUPP;
4163 	}
4164 
4165 	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
4166 	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
4167 
4168 	if (!env->allow_ptr_to_map_access) {
4169 		verbose(env,
4170 			"%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
4171 			tname);
4172 		return -EPERM;
4173 	}
4174 
4175 	if (off < 0) {
4176 		verbose(env, "R%d is %s invalid negative access: off=%d\n",
4177 			regno, tname, off);
4178 		return -EACCES;
4179 	}
4180 
4181 	if (atype != BPF_READ) {
4182 		verbose(env, "only read from %s is supported\n", tname);
4183 		return -EACCES;
4184 	}
4185 
4186 	ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id);
4187 	if (ret < 0)
4188 		return ret;
4189 
4190 	if (value_regno >= 0)
4191 		mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id);
4192 
4193 	return 0;
4194 }
4195 
4196 /* Check that the stack access at the given offset is within bounds. The
4197  * maximum valid offset is -1.
4198  *
4199  * The minimum valid offset is -MAX_BPF_STACK for writes, and
4200  * -state->allocated_stack for reads.
4201  */
4202 static int check_stack_slot_within_bounds(int off,
4203 					  struct bpf_func_state *state,
4204 					  enum bpf_access_type t)
4205 {
4206 	int min_valid_off;
4207 
4208 	if (t == BPF_WRITE)
4209 		min_valid_off = -MAX_BPF_STACK;
4210 	else
4211 		min_valid_off = -state->allocated_stack;
4212 
4213 	if (off < min_valid_off || off > -1)
4214 		return -EACCES;
4215 	return 0;
4216 }
4217 
4218 /* Check that the stack access at 'regno + off' falls within the maximum stack
4219  * bounds.
4220  *
4221  * 'off' includes `regno->offset`, but not its dynamic part (if any).
4222  */
4223 static int check_stack_access_within_bounds(
4224 		struct bpf_verifier_env *env,
4225 		int regno, int off, int access_size,
4226 		enum stack_access_src src, enum bpf_access_type type)
4227 {
4228 	struct bpf_reg_state *regs = cur_regs(env);
4229 	struct bpf_reg_state *reg = regs + regno;
4230 	struct bpf_func_state *state = func(env, reg);
4231 	int min_off, max_off;
4232 	int err;
4233 	char *err_extra;
4234 
4235 	if (src == ACCESS_HELPER)
4236 		/* We don't know if helpers are reading or writing (or both). */
4237 		err_extra = " indirect access to";
4238 	else if (type == BPF_READ)
4239 		err_extra = " read from";
4240 	else
4241 		err_extra = " write to";
4242 
4243 	if (tnum_is_const(reg->var_off)) {
4244 		min_off = reg->var_off.value + off;
4245 		if (access_size > 0)
4246 			max_off = min_off + access_size - 1;
4247 		else
4248 			max_off = min_off;
4249 	} else {
4250 		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
4251 		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
4252 			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
4253 				err_extra, regno);
4254 			return -EACCES;
4255 		}
4256 		min_off = reg->smin_value + off;
4257 		if (access_size > 0)
4258 			max_off = reg->smax_value + off + access_size - 1;
4259 		else
4260 			max_off = min_off;
4261 	}
4262 
4263 	err = check_stack_slot_within_bounds(min_off, state, type);
4264 	if (!err)
4265 		err = check_stack_slot_within_bounds(max_off, state, type);
4266 
4267 	if (err) {
4268 		if (tnum_is_const(reg->var_off)) {
4269 			verbose(env, "invalid%s stack R%d off=%d size=%d\n",
4270 				err_extra, regno, off, access_size);
4271 		} else {
4272 			char tn_buf[48];
4273 
4274 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4275 			verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
4276 				err_extra, regno, tn_buf, access_size);
4277 		}
4278 	}
4279 	return err;
4280 }
4281 
4282 /* check whether memory at (regno + off) is accessible for t = (read | write)
4283  * if t==write, value_regno is a register which value is stored into memory
4284  * if t==read, value_regno is a register which will receive the value from memory
4285  * if t==write && value_regno==-1, some unknown value is stored into memory
4286  * if t==read && value_regno==-1, don't care what we read from memory
4287  */
4288 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
4289 			    int off, int bpf_size, enum bpf_access_type t,
4290 			    int value_regno, bool strict_alignment_once)
4291 {
4292 	struct bpf_reg_state *regs = cur_regs(env);
4293 	struct bpf_reg_state *reg = regs + regno;
4294 	struct bpf_func_state *state;
4295 	int size, err = 0;
4296 
4297 	size = bpf_size_to_bytes(bpf_size);
4298 	if (size < 0)
4299 		return size;
4300 
4301 	/* alignment checks will add in reg->off themselves */
4302 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
4303 	if (err)
4304 		return err;
4305 
4306 	/* for access checks, reg->off is just part of off */
4307 	off += reg->off;
4308 
4309 	if (reg->type == PTR_TO_MAP_KEY) {
4310 		if (t == BPF_WRITE) {
4311 			verbose(env, "write to change key R%d not allowed\n", regno);
4312 			return -EACCES;
4313 		}
4314 
4315 		err = check_mem_region_access(env, regno, off, size,
4316 					      reg->map_ptr->key_size, false);
4317 		if (err)
4318 			return err;
4319 		if (value_regno >= 0)
4320 			mark_reg_unknown(env, regs, value_regno);
4321 	} else if (reg->type == PTR_TO_MAP_VALUE) {
4322 		if (t == BPF_WRITE && value_regno >= 0 &&
4323 		    is_pointer_value(env, value_regno)) {
4324 			verbose(env, "R%d leaks addr into map\n", value_regno);
4325 			return -EACCES;
4326 		}
4327 		err = check_map_access_type(env, regno, off, size, t);
4328 		if (err)
4329 			return err;
4330 		err = check_map_access(env, regno, off, size, false);
4331 		if (!err && t == BPF_READ && value_regno >= 0) {
4332 			struct bpf_map *map = reg->map_ptr;
4333 
4334 			/* if map is read-only, track its contents as scalars */
4335 			if (tnum_is_const(reg->var_off) &&
4336 			    bpf_map_is_rdonly(map) &&
4337 			    map->ops->map_direct_value_addr) {
4338 				int map_off = off + reg->var_off.value;
4339 				u64 val = 0;
4340 
4341 				err = bpf_map_direct_read(map, map_off, size,
4342 							  &val);
4343 				if (err)
4344 					return err;
4345 
4346 				regs[value_regno].type = SCALAR_VALUE;
4347 				__mark_reg_known(&regs[value_regno], val);
4348 			} else {
4349 				mark_reg_unknown(env, regs, value_regno);
4350 			}
4351 		}
4352 	} else if (reg->type == PTR_TO_MEM) {
4353 		if (t == BPF_WRITE && value_regno >= 0 &&
4354 		    is_pointer_value(env, value_regno)) {
4355 			verbose(env, "R%d leaks addr into mem\n", value_regno);
4356 			return -EACCES;
4357 		}
4358 		err = check_mem_region_access(env, regno, off, size,
4359 					      reg->mem_size, false);
4360 		if (!err && t == BPF_READ && value_regno >= 0)
4361 			mark_reg_unknown(env, regs, value_regno);
4362 	} else if (reg->type == PTR_TO_CTX) {
4363 		enum bpf_reg_type reg_type = SCALAR_VALUE;
4364 		struct btf *btf = NULL;
4365 		u32 btf_id = 0;
4366 
4367 		if (t == BPF_WRITE && value_regno >= 0 &&
4368 		    is_pointer_value(env, value_regno)) {
4369 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
4370 			return -EACCES;
4371 		}
4372 
4373 		err = check_ctx_reg(env, reg, regno);
4374 		if (err < 0)
4375 			return err;
4376 
4377 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, &btf_id);
4378 		if (err)
4379 			verbose_linfo(env, insn_idx, "; ");
4380 		if (!err && t == BPF_READ && value_regno >= 0) {
4381 			/* ctx access returns either a scalar, or a
4382 			 * PTR_TO_PACKET[_META,_END]. In the latter
4383 			 * case, we know the offset is zero.
4384 			 */
4385 			if (reg_type == SCALAR_VALUE) {
4386 				mark_reg_unknown(env, regs, value_regno);
4387 			} else {
4388 				mark_reg_known_zero(env, regs,
4389 						    value_regno);
4390 				if (reg_type_may_be_null(reg_type))
4391 					regs[value_regno].id = ++env->id_gen;
4392 				/* A load of a ctx field could have an actual
4393 				 * load size different from the one encoded in
4394 				 * the insn. When the dst is a PTR, it is for
4395 				 * sure not a sub-register.
4396 				 */
4397 				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
4398 				if (reg_type == PTR_TO_BTF_ID ||
4399 				    reg_type == PTR_TO_BTF_ID_OR_NULL) {
4400 					regs[value_regno].btf = btf;
4401 					regs[value_regno].btf_id = btf_id;
4402 				}
4403 			}
4404 			regs[value_regno].type = reg_type;
4405 		}
4406 
4407 	} else if (reg->type == PTR_TO_STACK) {
4408 		/* Basic bounds checks. */
4409 		err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
4410 		if (err)
4411 			return err;
4412 
4413 		state = func(env, reg);
4414 		err = update_stack_depth(env, state, off);
4415 		if (err)
4416 			return err;
4417 
4418 		if (t == BPF_READ)
4419 			err = check_stack_read(env, regno, off, size,
4420 					       value_regno);
4421 		else
4422 			err = check_stack_write(env, regno, off, size,
4423 						value_regno, insn_idx);
4424 	} else if (reg_is_pkt_pointer(reg)) {
4425 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
4426 			verbose(env, "cannot write into packet\n");
4427 			return -EACCES;
4428 		}
4429 		if (t == BPF_WRITE && value_regno >= 0 &&
4430 		    is_pointer_value(env, value_regno)) {
4431 			verbose(env, "R%d leaks addr into packet\n",
4432 				value_regno);
4433 			return -EACCES;
4434 		}
4435 		err = check_packet_access(env, regno, off, size, false);
4436 		if (!err && t == BPF_READ && value_regno >= 0)
4437 			mark_reg_unknown(env, regs, value_regno);
4438 	} else if (reg->type == PTR_TO_FLOW_KEYS) {
4439 		if (t == BPF_WRITE && value_regno >= 0 &&
4440 		    is_pointer_value(env, value_regno)) {
4441 			verbose(env, "R%d leaks addr into flow keys\n",
4442 				value_regno);
4443 			return -EACCES;
4444 		}
4445 
4446 		err = check_flow_keys_access(env, off, size);
4447 		if (!err && t == BPF_READ && value_regno >= 0)
4448 			mark_reg_unknown(env, regs, value_regno);
4449 	} else if (type_is_sk_pointer(reg->type)) {
4450 		if (t == BPF_WRITE) {
4451 			verbose(env, "R%d cannot write into %s\n",
4452 				regno, reg_type_str[reg->type]);
4453 			return -EACCES;
4454 		}
4455 		err = check_sock_access(env, insn_idx, regno, off, size, t);
4456 		if (!err && value_regno >= 0)
4457 			mark_reg_unknown(env, regs, value_regno);
4458 	} else if (reg->type == PTR_TO_TP_BUFFER) {
4459 		err = check_tp_buffer_access(env, reg, regno, off, size);
4460 		if (!err && t == BPF_READ && value_regno >= 0)
4461 			mark_reg_unknown(env, regs, value_regno);
4462 	} else if (reg->type == PTR_TO_BTF_ID) {
4463 		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
4464 					      value_regno);
4465 	} else if (reg->type == CONST_PTR_TO_MAP) {
4466 		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
4467 					      value_regno);
4468 	} else if (reg->type == PTR_TO_RDONLY_BUF) {
4469 		if (t == BPF_WRITE) {
4470 			verbose(env, "R%d cannot write into %s\n",
4471 				regno, reg_type_str[reg->type]);
4472 			return -EACCES;
4473 		}
4474 		err = check_buffer_access(env, reg, regno, off, size, false,
4475 					  "rdonly",
4476 					  &env->prog->aux->max_rdonly_access);
4477 		if (!err && value_regno >= 0)
4478 			mark_reg_unknown(env, regs, value_regno);
4479 	} else if (reg->type == PTR_TO_RDWR_BUF) {
4480 		err = check_buffer_access(env, reg, regno, off, size, false,
4481 					  "rdwr",
4482 					  &env->prog->aux->max_rdwr_access);
4483 		if (!err && t == BPF_READ && value_regno >= 0)
4484 			mark_reg_unknown(env, regs, value_regno);
4485 	} else {
4486 		verbose(env, "R%d invalid mem access '%s'\n", regno,
4487 			reg_type_str[reg->type]);
4488 		return -EACCES;
4489 	}
4490 
4491 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
4492 	    regs[value_regno].type == SCALAR_VALUE) {
4493 		/* b/h/w load zero-extends, mark upper bits as known 0 */
4494 		coerce_reg_to_size(&regs[value_regno], size);
4495 	}
4496 	return err;
4497 }
4498 
4499 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
4500 {
4501 	int load_reg;
4502 	int err;
4503 
4504 	switch (insn->imm) {
4505 	case BPF_ADD:
4506 	case BPF_ADD | BPF_FETCH:
4507 	case BPF_AND:
4508 	case BPF_AND | BPF_FETCH:
4509 	case BPF_OR:
4510 	case BPF_OR | BPF_FETCH:
4511 	case BPF_XOR:
4512 	case BPF_XOR | BPF_FETCH:
4513 	case BPF_XCHG:
4514 	case BPF_CMPXCHG:
4515 		break;
4516 	default:
4517 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
4518 		return -EINVAL;
4519 	}
4520 
4521 	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
4522 		verbose(env, "invalid atomic operand size\n");
4523 		return -EINVAL;
4524 	}
4525 
4526 	/* check src1 operand */
4527 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
4528 	if (err)
4529 		return err;
4530 
4531 	/* check src2 operand */
4532 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4533 	if (err)
4534 		return err;
4535 
4536 	if (insn->imm == BPF_CMPXCHG) {
4537 		/* Check comparison of R0 with memory location */
4538 		err = check_reg_arg(env, BPF_REG_0, SRC_OP);
4539 		if (err)
4540 			return err;
4541 	}
4542 
4543 	if (is_pointer_value(env, insn->src_reg)) {
4544 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
4545 		return -EACCES;
4546 	}
4547 
4548 	if (is_ctx_reg(env, insn->dst_reg) ||
4549 	    is_pkt_reg(env, insn->dst_reg) ||
4550 	    is_flow_key_reg(env, insn->dst_reg) ||
4551 	    is_sk_reg(env, insn->dst_reg)) {
4552 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
4553 			insn->dst_reg,
4554 			reg_type_str[reg_state(env, insn->dst_reg)->type]);
4555 		return -EACCES;
4556 	}
4557 
4558 	if (insn->imm & BPF_FETCH) {
4559 		if (insn->imm == BPF_CMPXCHG)
4560 			load_reg = BPF_REG_0;
4561 		else
4562 			load_reg = insn->src_reg;
4563 
4564 		/* check and record load of old value */
4565 		err = check_reg_arg(env, load_reg, DST_OP);
4566 		if (err)
4567 			return err;
4568 	} else {
4569 		/* This instruction accesses a memory location but doesn't
4570 		 * actually load it into a register.
4571 		 */
4572 		load_reg = -1;
4573 	}
4574 
4575 	/* check whether we can read the memory */
4576 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4577 			       BPF_SIZE(insn->code), BPF_READ, load_reg, true);
4578 	if (err)
4579 		return err;
4580 
4581 	/* check whether we can write into the same memory */
4582 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4583 			       BPF_SIZE(insn->code), BPF_WRITE, -1, true);
4584 	if (err)
4585 		return err;
4586 
4587 	return 0;
4588 }
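
/* Illustrative sketch (not part of the verifier): an atomic fetch-and-add on
 * an initialized 8-byte stack slot, which exercises both the BPF_READ and the
 * BPF_WRITE probe above. Register choices are arbitrary.
 *
 *	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),		// initialize the slot
 *	BPF_MOV64_IMM(BPF_REG_1, 5),
 *	BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH,
 *		      BPF_REG_10, BPF_REG_1, -8),	// R1 = old value
 *	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 *	BPF_EXIT_INSN(),
 */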
4589 
4590 /* When register 'regno' is used to read the stack (either directly or through
4591  * a helper function) make sure that it's within stack boundary and, depending
4592  * on the access type, that all elements of the stack are initialized.
4593  *
4594  * 'off' includes 'regno->off', but not its dynamic part (if any).
4595  *
4596  * All registers that have been spilled on the stack in the slots within the
4597  * read offsets are marked as read.
4598  */
4599 static int check_stack_range_initialized(
4600 		struct bpf_verifier_env *env, int regno, int off,
4601 		int access_size, bool zero_size_allowed,
4602 		enum stack_access_src type, struct bpf_call_arg_meta *meta)
4603 {
4604 	struct bpf_reg_state *reg = reg_state(env, regno);
4605 	struct bpf_func_state *state = func(env, reg);
4606 	int err, min_off, max_off, i, j, slot, spi;
4607 	char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
4608 	enum bpf_access_type bounds_check_type;
4609 	/* Some accesses can write anything into the stack, others are
4610 	 * read-only.
4611 	 */
4612 	bool clobber = false;
4613 
4614 	if (access_size == 0 && !zero_size_allowed) {
4615 		verbose(env, "invalid zero-sized read\n");
4616 		return -EACCES;
4617 	}
4618 
4619 	if (type == ACCESS_HELPER) {
4620 		/* The bounds checks for writes are more permissive than for
4621 		 * reads. However, if raw_mode is not set, we'll do extra
4622 		 * checks below.
4623 		 */
4624 		bounds_check_type = BPF_WRITE;
4625 		clobber = true;
4626 	} else {
4627 		bounds_check_type = BPF_READ;
4628 	}
4629 	err = check_stack_access_within_bounds(env, regno, off, access_size,
4630 					       type, bounds_check_type);
4631 	if (err)
4632 		return err;
4633 
4634 
4635 	if (tnum_is_const(reg->var_off)) {
4636 		min_off = max_off = reg->var_off.value + off;
4637 	} else {
4638 		/* Variable offset is prohibited for unprivileged mode for
4639 		 * simplicity since it requires corresponding support in
4640 		 * Spectre masking for stack ALU.
4641 		 * See also retrieve_ptr_limit().
4642 		 */
4643 		if (!env->bypass_spec_v1) {
4644 			char tn_buf[48];
4645 
4646 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4647 			verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
4648 				regno, err_extra, tn_buf);
4649 			return -EACCES;
4650 		}
4651 		/* Only an initialized buffer on the stack may be accessed with a
4652 		 * variable offset. With an uninitialized buffer it's hard to
4653 		 * guarantee that the whole memory is marked as initialized on
4654 		 * helper return, since the specific bounds are unknown, which may
4655 		 * cause the uninitialized stack to be leaked.
4656 		 */
4657 		if (meta && meta->raw_mode)
4658 			meta = NULL;
4659 
4660 		min_off = reg->smin_value + off;
4661 		max_off = reg->smax_value + off;
4662 	}
4663 
4664 	if (meta && meta->raw_mode) {
4665 		meta->access_size = access_size;
4666 		meta->regno = regno;
4667 		return 0;
4668 	}
4669 
4670 	for (i = min_off; i < max_off + access_size; i++) {
4671 		u8 *stype;
4672 
4673 		slot = -i - 1;
4674 		spi = slot / BPF_REG_SIZE;
4675 		if (state->allocated_stack <= slot)
4676 			goto err;
4677 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4678 		if (*stype == STACK_MISC)
4679 			goto mark;
4680 		if (*stype == STACK_ZERO) {
4681 			if (clobber) {
4682 				/* helper can write anything into the stack */
4683 				*stype = STACK_MISC;
4684 			}
4685 			goto mark;
4686 		}
4687 
4688 		if (is_spilled_reg(&state->stack[spi]) &&
4689 		    state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
4690 			goto mark;
4691 
4692 		if (is_spilled_reg(&state->stack[spi]) &&
4693 		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
4694 		     env->allow_ptr_leaks)) {
4695 			if (clobber) {
4696 				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4697 				for (j = 0; j < BPF_REG_SIZE; j++)
4698 					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
4699 			}
4700 			goto mark;
4701 		}
4702 
4703 err:
4704 		if (tnum_is_const(reg->var_off)) {
4705 			verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
4706 				err_extra, regno, min_off, i - min_off, access_size);
4707 		} else {
4708 			char tn_buf[48];
4709 
4710 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4711 			verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
4712 				err_extra, regno, tn_buf, i - min_off, access_size);
4713 		}
4714 		return -EACCES;
4715 mark:
4716 		/* reading any byte out of 8-byte 'spill_slot' will cause
4717 		 * the whole slot to be marked as 'read'
4718 		 */
4719 		mark_reg_read(env, &state->stack[spi].spilled_ptr,
4720 			      state->stack[spi].spilled_ptr.parent,
4721 			      REG_LIVE_READ64);
4722 	}
4723 	return update_stack_depth(env, state, min_off);
4724 }
4725 
4726 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
4727 				   int access_size, bool zero_size_allowed,
4728 				   struct bpf_call_arg_meta *meta)
4729 {
4730 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4731 
4732 	switch (reg->type) {
4733 	case PTR_TO_PACKET:
4734 	case PTR_TO_PACKET_META:
4735 		return check_packet_access(env, regno, reg->off, access_size,
4736 					   zero_size_allowed);
4737 	case PTR_TO_MAP_KEY:
4738 		return check_mem_region_access(env, regno, reg->off, access_size,
4739 					       reg->map_ptr->key_size, false);
4740 	case PTR_TO_MAP_VALUE:
4741 		if (check_map_access_type(env, regno, reg->off, access_size,
4742 					  meta && meta->raw_mode ? BPF_WRITE :
4743 					  BPF_READ))
4744 			return -EACCES;
4745 		return check_map_access(env, regno, reg->off, access_size,
4746 					zero_size_allowed);
4747 	case PTR_TO_MEM:
4748 		return check_mem_region_access(env, regno, reg->off,
4749 					       access_size, reg->mem_size,
4750 					       zero_size_allowed);
4751 	case PTR_TO_RDONLY_BUF:
4752 		if (meta && meta->raw_mode)
4753 			return -EACCES;
4754 		return check_buffer_access(env, reg, regno, reg->off,
4755 					   access_size, zero_size_allowed,
4756 					   "rdonly",
4757 					   &env->prog->aux->max_rdonly_access);
4758 	case PTR_TO_RDWR_BUF:
4759 		return check_buffer_access(env, reg, regno, reg->off,
4760 					   access_size, zero_size_allowed,
4761 					   "rdwr",
4762 					   &env->prog->aux->max_rdwr_access);
4763 	case PTR_TO_STACK:
4764 		return check_stack_range_initialized(
4765 				env,
4766 				regno, reg->off, access_size,
4767 				zero_size_allowed, ACCESS_HELPER, meta);
4768 	default: /* scalar_value or invalid ptr */
4769 		/* Allow zero-byte read from NULL, regardless of pointer type */
4770 		if (zero_size_allowed && access_size == 0 &&
4771 		    register_is_null(reg))
4772 			return 0;
4773 
4774 		verbose(env, "R%d type=%s expected=%s\n", regno,
4775 			reg_type_str[reg->type],
4776 			reg_type_str[PTR_TO_STACK]);
4777 		return -EACCES;
4778 	}
4779 }
4780 
4781 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4782 		   u32 regno, u32 mem_size)
4783 {
4784 	if (register_is_null(reg))
4785 		return 0;
4786 
4787 	if (reg_type_may_be_null(reg->type)) {
4788 		/* Assuming that the register contains a value, check if the memory
4789 		 * access is safe. Temporarily save and restore the register's state as
4790 		 * the conversion shouldn't be visible to a caller.
4791 		 */
4792 		const struct bpf_reg_state saved_reg = *reg;
4793 		int rv;
4794 
4795 		mark_ptr_not_null_reg(reg);
4796 		rv = check_helper_mem_access(env, regno, mem_size, true, NULL);
4797 		*reg = saved_reg;
4798 		return rv;
4799 	}
4800 
4801 	return check_helper_mem_access(env, regno, mem_size, true, NULL);
4802 }
4803 
4804 /* Implementation details:
4805  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4806  * Two bpf_map_lookups (even with the same key) will have different reg->id.
4807  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4808  * value_or_null->value transition, since the verifier only cares about
4809  * the range of access to valid map value pointer and doesn't care about actual
4810  * address of the map element.
4811  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4812  * reg->id > 0 after value_or_null->value transition. By doing so
4813  * two bpf_map_lookups will be considered two different pointers that
4814  * point to different bpf_spin_locks.
4815  * The verifier allows taking only one bpf_spin_lock at a time to avoid
4816  * dead-locks.
4817  * Since only one bpf_spin_lock is allowed the checks are simpler than
4818  * reg_is_refcounted() logic. The verifier needs to remember only
4819  * one spin_lock instead of array of acquired_refs.
4820  * cur_state->active_spin_lock remembers which map value element got locked
4821  * and clears it after bpf_spin_unlock.
4822  */
4823 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
4824 			     bool is_lock)
4825 {
4826 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4827 	struct bpf_verifier_state *cur = env->cur_state;
4828 	bool is_const = tnum_is_const(reg->var_off);
4829 	struct bpf_map *map = reg->map_ptr;
4830 	u64 val = reg->var_off.value;
4831 
4832 	if (!is_const) {
4833 		verbose(env,
4834 			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
4835 			regno);
4836 		return -EINVAL;
4837 	}
4838 	if (!map->btf) {
4839 		verbose(env,
4840 			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
4841 			map->name);
4842 		return -EINVAL;
4843 	}
4844 	if (!map_value_has_spin_lock(map)) {
4845 		if (map->spin_lock_off == -E2BIG)
4846 			verbose(env,
4847 				"map '%s' has more than one 'struct bpf_spin_lock'\n",
4848 				map->name);
4849 		else if (map->spin_lock_off == -ENOENT)
4850 			verbose(env,
4851 				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
4852 				map->name);
4853 		else
4854 			verbose(env,
4855 				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
4856 				map->name);
4857 		return -EINVAL;
4858 	}
4859 	if (map->spin_lock_off != val + reg->off) {
4860 		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
4861 			val + reg->off);
4862 		return -EINVAL;
4863 	}
4864 	if (is_lock) {
4865 		if (cur->active_spin_lock) {
4866 			verbose(env,
4867 				"Locking two bpf_spin_locks are not allowed\n");
4868 			return -EINVAL;
4869 		}
4870 		cur->active_spin_lock = reg->id;
4871 	} else {
4872 		if (!cur->active_spin_lock) {
4873 			verbose(env, "bpf_spin_unlock without taking a lock\n");
4874 			return -EINVAL;
4875 		}
4876 		if (cur->active_spin_lock != reg->id) {
4877 			verbose(env, "bpf_spin_unlock of different lock\n");
4878 			return -EINVAL;
4879 		}
4880 		cur->active_spin_lock = 0;
4881 	}
4882 	return 0;
4883 }
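
/* Illustrative BPF C usage (not kernel code) that satisfies the checks above,
 * reusing the hypothetical 'struct my_value' layout sketched near
 * check_map_access(); 'lock_map' and 'key' are also made-up names.
 *
 *	struct my_value *val = bpf_map_lookup_elem(&lock_map, &key);
 *	if (!val)
 *		return 0;
 *	bpf_spin_lock(&val->lock);	// reg->id identifies this map element
 *	val->counter++;
 *	bpf_spin_unlock(&val->lock);	// must be the same lock/element
 */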
4884 
4885 static int process_timer_func(struct bpf_verifier_env *env, int regno,
4886 			      struct bpf_call_arg_meta *meta)
4887 {
4888 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4889 	bool is_const = tnum_is_const(reg->var_off);
4890 	struct bpf_map *map = reg->map_ptr;
4891 	u64 val = reg->var_off.value;
4892 
4893 	if (!is_const) {
4894 		verbose(env,
4895 			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
4896 			regno);
4897 		return -EINVAL;
4898 	}
4899 	if (!map->btf) {
4900 		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
4901 			map->name);
4902 		return -EINVAL;
4903 	}
4904 	if (!map_value_has_timer(map)) {
4905 		if (map->timer_off == -E2BIG)
4906 			verbose(env,
4907 				"map '%s' has more than one 'struct bpf_timer'\n",
4908 				map->name);
4909 		else if (map->timer_off == -ENOENT)
4910 			verbose(env,
4911 				"map '%s' doesn't have 'struct bpf_timer'\n",
4912 				map->name);
4913 		else
4914 			verbose(env,
4915 				"map '%s' is not a struct type or bpf_timer is mangled\n",
4916 				map->name);
4917 		return -EINVAL;
4918 	}
4919 	if (map->timer_off != val + reg->off) {
4920 		verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
4921 			val + reg->off, map->timer_off);
4922 		return -EINVAL;
4923 	}
4924 	if (meta->map_ptr) {
4925 		verbose(env, "verifier bug. Two map pointers in a timer helper\n");
4926 		return -EFAULT;
4927 	}
4928 	meta->map_uid = reg->map_uid;
4929 	meta->map_ptr = map;
4930 	return 0;
4931 }
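
/* Illustrative BPF C usage (not kernel code) matching the checks above;
 * 'struct my_elem', 'timer_map', 'key' and 'timer_cb' are hypothetical names.
 *
 *	struct my_elem {
 *		struct bpf_timer t;	// map->timer_off points here
 *	};
 *
 *	struct my_elem *e = bpf_map_lookup_elem(&timer_map, &key);
 *	if (!e)
 *		return 0;
 *	bpf_timer_init(&e->t, &timer_map, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&e->t, timer_cb);
 *	bpf_timer_start(&e->t, 0, 0);	// nsecs, flags
 */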
4932 
4933 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
4934 {
4935 	return type == ARG_PTR_TO_MEM ||
4936 	       type == ARG_PTR_TO_MEM_OR_NULL ||
4937 	       type == ARG_PTR_TO_UNINIT_MEM;
4938 }
4939 
4940 static bool arg_type_is_mem_size(enum bpf_arg_type type)
4941 {
4942 	return type == ARG_CONST_SIZE ||
4943 	       type == ARG_CONST_SIZE_OR_ZERO;
4944 }
4945 
4946 static bool arg_type_is_alloc_size(enum bpf_arg_type type)
4947 {
4948 	return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
4949 }
4950 
4951 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
4952 {
4953 	return type == ARG_PTR_TO_INT ||
4954 	       type == ARG_PTR_TO_LONG;
4955 }
4956 
4957 static int int_ptr_type_to_size(enum bpf_arg_type type)
4958 {
4959 	if (type == ARG_PTR_TO_INT)
4960 		return sizeof(u32);
4961 	else if (type == ARG_PTR_TO_LONG)
4962 		return sizeof(u64);
4963 
4964 	return -EINVAL;
4965 }
4966 
4967 static int resolve_map_arg_type(struct bpf_verifier_env *env,
4968 				 const struct bpf_call_arg_meta *meta,
4969 				 enum bpf_arg_type *arg_type)
4970 {
4971 	if (!meta->map_ptr) {
4972 		/* kernel subsystem misconfigured verifier */
4973 		verbose(env, "invalid map_ptr to access map->type\n");
4974 		return -EACCES;
4975 	}
4976 
4977 	switch (meta->map_ptr->map_type) {
4978 	case BPF_MAP_TYPE_SOCKMAP:
4979 	case BPF_MAP_TYPE_SOCKHASH:
4980 		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
4981 			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
4982 		} else {
4983 			verbose(env, "invalid arg_type for sockmap/sockhash\n");
4984 			return -EINVAL;
4985 		}
4986 		break;
4987 	case BPF_MAP_TYPE_BLOOM_FILTER:
4988 		if (meta->func_id == BPF_FUNC_map_peek_elem)
4989 			*arg_type = ARG_PTR_TO_MAP_VALUE;
4990 		break;
4991 	default:
4992 		break;
4993 	}
4994 	return 0;
4995 }
4996 
4997 struct bpf_reg_types {
4998 	const enum bpf_reg_type types[10];
4999 	u32 *btf_id;
5000 };
5001 
5002 static const struct bpf_reg_types map_key_value_types = {
5003 	.types = {
5004 		PTR_TO_STACK,
5005 		PTR_TO_PACKET,
5006 		PTR_TO_PACKET_META,
5007 		PTR_TO_MAP_KEY,
5008 		PTR_TO_MAP_VALUE,
5009 	},
5010 };
5011 
5012 static const struct bpf_reg_types sock_types = {
5013 	.types = {
5014 		PTR_TO_SOCK_COMMON,
5015 		PTR_TO_SOCKET,
5016 		PTR_TO_TCP_SOCK,
5017 		PTR_TO_XDP_SOCK,
5018 	},
5019 };
5020 
5021 #ifdef CONFIG_NET
5022 static const struct bpf_reg_types btf_id_sock_common_types = {
5023 	.types = {
5024 		PTR_TO_SOCK_COMMON,
5025 		PTR_TO_SOCKET,
5026 		PTR_TO_TCP_SOCK,
5027 		PTR_TO_XDP_SOCK,
5028 		PTR_TO_BTF_ID,
5029 	},
5030 	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5031 };
5032 #endif
5033 
5034 static const struct bpf_reg_types mem_types = {
5035 	.types = {
5036 		PTR_TO_STACK,
5037 		PTR_TO_PACKET,
5038 		PTR_TO_PACKET_META,
5039 		PTR_TO_MAP_KEY,
5040 		PTR_TO_MAP_VALUE,
5041 		PTR_TO_MEM,
5042 		PTR_TO_RDONLY_BUF,
5043 		PTR_TO_RDWR_BUF,
5044 	},
5045 };
5046 
5047 static const struct bpf_reg_types int_ptr_types = {
5048 	.types = {
5049 		PTR_TO_STACK,
5050 		PTR_TO_PACKET,
5051 		PTR_TO_PACKET_META,
5052 		PTR_TO_MAP_KEY,
5053 		PTR_TO_MAP_VALUE,
5054 	},
5055 };
5056 
5057 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
5058 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
5059 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
5060 static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
5061 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
5062 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
5063 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
5064 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
5065 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
5066 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
5067 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
5068 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
5069 
5070 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
5071 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
5072 	[ARG_PTR_TO_MAP_VALUE]		= &map_key_value_types,
5073 	[ARG_PTR_TO_UNINIT_MAP_VALUE]	= &map_key_value_types,
5074 	[ARG_PTR_TO_MAP_VALUE_OR_NULL]	= &map_key_value_types,
5075 	[ARG_CONST_SIZE]		= &scalar_types,
5076 	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
5077 	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
5078 	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
5079 	[ARG_PTR_TO_CTX]		= &context_types,
5080 	[ARG_PTR_TO_CTX_OR_NULL]	= &context_types,
5081 	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
5082 #ifdef CONFIG_NET
5083 	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
5084 #endif
5085 	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
5086 	[ARG_PTR_TO_SOCKET_OR_NULL]	= &fullsock_types,
5087 	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
5088 	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
5089 	[ARG_PTR_TO_MEM]		= &mem_types,
5090 	[ARG_PTR_TO_MEM_OR_NULL]	= &mem_types,
5091 	[ARG_PTR_TO_UNINIT_MEM]		= &mem_types,
5092 	[ARG_PTR_TO_ALLOC_MEM]		= &alloc_mem_types,
5093 	[ARG_PTR_TO_ALLOC_MEM_OR_NULL]	= &alloc_mem_types,
5094 	[ARG_PTR_TO_INT]		= &int_ptr_types,
5095 	[ARG_PTR_TO_LONG]		= &int_ptr_types,
5096 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
5097 	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
5098 	[ARG_PTR_TO_STACK_OR_NULL]	= &stack_ptr_types,
5099 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
5100 	[ARG_PTR_TO_TIMER]		= &timer_types,
5101 };
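
/* Illustrative example of how this table is consulted (a sketch, not code
 * from this file; bpf_get_current_comm() is one real helper of this shape):
 * its first argument is ARG_PTR_TO_UNINIT_MEM, which maps to mem_types
 * above, so a BPF program may pass a stack buffer:
 *
 *	char comm[16];
 *	bpf_get_current_comm(comm, sizeof(comm));
 *
 * because PTR_TO_STACK is listed in mem_types, while e.g. a plain
 * SCALAR_VALUE in that argument is rejected by check_reg_type() below.
 */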
5102 
5103 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
5104 			  enum bpf_arg_type arg_type,
5105 			  const u32 *arg_btf_id)
5106 {
5107 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5108 	enum bpf_reg_type expected, type = reg->type;
5109 	const struct bpf_reg_types *compatible;
5110 	int i, j;
5111 
5112 	compatible = compatible_reg_types[arg_type];
5113 	if (!compatible) {
5114 		verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
5115 		return -EFAULT;
5116 	}
5117 
5118 	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
5119 		expected = compatible->types[i];
5120 		if (expected == NOT_INIT)
5121 			break;
5122 
5123 		if (type == expected)
5124 			goto found;
5125 	}
5126 
5127 	verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
5128 	for (j = 0; j + 1 < i; j++)
5129 		verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
5130 	verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
5131 	return -EACCES;
5132 
5133 found:
5134 	if (type == PTR_TO_BTF_ID) {
5135 		if (!arg_btf_id) {
5136 			if (!compatible->btf_id) {
5137 				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
5138 				return -EFAULT;
5139 			}
5140 			arg_btf_id = compatible->btf_id;
5141 		}
5142 
5143 		if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5144 					  btf_vmlinux, *arg_btf_id)) {
5145 			verbose(env, "R%d is of type %s but %s is expected\n",
5146 				regno, kernel_type_name(reg->btf, reg->btf_id),
5147 				kernel_type_name(btf_vmlinux, *arg_btf_id));
5148 			return -EACCES;
5149 		}
5150 
5151 		if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5152 			verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
5153 				regno);
5154 			return -EACCES;
5155 		}
5156 	}
5157 
5158 	return 0;
5159 }
5160 
5161 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
5162 			  struct bpf_call_arg_meta *meta,
5163 			  const struct bpf_func_proto *fn)
5164 {
5165 	u32 regno = BPF_REG_1 + arg;
5166 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5167 	enum bpf_arg_type arg_type = fn->arg_type[arg];
5168 	enum bpf_reg_type type = reg->type;
5169 	int err = 0;
5170 
5171 	if (arg_type == ARG_DONTCARE)
5172 		return 0;
5173 
5174 	err = check_reg_arg(env, regno, SRC_OP);
5175 	if (err)
5176 		return err;
5177 
5178 	if (arg_type == ARG_ANYTHING) {
5179 		if (is_pointer_value(env, regno)) {
5180 			verbose(env, "R%d leaks addr into helper function\n",
5181 				regno);
5182 			return -EACCES;
5183 		}
5184 		return 0;
5185 	}
5186 
5187 	if (type_is_pkt_pointer(type) &&
5188 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
5189 		verbose(env, "helper access to the packet is not allowed\n");
5190 		return -EACCES;
5191 	}
5192 
5193 	if (arg_type == ARG_PTR_TO_MAP_VALUE ||
5194 	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
5195 	    arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
5196 		err = resolve_map_arg_type(env, meta, &arg_type);
5197 		if (err)
5198 			return err;
5199 	}
5200 
5201 	if (register_is_null(reg) && arg_type_may_be_null(arg_type))
5202 		/* A NULL register has a SCALAR_VALUE type, so skip
5203 		 * type checking.
5204 		 */
5205 		goto skip_type_check;
5206 
5207 	err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
5208 	if (err)
5209 		return err;
5210 
5211 	if (type == PTR_TO_CTX) {
5212 		err = check_ctx_reg(env, reg, regno);
5213 		if (err < 0)
5214 			return err;
5215 	}
5216 
5217 skip_type_check:
5218 	if (reg->ref_obj_id) {
5219 		if (meta->ref_obj_id) {
5220 			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
5221 				regno, reg->ref_obj_id,
5222 				meta->ref_obj_id);
5223 			return -EFAULT;
5224 		}
5225 		meta->ref_obj_id = reg->ref_obj_id;
5226 	}
5227 
5228 	if (arg_type == ARG_CONST_MAP_PTR) {
5229 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
5230 		if (meta->map_ptr) {
5231 			/* Use map_uid (which is unique id of inner map) to reject:
5232 			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
5233 			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
5234 			 * if (inner_map1 && inner_map2) {
5235 			 *     timer = bpf_map_lookup_elem(inner_map1);
5236 			 *     if (timer)
5237 			 *         // mismatch would have been allowed
5238 			 *         bpf_timer_init(timer, inner_map2);
5239 			 * }
5240 			 *
5241 			 * Comparing map_ptr is enough to distinguish normal and outer maps.
5242 			 */
5243 			if (meta->map_ptr != reg->map_ptr ||
5244 			    meta->map_uid != reg->map_uid) {
5245 				verbose(env,
5246 					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
5247 					meta->map_uid, reg->map_uid);
5248 				return -EINVAL;
5249 			}
5250 		}
5251 		meta->map_ptr = reg->map_ptr;
5252 		meta->map_uid = reg->map_uid;
5253 	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
5254 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
5255 		 * check that [key, key + map->key_size) are within
5256 		 * stack limits and initialized
5257 		 */
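		/* Illustrative only (a typical BPF-side pattern, not code from
		 * this file; my_map is a placeholder):
		 *	u32 key = 0;	// lives on the BPF stack
		 *	val = bpf_map_lookup_elem(&my_map, &key);
		 * Here &key is PTR_TO_STACK and map->key_size bytes starting
		 * at it must be in bounds and initialized.
		 */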
5258 		if (!meta->map_ptr) {
5259 			/* In the function declaration map_ptr must come before
5260 			 * map_key, so that it's verified and known before we
5261 			 * have to check map_key here. Otherwise it means the
5262 			 * kernel subsystem misconfigured the verifier.
5263 			 */
5264 			verbose(env, "invalid map_ptr to access map->key\n");
5265 			return -EACCES;
5266 		}
5267 		err = check_helper_mem_access(env, regno,
5268 					      meta->map_ptr->key_size, false,
5269 					      NULL);
5270 	} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
5271 		   (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
5272 		    !register_is_null(reg)) ||
5273 		   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
5274 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
5275 		 * check [value, value + map->value_size) validity
5276 		 */
5277 		if (!meta->map_ptr) {
5278 			/* kernel subsystem misconfigured verifier */
5279 			verbose(env, "invalid map_ptr to access map->value\n");
5280 			return -EACCES;
5281 		}
5282 		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
5283 		err = check_helper_mem_access(env, regno,
5284 					      meta->map_ptr->value_size, false,
5285 					      meta);
5286 	} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
5287 		if (!reg->btf_id) {
5288 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
5289 			return -EACCES;
5290 		}
5291 		meta->ret_btf = reg->btf;
5292 		meta->ret_btf_id = reg->btf_id;
5293 	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
5294 		if (meta->func_id == BPF_FUNC_spin_lock) {
5295 			if (process_spin_lock(env, regno, true))
5296 				return -EACCES;
5297 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
5298 			if (process_spin_lock(env, regno, false))
5299 				return -EACCES;
5300 		} else {
5301 			verbose(env, "verifier internal error\n");
5302 			return -EFAULT;
5303 		}
5304 	} else if (arg_type == ARG_PTR_TO_TIMER) {
5305 		if (process_timer_func(env, regno, meta))
5306 			return -EACCES;
5307 	} else if (arg_type == ARG_PTR_TO_FUNC) {
5308 		meta->subprogno = reg->subprogno;
5309 	} else if (arg_type_is_mem_ptr(arg_type)) {
5310 		/* The access to this pointer is only checked when we hit the
5311 		 * next is_mem_size argument below.
5312 		 */
5313 		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
5314 	} else if (arg_type_is_mem_size(arg_type)) {
5315 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
5316 
5317 		/* This is used to refine r0 return value bounds for helpers
5318 		 * that enforce this value as an upper bound on return values.
5319 		 * See do_refine_retval_range() for helpers that can refine
5320 		 * the return value. The size argument's C type is u32, so we
5321 		 * pull the register bound from umax_value; if it can be
5322 		 * negative, the verifier errors out below. Only upper bounds
5323 		 * can be learned because the retval is an int type and negative retvals are allowed.
5324 		 */
5325 		meta->msize_max_value = reg->umax_value;
5326 
5327 		/* The register is SCALAR_VALUE; the access check
5328 		 * happens using its boundaries.
5329 		 */
5330 		if (!tnum_is_const(reg->var_off))
5331 			/* For unprivileged variable accesses, disable raw
5332 			 * mode so that the program is required to
5333 			 * initialize all the memory that the helper could
5334 			 * just partially fill up.
5335 			 */
5336 			meta = NULL;
5337 
5338 		if (reg->smin_value < 0) {
5339 			verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
5340 				regno);
5341 			return -EACCES;
5342 		}
5343 
5344 		if (reg->umin_value == 0) {
5345 			err = check_helper_mem_access(env, regno - 1, 0,
5346 						      zero_size_allowed,
5347 						      meta);
5348 			if (err)
5349 				return err;
5350 		}
5351 
5352 		if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
5353 			verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
5354 				regno);
5355 			return -EACCES;
5356 		}
5357 		err = check_helper_mem_access(env, regno - 1,
5358 					      reg->umax_value,
5359 					      zero_size_allowed, meta);
5360 		if (!err)
5361 			err = mark_chain_precision(env, regno);
5362 	} else if (arg_type_is_alloc_size(arg_type)) {
5363 		if (!tnum_is_const(reg->var_off)) {
5364 			verbose(env, "R%d is not a known constant\n",
5365 				regno);
5366 			return -EACCES;
5367 		}
5368 		meta->mem_size = reg->var_off.value;
5369 	} else if (arg_type_is_int_ptr(arg_type)) {
5370 		int size = int_ptr_type_to_size(arg_type);
5371 
5372 		err = check_helper_mem_access(env, regno, size, false, meta);
5373 		if (err)
5374 			return err;
5375 		err = check_ptr_alignment(env, reg, 0, size, true);
5376 	} else if (arg_type == ARG_PTR_TO_CONST_STR) {
5377 		struct bpf_map *map = reg->map_ptr;
5378 		int map_off;
5379 		u64 map_addr;
5380 		char *str_ptr;
5381 
5382 		if (!bpf_map_is_rdonly(map)) {
5383 			verbose(env, "R%d does not point to a readonly map\n", regno);
5384 			return -EACCES;
5385 		}
5386 
5387 		if (!tnum_is_const(reg->var_off)) {
5388 			verbose(env, "R%d is not a constant address\n", regno);
5389 			return -EACCES;
5390 		}
5391 
5392 		if (!map->ops->map_direct_value_addr) {
5393 			verbose(env, "no direct value access support for this map type\n");
5394 			return -EACCES;
5395 		}
5396 
5397 		err = check_map_access(env, regno, reg->off,
5398 				       map->value_size - reg->off, false);
5399 		if (err)
5400 			return err;
5401 
5402 		map_off = reg->off + reg->var_off.value;
5403 		err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
5404 		if (err) {
5405 			verbose(env, "direct value access on string failed\n");
5406 			return err;
5407 		}
5408 
5409 		str_ptr = (char *)(long)(map_addr);
5410 		if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
5411 			verbose(env, "string is not zero-terminated\n");
5412 			return -EINVAL;
5413 		}
5414 	}
5415 
5416 	return err;
5417 }
5418 
5419 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
5420 {
5421 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
5422 	enum bpf_prog_type type = resolve_prog_type(env->prog);
5423 
5424 	if (func_id != BPF_FUNC_map_update_elem)
5425 		return false;
5426 
5427 	/* It's not possible to get access to a locked struct sock in these
5428 	 * contexts, so updating is safe.
5429 	 */
5430 	switch (type) {
5431 	case BPF_PROG_TYPE_TRACING:
5432 		if (eatype == BPF_TRACE_ITER)
5433 			return true;
5434 		break;
5435 	case BPF_PROG_TYPE_SOCKET_FILTER:
5436 	case BPF_PROG_TYPE_SCHED_CLS:
5437 	case BPF_PROG_TYPE_SCHED_ACT:
5438 	case BPF_PROG_TYPE_XDP:
5439 	case BPF_PROG_TYPE_SK_REUSEPORT:
5440 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
5441 	case BPF_PROG_TYPE_SK_LOOKUP:
5442 		return true;
5443 	default:
5444 		break;
5445 	}
5446 
5447 	verbose(env, "cannot update sockmap in this context\n");
5448 	return false;
5449 }
5450 
5451 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
5452 {
5453 	return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
5454 }
5455 
5456 static int check_map_func_compatibility(struct bpf_verifier_env *env,
5457 					struct bpf_map *map, int func_id)
5458 {
5459 	if (!map)
5460 		return 0;
5461 
5462 	/* We need a two way check, first is from map perspective ... */
5463 	switch (map->map_type) {
5464 	case BPF_MAP_TYPE_PROG_ARRAY:
5465 		if (func_id != BPF_FUNC_tail_call)
5466 			goto error;
5467 		break;
5468 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5469 		if (func_id != BPF_FUNC_perf_event_read &&
5470 		    func_id != BPF_FUNC_perf_event_output &&
5471 		    func_id != BPF_FUNC_skb_output &&
5472 		    func_id != BPF_FUNC_perf_event_read_value &&
5473 		    func_id != BPF_FUNC_xdp_output)
5474 			goto error;
5475 		break;
5476 	case BPF_MAP_TYPE_RINGBUF:
5477 		if (func_id != BPF_FUNC_ringbuf_output &&
5478 		    func_id != BPF_FUNC_ringbuf_reserve &&
5479 		    func_id != BPF_FUNC_ringbuf_query)
5480 			goto error;
5481 		break;
5482 	case BPF_MAP_TYPE_STACK_TRACE:
5483 		if (func_id != BPF_FUNC_get_stackid)
5484 			goto error;
5485 		break;
5486 	case BPF_MAP_TYPE_CGROUP_ARRAY:
5487 		if (func_id != BPF_FUNC_skb_under_cgroup &&
5488 		    func_id != BPF_FUNC_current_task_under_cgroup)
5489 			goto error;
5490 		break;
5491 	case BPF_MAP_TYPE_CGROUP_STORAGE:
5492 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
5493 		if (func_id != BPF_FUNC_get_local_storage)
5494 			goto error;
5495 		break;
5496 	case BPF_MAP_TYPE_DEVMAP:
5497 	case BPF_MAP_TYPE_DEVMAP_HASH:
5498 		if (func_id != BPF_FUNC_redirect_map &&
5499 		    func_id != BPF_FUNC_map_lookup_elem)
5500 			goto error;
5501 		break;
5502 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
5503 	 * appear.
5504 	 */
5505 	case BPF_MAP_TYPE_CPUMAP:
5506 		if (func_id != BPF_FUNC_redirect_map)
5507 			goto error;
5508 		break;
5509 	case BPF_MAP_TYPE_XSKMAP:
5510 		if (func_id != BPF_FUNC_redirect_map &&
5511 		    func_id != BPF_FUNC_map_lookup_elem)
5512 			goto error;
5513 		break;
5514 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5515 	case BPF_MAP_TYPE_HASH_OF_MAPS:
5516 		if (func_id != BPF_FUNC_map_lookup_elem)
5517 			goto error;
5518 		break;
5519 	case BPF_MAP_TYPE_SOCKMAP:
5520 		if (func_id != BPF_FUNC_sk_redirect_map &&
5521 		    func_id != BPF_FUNC_sock_map_update &&
5522 		    func_id != BPF_FUNC_map_delete_elem &&
5523 		    func_id != BPF_FUNC_msg_redirect_map &&
5524 		    func_id != BPF_FUNC_sk_select_reuseport &&
5525 		    func_id != BPF_FUNC_map_lookup_elem &&
5526 		    !may_update_sockmap(env, func_id))
5527 			goto error;
5528 		break;
5529 	case BPF_MAP_TYPE_SOCKHASH:
5530 		if (func_id != BPF_FUNC_sk_redirect_hash &&
5531 		    func_id != BPF_FUNC_sock_hash_update &&
5532 		    func_id != BPF_FUNC_map_delete_elem &&
5533 		    func_id != BPF_FUNC_msg_redirect_hash &&
5534 		    func_id != BPF_FUNC_sk_select_reuseport &&
5535 		    func_id != BPF_FUNC_map_lookup_elem &&
5536 		    !may_update_sockmap(env, func_id))
5537 			goto error;
5538 		break;
5539 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
5540 		if (func_id != BPF_FUNC_sk_select_reuseport)
5541 			goto error;
5542 		break;
5543 	case BPF_MAP_TYPE_QUEUE:
5544 	case BPF_MAP_TYPE_STACK:
5545 		if (func_id != BPF_FUNC_map_peek_elem &&
5546 		    func_id != BPF_FUNC_map_pop_elem &&
5547 		    func_id != BPF_FUNC_map_push_elem)
5548 			goto error;
5549 		break;
5550 	case BPF_MAP_TYPE_SK_STORAGE:
5551 		if (func_id != BPF_FUNC_sk_storage_get &&
5552 		    func_id != BPF_FUNC_sk_storage_delete)
5553 			goto error;
5554 		break;
5555 	case BPF_MAP_TYPE_INODE_STORAGE:
5556 		if (func_id != BPF_FUNC_inode_storage_get &&
5557 		    func_id != BPF_FUNC_inode_storage_delete)
5558 			goto error;
5559 		break;
5560 	case BPF_MAP_TYPE_TASK_STORAGE:
5561 		if (func_id != BPF_FUNC_task_storage_get &&
5562 		    func_id != BPF_FUNC_task_storage_delete)
5563 			goto error;
5564 		break;
5565 	case BPF_MAP_TYPE_BLOOM_FILTER:
5566 		if (func_id != BPF_FUNC_map_peek_elem &&
5567 		    func_id != BPF_FUNC_map_push_elem)
5568 			goto error;
5569 		break;
5570 	default:
5571 		break;
5572 	}
5573 
5574 	/* ... and second from the function itself. */
5575 	switch (func_id) {
5576 	case BPF_FUNC_tail_call:
5577 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
5578 			goto error;
5579 		if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
5580 			verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
5581 			return -EINVAL;
5582 		}
5583 		break;
5584 	case BPF_FUNC_perf_event_read:
5585 	case BPF_FUNC_perf_event_output:
5586 	case BPF_FUNC_perf_event_read_value:
5587 	case BPF_FUNC_skb_output:
5588 	case BPF_FUNC_xdp_output:
5589 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
5590 			goto error;
5591 		break;
5592 	case BPF_FUNC_ringbuf_output:
5593 	case BPF_FUNC_ringbuf_reserve:
5594 	case BPF_FUNC_ringbuf_query:
5595 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
5596 			goto error;
5597 		break;
5598 	case BPF_FUNC_get_stackid:
5599 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
5600 			goto error;
5601 		break;
5602 	case BPF_FUNC_current_task_under_cgroup:
5603 	case BPF_FUNC_skb_under_cgroup:
5604 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
5605 			goto error;
5606 		break;
5607 	case BPF_FUNC_redirect_map:
5608 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
5609 		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
5610 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
5611 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
5612 			goto error;
5613 		break;
5614 	case BPF_FUNC_sk_redirect_map:
5615 	case BPF_FUNC_msg_redirect_map:
5616 	case BPF_FUNC_sock_map_update:
5617 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
5618 			goto error;
5619 		break;
5620 	case BPF_FUNC_sk_redirect_hash:
5621 	case BPF_FUNC_msg_redirect_hash:
5622 	case BPF_FUNC_sock_hash_update:
5623 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
5624 			goto error;
5625 		break;
5626 	case BPF_FUNC_get_local_storage:
5627 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
5628 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
5629 			goto error;
5630 		break;
5631 	case BPF_FUNC_sk_select_reuseport:
5632 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
5633 		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
5634 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
5635 			goto error;
5636 		break;
5637 	case BPF_FUNC_map_pop_elem:
5638 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
5639 		    map->map_type != BPF_MAP_TYPE_STACK)
5640 			goto error;
5641 		break;
5642 	case BPF_FUNC_map_peek_elem:
5643 	case BPF_FUNC_map_push_elem:
5644 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
5645 		    map->map_type != BPF_MAP_TYPE_STACK &&
5646 		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
5647 			goto error;
5648 		break;
5649 	case BPF_FUNC_sk_storage_get:
5650 	case BPF_FUNC_sk_storage_delete:
5651 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
5652 			goto error;
5653 		break;
5654 	case BPF_FUNC_inode_storage_get:
5655 	case BPF_FUNC_inode_storage_delete:
5656 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
5657 			goto error;
5658 		break;
5659 	case BPF_FUNC_task_storage_get:
5660 	case BPF_FUNC_task_storage_delete:
5661 		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
5662 			goto error;
5663 		break;
5664 	default:
5665 		break;
5666 	}
5667 
5668 	return 0;
5669 error:
5670 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
5671 		map->map_type, func_id_name(func_id), func_id);
5672 	return -EINVAL;
5673 }
5674 
5675 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
5676 {
5677 	int count = 0;
5678 
5679 	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
5680 		count++;
5681 	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
5682 		count++;
5683 	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
5684 		count++;
5685 	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
5686 		count++;
5687 	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
5688 		count++;
5689 
5690 	/* We only support one arg being in raw mode at the moment,
5691 	 * which is sufficient for the helper functions we have
5692 	 * right now.
5693 	 */
5694 	return count <= 1;
5695 }
5696 
5697 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
5698 				    enum bpf_arg_type arg_next)
5699 {
5700 	return (arg_type_is_mem_ptr(arg_curr) &&
5701 	        !arg_type_is_mem_size(arg_next)) ||
5702 	       (!arg_type_is_mem_ptr(arg_curr) &&
5703 		arg_type_is_mem_size(arg_next));
5704 }
5705 
5706 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
5707 {
5708 	/* bpf_xxx(..., buf, len) call will access 'len'
5709 	 * bytes from memory 'buf'. Both arg types need
5710 	 * to be paired, so make sure there's no buggy
5711 	 * helper function specification.
5712 	 */
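	/* For illustration, a correctly paired proto looks like
	 * (bpf_get_current_comm() is one real example of this shape):
	 *	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
	 *	.arg2_type = ARG_CONST_SIZE,
	 * i.e. every mem ptr argument is immediately followed by its size.
	 */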
5713 	if (arg_type_is_mem_size(fn->arg1_type) ||
5714 	    arg_type_is_mem_ptr(fn->arg5_type)  ||
5715 	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
5716 	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
5717 	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
5718 	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
5719 		return false;
5720 
5721 	return true;
5722 }
5723 
5724 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
5725 {
5726 	int count = 0;
5727 
5728 	if (arg_type_may_be_refcounted(fn->arg1_type))
5729 		count++;
5730 	if (arg_type_may_be_refcounted(fn->arg2_type))
5731 		count++;
5732 	if (arg_type_may_be_refcounted(fn->arg3_type))
5733 		count++;
5734 	if (arg_type_may_be_refcounted(fn->arg4_type))
5735 		count++;
5736 	if (arg_type_may_be_refcounted(fn->arg5_type))
5737 		count++;
5738 
5739 	/* A reference acquiring function cannot acquire
5740 	 * another refcounted ptr.
5741 	 */
5742 	if (may_be_acquire_function(func_id) && count)
5743 		return false;
5744 
5745 	/* We only support one arg being unreferenced at the moment,
5746 	 * which is sufficient for the helper functions we have right now.
5747 	 */
5748 	return count <= 1;
5749 }
5750 
5751 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
5752 {
5753 	int i;
5754 
5755 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
5756 		if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
5757 			return false;
5758 
5759 		if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
5760 			return false;
5761 	}
5762 
5763 	return true;
5764 }
5765 
5766 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
5767 {
5768 	return check_raw_mode_ok(fn) &&
5769 	       check_arg_pair_ok(fn) &&
5770 	       check_btf_id_ok(fn) &&
5771 	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
5772 }
5773 
5774 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
5775  * are now invalid, so turn them into unknown SCALAR_VALUE.
5776  */
5777 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
5778 				     struct bpf_func_state *state)
5779 {
5780 	struct bpf_reg_state *regs = state->regs, *reg;
5781 	int i;
5782 
5783 	for (i = 0; i < MAX_BPF_REG; i++)
5784 		if (reg_is_pkt_pointer_any(&regs[i]))
5785 			mark_reg_unknown(env, regs, i);
5786 
5787 	bpf_for_each_spilled_reg(i, state, reg) {
5788 		if (!reg)
5789 			continue;
5790 		if (reg_is_pkt_pointer_any(reg))
5791 			__mark_reg_unknown(env, reg);
5792 	}
5793 }
5794 
5795 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
5796 {
5797 	struct bpf_verifier_state *vstate = env->cur_state;
5798 	int i;
5799 
5800 	for (i = 0; i <= vstate->curframe; i++)
5801 		__clear_all_pkt_pointers(env, vstate->frame[i]);
5802 }
5803 
5804 enum {
5805 	AT_PKT_END = -1,
5806 	BEYOND_PKT_END = -2,
5807 };
5808 
5809 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
5810 {
5811 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5812 	struct bpf_reg_state *reg = &state->regs[regn];
5813 
5814 	if (reg->type != PTR_TO_PACKET)
5815 		/* PTR_TO_PACKET_META is not supported yet */
5816 		return;
5817 
5818 	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
5819 	 * How far beyond pkt_end it goes is unknown.
5820 	 * if (!range_open) it's the case of pkt >= pkt_end
5821 	 * if (range_open) it's the case of pkt > pkt_end
5822 	 * hence this pointer is at least 1 byte bigger than pkt_end
5823 	 */
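	/* Illustrative example (a typical tc/XDP bounds check, not code from
	 * this file):
	 *	if (data + sizeof(struct ethhdr) > data_end)
	 *		return TC_ACT_SHOT;
	 * The taken branch sees the derived packet pointer beyond pkt_end and
	 * is handled here; the fall-through branch instead gets a valid
	 * access range via find_good_pkt_pointers().
	 */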
5824 	if (range_open)
5825 		reg->range = BEYOND_PKT_END;
5826 	else
5827 		reg->range = AT_PKT_END;
5828 }
5829 
5830 static void release_reg_references(struct bpf_verifier_env *env,
5831 				   struct bpf_func_state *state,
5832 				   int ref_obj_id)
5833 {
5834 	struct bpf_reg_state *regs = state->regs, *reg;
5835 	int i;
5836 
5837 	for (i = 0; i < MAX_BPF_REG; i++)
5838 		if (regs[i].ref_obj_id == ref_obj_id)
5839 			mark_reg_unknown(env, regs, i);
5840 
5841 	bpf_for_each_spilled_reg(i, state, reg) {
5842 		if (!reg)
5843 			continue;
5844 		if (reg->ref_obj_id == ref_obj_id)
5845 			__mark_reg_unknown(env, reg);
5846 	}
5847 }
5848 
5849 /* The pointer with the specified id has released its reference to kernel
5850  * resources. Identify all copies of the same pointer and clear the reference.
5851  */
5852 static int release_reference(struct bpf_verifier_env *env,
5853 			     int ref_obj_id)
5854 {
5855 	struct bpf_verifier_state *vstate = env->cur_state;
5856 	int err;
5857 	int i;
5858 
5859 	err = release_reference_state(cur_func(env), ref_obj_id);
5860 	if (err)
5861 		return err;
5862 
5863 	for (i = 0; i <= vstate->curframe; i++)
5864 		release_reg_references(env, vstate->frame[i], ref_obj_id);
5865 
5866 	return 0;
5867 }
5868 
5869 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
5870 				    struct bpf_reg_state *regs)
5871 {
5872 	int i;
5873 
5874 	/* after the call registers r0 - r5 were scratched */
5875 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
5876 		mark_reg_not_init(env, regs, caller_saved[i]);
5877 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5878 	}
5879 }
5880 
5881 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
5882 				   struct bpf_func_state *caller,
5883 				   struct bpf_func_state *callee,
5884 				   int insn_idx);
5885 
5886 static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
5887 			     int *insn_idx, int subprog,
5888 			     set_callee_state_fn set_callee_state_cb)
5889 {
5890 	struct bpf_verifier_state *state = env->cur_state;
5891 	struct bpf_func_info_aux *func_info_aux;
5892 	struct bpf_func_state *caller, *callee;
5893 	int err;
5894 	bool is_global = false;
5895 
5896 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
5897 		verbose(env, "the call stack of %d frames is too deep\n",
5898 			state->curframe + 2);
5899 		return -E2BIG;
5900 	}
5901 
5902 	caller = state->frame[state->curframe];
5903 	if (state->frame[state->curframe + 1]) {
5904 		verbose(env, "verifier bug. Frame %d already allocated\n",
5905 			state->curframe + 1);
5906 		return -EFAULT;
5907 	}
5908 
5909 	func_info_aux = env->prog->aux->func_info_aux;
5910 	if (func_info_aux)
5911 		is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
5912 	err = btf_check_subprog_arg_match(env, subprog, caller->regs);
5913 	if (err == -EFAULT)
5914 		return err;
5915 	if (is_global) {
5916 		if (err) {
5917 			verbose(env, "Caller passes invalid args into func#%d\n",
5918 				subprog);
5919 			return err;
5920 		} else {
5921 			if (env->log.level & BPF_LOG_LEVEL)
5922 				verbose(env,
5923 					"Func#%d is global and valid. Skipping.\n",
5924 					subprog);
5925 			clear_caller_saved_regs(env, caller->regs);
5926 
5927 			/* All global functions return a 64-bit SCALAR_VALUE */
5928 			mark_reg_unknown(env, caller->regs, BPF_REG_0);
5929 			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5930 
5931 			/* continue with next insn after call */
5932 			return 0;
5933 		}
5934 	}
5935 
5936 	if (insn->code == (BPF_JMP | BPF_CALL) &&
5937 	    insn->imm == BPF_FUNC_timer_set_callback) {
5938 		struct bpf_verifier_state *async_cb;
5939 
5940 		/* there is no real recursion here. timer callbacks are async */
5941 		env->subprog_info[subprog].is_async_cb = true;
5942 		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
5943 					 *insn_idx, subprog);
5944 		if (!async_cb)
5945 			return -EFAULT;
5946 		callee = async_cb->frame[0];
5947 		callee->async_entry_cnt = caller->async_entry_cnt + 1;
5948 
5949 		/* Convert bpf_timer_set_callback() args into timer callback args */
5950 		err = set_callee_state_cb(env, caller, callee, *insn_idx);
5951 		if (err)
5952 			return err;
5953 
5954 		clear_caller_saved_regs(env, caller->regs);
5955 		mark_reg_unknown(env, caller->regs, BPF_REG_0);
5956 		caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5957 		/* continue with next insn after call */
5958 		return 0;
5959 	}
5960 
5961 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
5962 	if (!callee)
5963 		return -ENOMEM;
5964 	state->frame[state->curframe + 1] = callee;
5965 
5966 	/* callee cannot access r0, r6 - r9 for reading and has to write
5967 	 * into its own stack before reading from it.
5968 	 * callee can read/write into caller's stack
5969 	 */
5970 	init_func_state(env, callee,
5971 			/* remember the callsite, it will be used by bpf_exit */
5972 			*insn_idx /* callsite */,
5973 			state->curframe + 1 /* frameno within this callchain */,
5974 			subprog /* subprog number within this prog */);
5975 
5976 	/* Transfer references to the callee */
5977 	err = copy_reference_state(callee, caller);
5978 	if (err)
5979 		return err;
5980 
5981 	err = set_callee_state_cb(env, caller, callee, *insn_idx);
5982 	if (err)
5983 		return err;
5984 
5985 	clear_caller_saved_regs(env, caller->regs);
5986 
5987 	/* only increment it after check_reg_arg() finished */
5988 	state->curframe++;
5989 
5990 	/* and go analyze first insn of the callee */
5991 	*insn_idx = env->subprog_info[subprog].start - 1;
5992 
5993 	if (env->log.level & BPF_LOG_LEVEL) {
5994 		verbose(env, "caller:\n");
5995 		print_verifier_state(env, caller);
5996 		verbose(env, "callee:\n");
5997 		print_verifier_state(env, callee);
5998 	}
5999 	return 0;
6000 }
6001 
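/* Illustrative BPF-side usage of the callback convention set up below
 * (a sketch, not code from this file; names are placeholders):
 *
 *	static long cb(struct bpf_map *map, u32 *key, u64 *val, void *ctx)
 *	{
 *		return 0;	// 0 = continue, 1 = stop iterating
 *	}
 *	...
 *	bpf_for_each_map_elem(&my_map, cb, &my_ctx, 0);
 */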
6002 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
6003 				   struct bpf_func_state *caller,
6004 				   struct bpf_func_state *callee)
6005 {
6006 	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
6007 	 *      void *callback_ctx, u64 flags);
6008 	 * callback_fn(struct bpf_map *map, void *key, void *value,
6009 	 *      void *callback_ctx);
6010 	 */
6011 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6012 
6013 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6014 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6015 	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6016 
6017 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6018 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6019 	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6020 
6021 	/* pointer to stack or null */
6022 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
6023 
6024 	/* unused */
6025 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6026 	return 0;
6027 }
6028 
6029 static int set_callee_state(struct bpf_verifier_env *env,
6030 			    struct bpf_func_state *caller,
6031 			    struct bpf_func_state *callee, int insn_idx)
6032 {
6033 	int i;
6034 
6035 	/* copy r1 - r5 args that callee can access.  The copy includes parent
6036 	 * pointers, which connects us up to the liveness chain
6037 	 */
6038 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
6039 		callee->regs[i] = caller->regs[i];
6040 	return 0;
6041 }
6042 
6043 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6044 			   int *insn_idx)
6045 {
6046 	int subprog, target_insn;
6047 
6048 	target_insn = *insn_idx + insn->imm + 1;
6049 	subprog = find_subprog(env, target_insn);
6050 	if (subprog < 0) {
6051 		verbose(env, "verifier bug. No program starts at insn %d\n",
6052 			target_insn);
6053 		return -EFAULT;
6054 	}
6055 
6056 	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
6057 }
6058 
6059 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
6060 				       struct bpf_func_state *caller,
6061 				       struct bpf_func_state *callee,
6062 				       int insn_idx)
6063 {
6064 	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
6065 	struct bpf_map *map;
6066 	int err;
6067 
6068 	if (bpf_map_ptr_poisoned(insn_aux)) {
6069 		verbose(env, "tail_call abusing map_ptr\n");
6070 		return -EINVAL;
6071 	}
6072 
6073 	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
6074 	if (!map->ops->map_set_for_each_callback_args ||
6075 	    !map->ops->map_for_each_callback) {
6076 		verbose(env, "callback function not allowed for map\n");
6077 		return -ENOTSUPP;
6078 	}
6079 
6080 	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
6081 	if (err)
6082 		return err;
6083 
6084 	callee->in_callback_fn = true;
6085 	return 0;
6086 }
6087 
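/* Illustrative BPF-side usage of the timer callback convention set up
 * below (a sketch with placeholder names, not code from this file):
 *
 *	struct elem { struct bpf_timer t; };	// map value containing a timer
 *
 *	static int timer_cb(void *map, int *key, struct elem *val)
 *	{
 *		return 0;
 *	}
 *	...
 *	bpf_timer_init(&val->t, &my_map, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&val->t, timer_cb);
 *	bpf_timer_start(&val->t, 1000000, 0);	// fire in 1ms
 */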
6088 static int set_timer_callback_state(struct bpf_verifier_env *env,
6089 				    struct bpf_func_state *caller,
6090 				    struct bpf_func_state *callee,
6091 				    int insn_idx)
6092 {
6093 	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
6094 
6095 	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
6096 	 * callback_fn(struct bpf_map *map, void *key, void *value);
6097 	 */
6098 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
6099 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
6100 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
6101 
6102 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6103 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6104 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
6105 
6106 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6107 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6108 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
6109 
6110 	/* unused */
6111 	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6112 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6113 	callee->in_async_callback_fn = true;
6114 	return 0;
6115 }
6116 
6117 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
6118 {
6119 	struct bpf_verifier_state *state = env->cur_state;
6120 	struct bpf_func_state *caller, *callee;
6121 	struct bpf_reg_state *r0;
6122 	int err;
6123 
6124 	callee = state->frame[state->curframe];
6125 	r0 = &callee->regs[BPF_REG_0];
6126 	if (r0->type == PTR_TO_STACK) {
6127 		/* technically it's ok to return caller's stack pointer
6128 		 * (or caller's caller's pointer) back to the caller,
6129 		 * since these pointers are valid. Only current stack
6130 		 * pointer will be invalid as soon as function exits,
6131 		 * but let's be conservative
6132 		 */
6133 		verbose(env, "cannot return stack pointer to the caller\n");
6134 		return -EINVAL;
6135 	}
6136 
6137 	state->curframe--;
6138 	caller = state->frame[state->curframe];
6139 	if (callee->in_callback_fn) {
6140 		/* enforce R0 return value range [0, 1]. */
6141 		struct tnum range = tnum_range(0, 1);
6142 
6143 		if (r0->type != SCALAR_VALUE) {
6144 			verbose(env, "R0 not a scalar value\n");
6145 			return -EACCES;
6146 		}
6147 		if (!tnum_in(range, r0->var_off)) {
6148 			verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
6149 			return -EINVAL;
6150 		}
6151 	} else {
6152 		/* return to the caller whatever r0 had in the callee */
6153 		caller->regs[BPF_REG_0] = *r0;
6154 	}
6155 
6156 	/* Transfer references to the caller */
6157 	err = copy_reference_state(caller, callee);
6158 	if (err)
6159 		return err;
6160 
6161 	*insn_idx = callee->callsite + 1;
6162 	if (env->log.level & BPF_LOG_LEVEL) {
6163 		verbose(env, "returning from callee:\n");
6164 		print_verifier_state(env, callee);
6165 		verbose(env, "to caller at %d:\n", *insn_idx);
6166 		print_verifier_state(env, caller);
6167 	}
6168 	/* clear everything in the callee */
6169 	free_func_state(callee);
6170 	state->frame[state->curframe + 1] = NULL;
6171 	return 0;
6172 }
6173 
6174 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
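/* Example of the refinement done below (illustrative, not code from this
 * file): after
 *	long err = bpf_get_stack(ctx, buf, sizeof(buf), 0);
 * the verifier knows -MAX_ERRNO <= err <= sizeof(buf), because the size
 * argument's umax_value was recorded in meta->msize_max_value. A later
 * "if (err > 0)" check then lets err be used as a bounded length.
 */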
6175 				   int func_id,
6176 				   struct bpf_call_arg_meta *meta)
6177 {
6178 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
6179 
6180 	if (ret_type != RET_INTEGER ||
6181 	    (func_id != BPF_FUNC_get_stack &&
6182 	     func_id != BPF_FUNC_get_task_stack &&
6183 	     func_id != BPF_FUNC_probe_read_str &&
6184 	     func_id != BPF_FUNC_probe_read_kernel_str &&
6185 	     func_id != BPF_FUNC_probe_read_user_str))
6186 		return;
6187 
6188 	ret_reg->smax_value = meta->msize_max_value;
6189 	ret_reg->s32_max_value = meta->msize_max_value;
6190 	ret_reg->smin_value = -MAX_ERRNO;
6191 	ret_reg->s32_min_value = -MAX_ERRNO;
6192 	__reg_deduce_bounds(ret_reg);
6193 	__reg_bound_offset(ret_reg);
6194 	__update_reg_bounds(ret_reg);
6195 }
6196 
6197 static int
6198 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6199 		int func_id, int insn_idx)
6200 {
6201 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6202 	struct bpf_map *map = meta->map_ptr;
6203 
6204 	if (func_id != BPF_FUNC_tail_call &&
6205 	    func_id != BPF_FUNC_map_lookup_elem &&
6206 	    func_id != BPF_FUNC_map_update_elem &&
6207 	    func_id != BPF_FUNC_map_delete_elem &&
6208 	    func_id != BPF_FUNC_map_push_elem &&
6209 	    func_id != BPF_FUNC_map_pop_elem &&
6210 	    func_id != BPF_FUNC_map_peek_elem &&
6211 	    func_id != BPF_FUNC_for_each_map_elem &&
6212 	    func_id != BPF_FUNC_redirect_map)
6213 		return 0;
6214 
6215 	if (map == NULL) {
6216 		verbose(env, "kernel subsystem misconfigured verifier\n");
6217 		return -EINVAL;
6218 	}
6219 
6220 	/* In the case of read-only maps, some additional restrictions
6221 	 * need to be applied in order to prevent altering the
6222 	 * state of the map from the program side.
6223 	 */
6224 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
6225 	    (func_id == BPF_FUNC_map_delete_elem ||
6226 	     func_id == BPF_FUNC_map_update_elem ||
6227 	     func_id == BPF_FUNC_map_push_elem ||
6228 	     func_id == BPF_FUNC_map_pop_elem)) {
6229 		verbose(env, "write into map forbidden\n");
6230 		return -EACCES;
6231 	}
6232 
6233 	if (!BPF_MAP_PTR(aux->map_ptr_state))
6234 		bpf_map_ptr_store(aux, meta->map_ptr,
6235 				  !meta->map_ptr->bypass_spec_v1);
6236 	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
6237 		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
6238 				  !meta->map_ptr->bypass_spec_v1);
6239 	return 0;
6240 }
6241 
6242 static int
6243 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6244 		int func_id, int insn_idx)
6245 {
6246 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6247 	struct bpf_reg_state *regs = cur_regs(env), *reg;
6248 	struct bpf_map *map = meta->map_ptr;
6249 	struct tnum range;
6250 	u64 val;
6251 	int err;
6252 
6253 	if (func_id != BPF_FUNC_tail_call)
6254 		return 0;
6255 	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
6256 		verbose(env, "kernel subsystem misconfigured verifier\n");
6257 		return -EINVAL;
6258 	}
6259 
6260 	range = tnum_range(0, map->max_entries - 1);
6261 	reg = &regs[BPF_REG_3];
6262 
6263 	if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
6264 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6265 		return 0;
6266 	}
6267 
6268 	err = mark_chain_precision(env, BPF_REG_3);
6269 	if (err)
6270 		return err;
6271 
6272 	val = reg->var_off.value;
6273 	if (bpf_map_key_unseen(aux))
6274 		bpf_map_key_store(aux, val);
6275 	else if (!bpf_map_key_poisoned(aux) &&
6276 		  bpf_map_key_immediate(aux) != val)
6277 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6278 	return 0;
6279 }
6280 
6281 static int check_reference_leak(struct bpf_verifier_env *env)
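/* Illustrative leak that the check below reports (a sketch, not code from
 * this file):
 *	struct bpf_sock *sk;
 *
 *	sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple),
 *			       BPF_F_CURRENT_NETNS, 0);
 *	return 0;	// missing bpf_sk_release(sk): "Unreleased reference id=..."
 */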
6282 {
6283 	struct bpf_func_state *state = cur_func(env);
6284 	int i;
6285 
6286 	for (i = 0; i < state->acquired_refs; i++) {
6287 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
6288 			state->refs[i].id, state->refs[i].insn_idx);
6289 	}
6290 	return state->acquired_refs ? -EINVAL : 0;
6291 }
6292 
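/* Illustrative BPF-side call that ends up here (a sketch, not code from
 * this file):
 *	u64 args[] = { pid };
 *	bpf_snprintf(out, sizeof(out), "pid=%d\n", args, sizeof(args));
 * With libbpf, the string literal lands in the read-only .rodata map, so
 * fmt is ARG_PTR_TO_CONST_STR and can be read via map_direct_value_addr()
 * below to validate the format specifiers at verification time.
 */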
6293 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
6294 				   struct bpf_reg_state *regs)
6295 {
6296 	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
6297 	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
6298 	struct bpf_map *fmt_map = fmt_reg->map_ptr;
6299 	int err, fmt_map_off, num_args;
6300 	u64 fmt_addr;
6301 	char *fmt;
6302 
6303 	/* data must be an array of u64 */
6304 	if (data_len_reg->var_off.value % 8)
6305 		return -EINVAL;
6306 	num_args = data_len_reg->var_off.value / 8;
6307 
6308 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
6309 	 * and map_direct_value_addr is set.
6310 	 */
6311 	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
6312 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
6313 						  fmt_map_off);
6314 	if (err) {
6315 		verbose(env, "verifier bug\n");
6316 		return -EFAULT;
6317 	}
6318 	fmt = (char *)(long)fmt_addr + fmt_map_off;
6319 
6320 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, so
6321 	 * we can focus on validating the format specifiers.
6322 	 */
6323 	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
6324 	if (err < 0)
6325 		verbose(env, "Invalid format string\n");
6326 
6327 	return err;
6328 }
6329 
6330 static int check_get_func_ip(struct bpf_verifier_env *env)
6331 {
6332 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
6333 	enum bpf_prog_type type = resolve_prog_type(env->prog);
6334 	int func_id = BPF_FUNC_get_func_ip;
6335 
6336 	if (type == BPF_PROG_TYPE_TRACING) {
6337 		if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT &&
6338 		    eatype != BPF_MODIFY_RETURN) {
6339 			verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
6340 				func_id_name(func_id), func_id);
6341 			return -ENOTSUPP;
6342 		}
6343 		return 0;
6344 	} else if (type == BPF_PROG_TYPE_KPROBE) {
6345 		return 0;
6346 	}
6347 
6348 	verbose(env, "func %s#%d not supported for program type %d\n",
6349 		func_id_name(func_id), func_id, type);
6350 	return -ENOTSUPP;
6351 }
6352 
6353 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6354 			     int *insn_idx_p)
6355 {
6356 	const struct bpf_func_proto *fn = NULL;
6357 	struct bpf_reg_state *regs;
6358 	struct bpf_call_arg_meta meta;
6359 	int insn_idx = *insn_idx_p;
6360 	bool changes_data;
6361 	int i, err, func_id;
6362 
6363 	/* find function prototype */
6364 	func_id = insn->imm;
6365 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
6366 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
6367 			func_id);
6368 		return -EINVAL;
6369 	}
6370 
6371 	if (env->ops->get_func_proto)
6372 		fn = env->ops->get_func_proto(func_id, env->prog);
6373 	if (!fn) {
6374 		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
6375 			func_id);
6376 		return -EINVAL;
6377 	}
6378 
6379 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
6380 	if (!env->prog->gpl_compatible && fn->gpl_only) {
6381 		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
6382 		return -EINVAL;
6383 	}
6384 
6385 	if (fn->allowed && !fn->allowed(env->prog)) {
6386 		verbose(env, "helper call is not allowed in probe\n");
6387 		return -EINVAL;
6388 	}
6389 
6390 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
6391 	changes_data = bpf_helper_changes_pkt_data(fn->func);
6392 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
6393 		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
6394 			func_id_name(func_id), func_id);
6395 		return -EINVAL;
6396 	}
6397 
6398 	memset(&meta, 0, sizeof(meta));
6399 	meta.pkt_access = fn->pkt_access;
6400 
6401 	err = check_func_proto(fn, func_id);
6402 	if (err) {
6403 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
6404 			func_id_name(func_id), func_id);
6405 		return err;
6406 	}
6407 
6408 	meta.func_id = func_id;
6409 	/* check args */
6410 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
6411 		err = check_func_arg(env, i, &meta, fn);
6412 		if (err)
6413 			return err;
6414 	}
6415 
6416 	err = record_func_map(env, &meta, func_id, insn_idx);
6417 	if (err)
6418 		return err;
6419 
6420 	err = record_func_key(env, &meta, func_id, insn_idx);
6421 	if (err)
6422 		return err;
6423 
6424 	/* Mark slots with STACK_MISC in case of raw mode; the stack offset
6425 	 * is inferred from register state.
6426 	 */
6427 	for (i = 0; i < meta.access_size; i++) {
6428 		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
6429 				       BPF_WRITE, -1, false);
6430 		if (err)
6431 			return err;
6432 	}
6433 
6434 	if (func_id == BPF_FUNC_tail_call) {
6435 		err = check_reference_leak(env);
6436 		if (err) {
6437 			verbose(env, "tail_call would lead to reference leak\n");
6438 			return err;
6439 		}
6440 	} else if (is_release_function(func_id)) {
6441 		err = release_reference(env, meta.ref_obj_id);
6442 		if (err) {
6443 			verbose(env, "func %s#%d reference has not been acquired before\n",
6444 				func_id_name(func_id), func_id);
6445 			return err;
6446 		}
6447 	}
6448 
6449 	regs = cur_regs(env);
6450 
6451 	/* check that flags argument in get_local_storage(map, flags) is 0,
6452 	 * this is required because get_local_storage() can't return an error.
6453 	 */
6454 	if (func_id == BPF_FUNC_get_local_storage &&
6455 	    !register_is_null(&regs[BPF_REG_2])) {
6456 		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
6457 		return -EINVAL;
6458 	}
6459 
6460 	if (func_id == BPF_FUNC_for_each_map_elem) {
6461 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6462 					set_map_elem_callback_state);
6463 		if (err < 0)
6464 			return -EINVAL;
6465 	}
6466 
6467 	if (func_id == BPF_FUNC_timer_set_callback) {
6468 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6469 					set_timer_callback_state);
6470 		if (err < 0)
6471 			return -EINVAL;
6472 	}
6473 
6474 	if (func_id == BPF_FUNC_snprintf) {
6475 		err = check_bpf_snprintf_call(env, regs);
6476 		if (err < 0)
6477 			return err;
6478 	}
6479 
6480 	/* reset caller saved regs */
6481 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
6482 		mark_reg_not_init(env, regs, caller_saved[i]);
6483 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6484 	}
6485 
6486 	/* helper call returns 64-bit value. */
6487 	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6488 
6489 	/* update return register (already marked as written above) */
6490 	if (fn->ret_type == RET_INTEGER) {
6491 		/* sets type to SCALAR_VALUE */
6492 		mark_reg_unknown(env, regs, BPF_REG_0);
6493 	} else if (fn->ret_type == RET_VOID) {
6494 		regs[BPF_REG_0].type = NOT_INIT;
6495 	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
6496 		   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
6497 		/* There is no offset yet applied, variable or fixed */
6498 		mark_reg_known_zero(env, regs, BPF_REG_0);
6499 		/* remember map_ptr, so that check_map_access()
6500 		 * can check 'value_size' boundary of memory access
6501 		 * to map element returned from bpf_map_lookup_elem()
6502 		 */
6503 		if (meta.map_ptr == NULL) {
6504 			verbose(env,
6505 				"kernel subsystem misconfigured verifier\n");
6506 			return -EINVAL;
6507 		}
6508 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
6509 		regs[BPF_REG_0].map_uid = meta.map_uid;
6510 		if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
6511 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
6512 			if (map_value_has_spin_lock(meta.map_ptr))
6513 				regs[BPF_REG_0].id = ++env->id_gen;
6514 		} else {
6515 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
6516 		}
6517 	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
6518 		mark_reg_known_zero(env, regs, BPF_REG_0);
6519 		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
6520 	} else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
6521 		mark_reg_known_zero(env, regs, BPF_REG_0);
6522 		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
6523 	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
6524 		mark_reg_known_zero(env, regs, BPF_REG_0);
6525 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
6526 	} else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
6527 		mark_reg_known_zero(env, regs, BPF_REG_0);
6528 		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
6529 		regs[BPF_REG_0].mem_size = meta.mem_size;
6530 	} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
6531 		   fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
6532 		const struct btf_type *t;
6533 
6534 		mark_reg_known_zero(env, regs, BPF_REG_0);
6535 		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
6536 		if (!btf_type_is_struct(t)) {
6537 			u32 tsize;
6538 			const struct btf_type *ret;
6539 			const char *tname;
6540 
6541 			/* resolve the type size of ksym. */
6542 			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
6543 			if (IS_ERR(ret)) {
6544 				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
6545 				verbose(env, "unable to resolve the size of type '%s': %ld\n",
6546 					tname, PTR_ERR(ret));
6547 				return -EINVAL;
6548 			}
6549 			regs[BPF_REG_0].type =
6550 				fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
6551 				PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
6552 			regs[BPF_REG_0].mem_size = tsize;
6553 		} else {
6554 			regs[BPF_REG_0].type =
6555 				fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
6556 				PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
6557 			regs[BPF_REG_0].btf = meta.ret_btf;
6558 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
6559 		}
6560 	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL ||
6561 		   fn->ret_type == RET_PTR_TO_BTF_ID) {
6562 		int ret_btf_id;
6563 
6564 		mark_reg_known_zero(env, regs, BPF_REG_0);
6565 		regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ?
6566 						     PTR_TO_BTF_ID :
6567 						     PTR_TO_BTF_ID_OR_NULL;
6568 		ret_btf_id = *fn->ret_btf_id;
6569 		if (ret_btf_id == 0) {
6570 			verbose(env, "invalid return type %d of func %s#%d\n",
6571 				fn->ret_type, func_id_name(func_id), func_id);
6572 			return -EINVAL;
6573 		}
6574 		/* current BPF helper definitions only come from built-in
6575 		 * code with type IDs from vmlinux BTF
6576 		 */
6577 		regs[BPF_REG_0].btf = btf_vmlinux;
6578 		regs[BPF_REG_0].btf_id = ret_btf_id;
6579 	} else {
6580 		verbose(env, "unknown return type %d of func %s#%d\n",
6581 			fn->ret_type, func_id_name(func_id), func_id);
6582 		return -EINVAL;
6583 	}
6584 
6585 	if (reg_type_may_be_null(regs[BPF_REG_0].type))
6586 		regs[BPF_REG_0].id = ++env->id_gen;
6587 
6588 	if (is_ptr_cast_function(func_id)) {
6589 		/* For release_reference() */
6590 		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
6591 	} else if (is_acquire_function(func_id, meta.map_ptr)) {
6592 		int id = acquire_reference_state(env, insn_idx);
6593 
6594 		if (id < 0)
6595 			return id;
6596 		/* For mark_ptr_or_null_reg() */
6597 		regs[BPF_REG_0].id = id;
6598 		/* For release_reference() */
6599 		regs[BPF_REG_0].ref_obj_id = id;
6600 	}
6601 
6602 	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
6603 
6604 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
6605 	if (err)
6606 		return err;
6607 
6608 	if ((func_id == BPF_FUNC_get_stack ||
6609 	     func_id == BPF_FUNC_get_task_stack) &&
6610 	    !env->prog->has_callchain_buf) {
6611 		const char *err_str;
6612 
6613 #ifdef CONFIG_PERF_EVENTS
6614 		err = get_callchain_buffers(sysctl_perf_event_max_stack);
6615 		err_str = "cannot get callchain buffer for func %s#%d\n";
6616 #else
6617 		err = -ENOTSUPP;
6618 		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
6619 #endif
6620 		if (err) {
6621 			verbose(env, err_str, func_id_name(func_id), func_id);
6622 			return err;
6623 		}
6624 
6625 		env->prog->has_callchain_buf = true;
6626 	}
6627 
6628 	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
6629 		env->prog->call_get_stack = true;
6630 
6631 	if (func_id == BPF_FUNC_get_func_ip) {
6632 		if (check_get_func_ip(env))
6633 			return -ENOTSUPP;
6634 		env->prog->call_get_func_ip = true;
6635 	}
6636 
6637 	if (changes_data)
6638 		clear_all_pkt_pointers(env);
6639 	return 0;
6640 }
6641 
6642 /* mark_btf_func_reg_size() is used when the reg size is determined by
6643  * the BTF func_proto's return value size and argument.
6644  */
6645 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
6646 				   size_t reg_size)
6647 {
6648 	struct bpf_reg_state *reg = &cur_regs(env)[regno];
6649 
6650 	if (regno == BPF_REG_0) {
6651 		/* Function return value */
6652 		reg->live |= REG_LIVE_WRITTEN;
6653 		reg->subreg_def = reg_size == sizeof(u64) ?
6654 			DEF_NOT_SUBREG : env->insn_idx + 1;
6655 	} else {
6656 		/* Function argument */
6657 		if (reg_size == sizeof(u64)) {
6658 			mark_insn_zext(env, reg);
6659 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
6660 		} else {
6661 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
6662 		}
6663 	}
6664 }
6665 
6666 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
6667 {
6668 	const struct btf_type *t, *func, *func_proto, *ptr_type;
6669 	struct bpf_reg_state *regs = cur_regs(env);
6670 	const char *func_name, *ptr_type_name;
6671 	u32 i, nargs, func_id, ptr_type_id;
6672 	struct module *btf_mod = NULL;
6673 	const struct btf_param *args;
6674 	struct btf *desc_btf;
6675 	int err;
6676 
6677 	/* skip for now, but return error when we find this in fixup_kfunc_call */
6678 	if (!insn->imm)
6679 		return 0;
6680 
6681 	desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
6682 	if (IS_ERR(desc_btf))
6683 		return PTR_ERR(desc_btf);
6684 
6685 	func_id = insn->imm;
6686 	func = btf_type_by_id(desc_btf, func_id);
6687 	func_name = btf_name_by_offset(desc_btf, func->name_off);
6688 	func_proto = btf_type_by_id(desc_btf, func->type);
6689 
6690 	if (!env->ops->check_kfunc_call ||
6691 	    !env->ops->check_kfunc_call(func_id, btf_mod)) {
6692 		verbose(env, "calling kernel function %s is not allowed\n",
6693 			func_name);
6694 		return -EACCES;
6695 	}
6696 
6697 	/* Check the arguments */
6698 	err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
6699 	if (err)
6700 		return err;
6701 
6702 	for (i = 0; i < CALLER_SAVED_REGS; i++)
6703 		mark_reg_not_init(env, regs, caller_saved[i]);
6704 
6705 	/* Check return type */
6706 	t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
6707 	if (btf_type_is_scalar(t)) {
6708 		mark_reg_unknown(env, regs, BPF_REG_0);
6709 		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
6710 	} else if (btf_type_is_ptr(t)) {
6711 		ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
6712 						   &ptr_type_id);
6713 		if (!btf_type_is_struct(ptr_type)) {
6714 			ptr_type_name = btf_name_by_offset(desc_btf,
6715 							   ptr_type->name_off);
6716 			verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
6717 				func_name, btf_type_str(ptr_type),
6718 				ptr_type_name);
6719 			return -EINVAL;
6720 		}
6721 		mark_reg_known_zero(env, regs, BPF_REG_0);
6722 		regs[BPF_REG_0].btf = desc_btf;
6723 		regs[BPF_REG_0].type = PTR_TO_BTF_ID;
6724 		regs[BPF_REG_0].btf_id = ptr_type_id;
6725 		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
6726 	} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
6727 
6728 	nargs = btf_type_vlen(func_proto);
6729 	args = (const struct btf_param *)(func_proto + 1);
6730 	for (i = 0; i < nargs; i++) {
6731 		u32 regno = i + 1;
6732 
6733 		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
6734 		if (btf_type_is_ptr(t))
6735 			mark_btf_func_reg_size(env, regno, sizeof(void *));
6736 		else
6737 			/* scalar. ensured by btf_check_kfunc_arg_match() */
6738 			mark_btf_func_reg_size(env, regno, t->size);
6739 	}
6740 
6741 	return 0;
6742 }
6743 
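/* Worked example for the signed overflow helpers below: adding 1 to S64_MAX
 * wraps to S64_MIN in the well-defined u64 domain; since b >= 0 and res < a,
 * signed_add_overflows() reports overflow.  Likewise S64_MIN - 1 wraps to
 * S64_MAX, and with b >= 0 and res > a, signed_sub_overflows() reports
 * overflow.  The 32-bit variants follow the same pattern on s32/u32.
 */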
6744 static bool signed_add_overflows(s64 a, s64 b)
6745 {
6746 	/* Do the add in u64, where overflow is well-defined */
6747 	s64 res = (s64)((u64)a + (u64)b);
6748 
6749 	if (b < 0)
6750 		return res > a;
6751 	return res < a;
6752 }
6753 
6754 static bool signed_add32_overflows(s32 a, s32 b)
6755 {
6756 	/* Do the add in u32, where overflow is well-defined */
6757 	s32 res = (s32)((u32)a + (u32)b);
6758 
6759 	if (b < 0)
6760 		return res > a;
6761 	return res < a;
6762 }
6763 
6764 static bool signed_sub_overflows(s64 a, s64 b)
6765 {
6766 	/* Do the sub in u64, where overflow is well-defined */
6767 	s64 res = (s64)((u64)a - (u64)b);
6768 
6769 	if (b < 0)
6770 		return res < a;
6771 	return res > a;
6772 }
6773 
6774 static bool signed_sub32_overflows(s32 a, s32 b)
6775 {
6776 	/* Do the sub in u32, where overflow is well-defined */
6777 	s32 res = (s32)((u32)a - (u32)b);
6778 
6779 	if (b < 0)
6780 		return res < a;
6781 	return res > a;
6782 }
6783 
6784 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
6785 				  const struct bpf_reg_state *reg,
6786 				  enum bpf_reg_type type)
6787 {
6788 	bool known = tnum_is_const(reg->var_off);
6789 	s64 val = reg->var_off.value;
6790 	s64 smin = reg->smin_value;
6791 
6792 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
6793 		verbose(env, "math between %s pointer and %lld is not allowed\n",
6794 			reg_type_str[type], val);
6795 		return false;
6796 	}
6797 
6798 	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
6799 		verbose(env, "%s pointer offset %d is not allowed\n",
6800 			reg_type_str[type], reg->off);
6801 		return false;
6802 	}
6803 
6804 	if (smin == S64_MIN) {
6805 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
6806 			reg_type_str[type]);
6807 		return false;
6808 	}
6809 
6810 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
6811 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
6812 			smin, reg_type_str[type]);
6813 		return false;
6814 	}
6815 
6816 	return true;
6817 }
6818 
6819 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
6820 {
6821 	return &env->insn_aux_data[env->insn_idx];
6822 }
6823 
6824 enum {
6825 	REASON_BOUNDS	= -1,
6826 	REASON_TYPE	= -2,
6827 	REASON_PATHS	= -3,
6828 	REASON_LIMIT	= -4,
6829 	REASON_STACK	= -5,
6830 };
6831 
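/* For instance, a PTR_TO_STACK register with a constant var_off of 0 and
 * off == -64 yields ptr_limit = -(0 + (-64)) = 64, which is below
 * MAX_BPF_STACK, so *alu_limit becomes 64; a PTR_TO_MAP_VALUE register is
 * instead bounded by its map's value_size.
 */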
6832 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
6833 			      u32 *alu_limit, bool mask_to_left)
6834 {
6835 	u32 max = 0, ptr_limit = 0;
6836 
6837 	switch (ptr_reg->type) {
6838 	case PTR_TO_STACK:
6839 		/* Offset 0 is out-of-bounds, but acceptable start for the
6840 		 * left direction, see BPF_REG_FP. Also, unknown scalar
6841 		 * offset where we would need to deal with min/max bounds is
6842 		 * currently prohibited for unprivileged.
6843 		 */
6844 		max = MAX_BPF_STACK + mask_to_left;
6845 		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
6846 		break;
6847 	case PTR_TO_MAP_VALUE:
6848 		max = ptr_reg->map_ptr->value_size;
6849 		ptr_limit = (mask_to_left ?
6850 			     ptr_reg->smin_value :
6851 			     ptr_reg->umax_value) + ptr_reg->off;
6852 		break;
6853 	default:
6854 		return REASON_TYPE;
6855 	}
6856 
6857 	if (ptr_limit >= max)
6858 		return REASON_LIMIT;
6859 	*alu_limit = ptr_limit;
6860 	return 0;
6861 }
6862 
6863 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
6864 				    const struct bpf_insn *insn)
6865 {
6866 	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
6867 }
6868 
6869 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
6870 				       u32 alu_state, u32 alu_limit)
6871 {
6872 	/* If we arrived here from different branches with different
6873 	 * state or limits to sanitize, then this won't work.
6874 	 */
6875 	if (aux->alu_state &&
6876 	    (aux->alu_state != alu_state ||
6877 	     aux->alu_limit != alu_limit))
6878 		return REASON_PATHS;
6879 
6880 	/* Corresponding fixup done in do_misc_fixups(). */
6881 	aux->alu_state = alu_state;
6882 	aux->alu_limit = alu_limit;
6883 	return 0;
6884 }
6885 
6886 static int sanitize_val_alu(struct bpf_verifier_env *env,
6887 			    struct bpf_insn *insn)
6888 {
6889 	struct bpf_insn_aux_data *aux = cur_aux(env);
6890 
6891 	if (can_skip_alu_sanitation(env, insn))
6892 		return 0;
6893 
6894 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
6895 }
6896 
6897 static bool sanitize_needed(u8 opcode)
6898 {
6899 	return opcode == BPF_ADD || opcode == BPF_SUB;
6900 }
6901 
6902 struct bpf_sanitize_info {
6903 	struct bpf_insn_aux_data aux;
6904 	bool mask_to_left;
6905 };
6906 
6907 static struct bpf_verifier_state *
6908 sanitize_speculative_path(struct bpf_verifier_env *env,
6909 			  const struct bpf_insn *insn,
6910 			  u32 next_idx, u32 curr_idx)
6911 {
6912 	struct bpf_verifier_state *branch;
6913 	struct bpf_reg_state *regs;
6914 
6915 	branch = push_stack(env, next_idx, curr_idx, true);
6916 	if (branch && insn) {
6917 		regs = branch->frame[branch->curframe]->regs;
6918 		if (BPF_SRC(insn->code) == BPF_K) {
6919 			mark_reg_unknown(env, regs, insn->dst_reg);
6920 		} else if (BPF_SRC(insn->code) == BPF_X) {
6921 			mark_reg_unknown(env, regs, insn->dst_reg);
6922 			mark_reg_unknown(env, regs, insn->src_reg);
6923 		}
6924 	}
6925 	return branch;
6926 }
6927 
6928 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
6929 			    struct bpf_insn *insn,
6930 			    const struct bpf_reg_state *ptr_reg,
6931 			    const struct bpf_reg_state *off_reg,
6932 			    struct bpf_reg_state *dst_reg,
6933 			    struct bpf_sanitize_info *info,
6934 			    const bool commit_window)
6935 {
6936 	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
6937 	struct bpf_verifier_state *vstate = env->cur_state;
6938 	bool off_is_imm = tnum_is_const(off_reg->var_off);
6939 	bool off_is_neg = off_reg->smin_value < 0;
6940 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
6941 	u8 opcode = BPF_OP(insn->code);
6942 	u32 alu_state, alu_limit;
6943 	struct bpf_reg_state tmp;
6944 	bool ret;
6945 	int err;
6946 
6947 	if (can_skip_alu_sanitation(env, insn))
6948 		return 0;
6949 
6950 	/* We already marked aux for masking from non-speculative
6951 	 * paths, thus we got here in the first place. We only care
6952 	 * to explore bad access from here.
6953 	 */
6954 	if (vstate->speculative)
6955 		goto do_sim;
6956 
6957 	if (!commit_window) {
6958 		if (!tnum_is_const(off_reg->var_off) &&
6959 		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
6960 			return REASON_BOUNDS;
6961 
6962 		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
6963 				     (opcode == BPF_SUB && !off_is_neg);
6964 	}
6965 
6966 	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
6967 	if (err < 0)
6968 		return err;
6969 
6970 	if (commit_window) {
6971 		/* In commit phase we narrow the masking window based on
6972 		 * the observed pointer move after the simulated operation.
6973 		 */
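		/* For instance, if the record phase computed a limit of 64
		 * for a stack pointer at fp-64 and the pointer sits at fp-48
		 * after the operation, the recomputed limit is 48 and the
		 * committed limit becomes abs(64 - 48) = 16, i.e. the
		 * distance the pointer actually moved.
		 */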
6974 		alu_state = info->aux.alu_state;
6975 		alu_limit = abs(info->aux.alu_limit - alu_limit);
6976 	} else {
6977 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
6978 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
6979 		alu_state |= ptr_is_dst_reg ?
6980 			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
6981 
6982 		/* Limit pruning on unknown scalars to enable deep search for
6983 		 * potential masking differences from other program paths.
6984 		 */
6985 		if (!off_is_imm)
6986 			env->explore_alu_limits = true;
6987 	}
6988 
6989 	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
6990 	if (err < 0)
6991 		return err;
6992 do_sim:
6993 	/* If we're in commit phase, we're done here given we already
6994 	 * pushed the truncated dst_reg into the speculative verification
6995 	 * stack.
6996 	 *
6997 	 * Also, when register is a known constant, we rewrite register-based
6998 	 * operation to immediate-based, and thus do not need masking (and as
6999 	 * a consequence, do not need to simulate the zero-truncation either).
7000 	 */
7001 	if (commit_window || off_is_imm)
7002 		return 0;
7003 
7004 	/* Simulate and find potential out-of-bounds access under
7005 	 * speculative execution from truncation as a result of
7006 	 * masking when off was not within expected range. If off
7007 	 * sits in dst, then we temporarily need to move ptr there
7008 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
7009 	 * for cases where we use K-based arithmetic in one direction
7010 	 * and truncated reg-based in the other in order to explore
7011 	 * bad access.
7012 	 */
7013 	if (!ptr_is_dst_reg) {
7014 		tmp = *dst_reg;
7015 		*dst_reg = *ptr_reg;
7016 	}
7017 	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
7018 					env->insn_idx);
7019 	if (!ptr_is_dst_reg && ret)
7020 		*dst_reg = tmp;
7021 	return !ret ? REASON_STACK : 0;
7022 }
7023 
7024 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
7025 {
7026 	struct bpf_verifier_state *vstate = env->cur_state;
7027 
7028 	/* If we simulate paths under speculation, we don't update the
7029 	 * insn as 'seen' such that when we verify unreachable paths in
7030 	 * the non-speculative domain, sanitize_dead_code() can still
7031 	 * rewrite/sanitize them.
7032 	 */
7033 	if (!vstate->speculative)
7034 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
7035 }
7036 
7037 static int sanitize_err(struct bpf_verifier_env *env,
7038 			const struct bpf_insn *insn, int reason,
7039 			const struct bpf_reg_state *off_reg,
7040 			const struct bpf_reg_state *dst_reg)
7041 {
7042 	static const char *err = "pointer arithmetic with it prohibited for !root";
7043 	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
7044 	u32 dst = insn->dst_reg, src = insn->src_reg;
7045 
7046 	switch (reason) {
7047 	case REASON_BOUNDS:
7048 		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
7049 			off_reg == dst_reg ? dst : src, err);
7050 		break;
7051 	case REASON_TYPE:
7052 		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
7053 			off_reg == dst_reg ? src : dst, err);
7054 		break;
7055 	case REASON_PATHS:
7056 		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
7057 			dst, op, err);
7058 		break;
7059 	case REASON_LIMIT:
7060 		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
7061 			dst, op, err);
7062 		break;
7063 	case REASON_STACK:
7064 		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
7065 			dst, err);
7066 		break;
7067 	default:
7068 		verbose(env, "verifier internal error: unknown reason (%d)\n",
7069 			reason);
7070 		break;
7071 	}
7072 
7073 	return -EACCES;
7074 }
7075 
7076 /* check that stack access falls within stack limits and that 'reg' doesn't
7077  * have a variable offset.
7078  *
7079  * Variable offset is prohibited for unprivileged mode for simplicity since it
7080  * requires corresponding support in Spectre masking for stack ALU.  See also
7081  * retrieve_ptr_limit().
7082  *
7083  *
7085  */
7086 static int check_stack_access_for_ptr_arithmetic(
7087 				struct bpf_verifier_env *env,
7088 				int regno,
7089 				const struct bpf_reg_state *reg,
7090 				int off)
7091 {
7092 	if (!tnum_is_const(reg->var_off)) {
7093 		char tn_buf[48];
7094 
7095 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7096 		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
7097 			regno, tn_buf, off);
7098 		return -EACCES;
7099 	}
7100 
7101 	if (off >= 0 || off < -MAX_BPF_STACK) {
7102 		verbose(env, "R%d stack pointer arithmetic goes out of range, "
7103 			"prohibited for !root; off=%d\n", regno, off);
7104 		return -EACCES;
7105 	}
7106 
7107 	return 0;
7108 }
7109 
7110 static int sanitize_check_bounds(struct bpf_verifier_env *env,
7111 				 const struct bpf_insn *insn,
7112 				 const struct bpf_reg_state *dst_reg)
7113 {
7114 	u32 dst = insn->dst_reg;
7115 
7116 	/* For unprivileged we require that resulting offset must be in bounds
7117 	 * in order to be able to sanitize access later on.
7118 	 */
7119 	if (env->bypass_spec_v1)
7120 		return 0;
7121 
7122 	switch (dst_reg->type) {
7123 	case PTR_TO_STACK:
7124 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
7125 					dst_reg->off + dst_reg->var_off.value))
7126 			return -EACCES;
7127 		break;
7128 	case PTR_TO_MAP_VALUE:
7129 		if (check_map_access(env, dst, dst_reg->off, 1, false)) {
7130 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
7131 				"prohibited for !root\n", dst);
7132 			return -EACCES;
7133 		}
7134 		break;
7135 	default:
7136 		break;
7137 	}
7138 
7139 	return 0;
7140 }
7141 
7142 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
7143  * Caller should also handle BPF_MOV case separately.
7144  * If we return -EACCES, caller may want to try again treating pointer as a
7145  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
7146  */
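/* Example: with R2 = PTR_TO_PACKET (fixed off 0, known zero bounds) and R3 a
 * scalar bounded to [0, 64], BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3)
 * keeps R2 a packet pointer, assigns it a fresh id, gives it a variable
 * offset in [0, 64] and clears its range, so a new comparison against the
 * packet end is needed before the pointer may be dereferenced.
 */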
7147 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
7148 				   struct bpf_insn *insn,
7149 				   const struct bpf_reg_state *ptr_reg,
7150 				   const struct bpf_reg_state *off_reg)
7151 {
7152 	struct bpf_verifier_state *vstate = env->cur_state;
7153 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
7154 	struct bpf_reg_state *regs = state->regs, *dst_reg;
7155 	bool known = tnum_is_const(off_reg->var_off);
7156 	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
7157 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
7158 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
7159 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
7160 	struct bpf_sanitize_info info = {};
7161 	u8 opcode = BPF_OP(insn->code);
7162 	u32 dst = insn->dst_reg;
7163 	int ret;
7164 
7165 	dst_reg = &regs[dst];
7166 
7167 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
7168 	    smin_val > smax_val || umin_val > umax_val) {
7169 		/* Taint dst register if offset had invalid bounds derived from
7170 		 * e.g. dead branches.
7171 		 */
7172 		__mark_reg_unknown(env, dst_reg);
7173 		return 0;
7174 	}
7175 
7176 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
7177 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
7178 		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
7179 			__mark_reg_unknown(env, dst_reg);
7180 			return 0;
7181 		}
7182 
7183 		verbose(env,
7184 			"R%d 32-bit pointer arithmetic prohibited\n",
7185 			dst);
7186 		return -EACCES;
7187 	}
7188 
7189 	switch (ptr_reg->type) {
7190 	case PTR_TO_MAP_VALUE_OR_NULL:
7191 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
7192 			dst, reg_type_str[ptr_reg->type]);
7193 		return -EACCES;
7194 	case CONST_PTR_TO_MAP:
7195 		/* smin_val represents the known value */
7196 		if (known && smin_val == 0 && opcode == BPF_ADD)
7197 			break;
7198 		fallthrough;
7199 	case PTR_TO_PACKET_END:
7200 	case PTR_TO_SOCKET:
7201 	case PTR_TO_SOCKET_OR_NULL:
7202 	case PTR_TO_SOCK_COMMON:
7203 	case PTR_TO_SOCK_COMMON_OR_NULL:
7204 	case PTR_TO_TCP_SOCK:
7205 	case PTR_TO_TCP_SOCK_OR_NULL:
7206 	case PTR_TO_XDP_SOCK:
7207 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
7208 			dst, reg_type_str[ptr_reg->type]);
7209 		return -EACCES;
7210 	default:
7211 		break;
7212 	}
7213 
7214 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
7215 	 * The id may be overwritten later if we create a new variable offset.
7216 	 */
7217 	dst_reg->type = ptr_reg->type;
7218 	dst_reg->id = ptr_reg->id;
7219 
7220 	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
7221 	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
7222 		return -EINVAL;
7223 
7224 	/* pointer types do not carry 32-bit bounds at the moment. */
7225 	__mark_reg32_unbounded(dst_reg);
7226 
7227 	if (sanitize_needed(opcode)) {
7228 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
7229 				       &info, false);
7230 		if (ret < 0)
7231 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
7232 	}
7233 
7234 	switch (opcode) {
7235 	case BPF_ADD:
7236 		/* We can take a fixed offset as long as it doesn't overflow
7237 		 * the s32 'off' field
7238 		 */
7239 		if (known && (ptr_reg->off + smin_val ==
7240 			      (s64)(s32)(ptr_reg->off + smin_val))) {
7241 			/* pointer += K.  Accumulate it into fixed offset */
7242 			dst_reg->smin_value = smin_ptr;
7243 			dst_reg->smax_value = smax_ptr;
7244 			dst_reg->umin_value = umin_ptr;
7245 			dst_reg->umax_value = umax_ptr;
7246 			dst_reg->var_off = ptr_reg->var_off;
7247 			dst_reg->off = ptr_reg->off + smin_val;
7248 			dst_reg->raw = ptr_reg->raw;
7249 			break;
7250 		}
7251 		/* A new variable offset is created.  Note that off_reg->off
7252 		 * == 0, since it's a scalar.
7253 		 * dst_reg gets the pointer type and since some positive
7254 		 * integer value was added to the pointer, give it a new 'id'
7255 		 * if it's a PTR_TO_PACKET.
7256 		 * this creates a new 'base' pointer, off_reg (variable) gets
7257 		 * added into the variable offset, and we copy the fixed offset
7258 		 * from ptr_reg.
7259 		 */
7260 		if (signed_add_overflows(smin_ptr, smin_val) ||
7261 		    signed_add_overflows(smax_ptr, smax_val)) {
7262 			dst_reg->smin_value = S64_MIN;
7263 			dst_reg->smax_value = S64_MAX;
7264 		} else {
7265 			dst_reg->smin_value = smin_ptr + smin_val;
7266 			dst_reg->smax_value = smax_ptr + smax_val;
7267 		}
7268 		if (umin_ptr + umin_val < umin_ptr ||
7269 		    umax_ptr + umax_val < umax_ptr) {
7270 			dst_reg->umin_value = 0;
7271 			dst_reg->umax_value = U64_MAX;
7272 		} else {
7273 			dst_reg->umin_value = umin_ptr + umin_val;
7274 			dst_reg->umax_value = umax_ptr + umax_val;
7275 		}
7276 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
7277 		dst_reg->off = ptr_reg->off;
7278 		dst_reg->raw = ptr_reg->raw;
7279 		if (reg_is_pkt_pointer(ptr_reg)) {
7280 			dst_reg->id = ++env->id_gen;
7281 			/* something was added to pkt_ptr, set range to zero */
7282 			memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
7283 		}
7284 		break;
7285 	case BPF_SUB:
7286 		if (dst_reg == off_reg) {
7287 			/* scalar -= pointer.  Creates an unknown scalar */
7288 			verbose(env, "R%d tried to subtract pointer from scalar\n",
7289 				dst);
7290 			return -EACCES;
7291 		}
7292 		/* We don't allow subtraction from FP, because (according to
7293 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
7294 		 * be able to deal with it.
7295 		 */
7296 		if (ptr_reg->type == PTR_TO_STACK) {
7297 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
7298 				dst);
7299 			return -EACCES;
7300 		}
7301 		if (known && (ptr_reg->off - smin_val ==
7302 			      (s64)(s32)(ptr_reg->off - smin_val))) {
7303 			/* pointer -= K.  Subtract it from fixed offset */
7304 			dst_reg->smin_value = smin_ptr;
7305 			dst_reg->smax_value = smax_ptr;
7306 			dst_reg->umin_value = umin_ptr;
7307 			dst_reg->umax_value = umax_ptr;
7308 			dst_reg->var_off = ptr_reg->var_off;
7309 			dst_reg->id = ptr_reg->id;
7310 			dst_reg->off = ptr_reg->off - smin_val;
7311 			dst_reg->raw = ptr_reg->raw;
7312 			break;
7313 		}
7314 		/* A new variable offset is created.  If the subtrahend is known
7315 		 * nonnegative, then any reg->range we had before is still good.
7316 		 */
7317 		if (signed_sub_overflows(smin_ptr, smax_val) ||
7318 		    signed_sub_overflows(smax_ptr, smin_val)) {
7319 			/* Overflow possible, we know nothing */
7320 			dst_reg->smin_value = S64_MIN;
7321 			dst_reg->smax_value = S64_MAX;
7322 		} else {
7323 			dst_reg->smin_value = smin_ptr - smax_val;
7324 			dst_reg->smax_value = smax_ptr - smin_val;
7325 		}
7326 		if (umin_ptr < umax_val) {
7327 			/* Overflow possible, we know nothing */
7328 			dst_reg->umin_value = 0;
7329 			dst_reg->umax_value = U64_MAX;
7330 		} else {
7331 			/* Cannot overflow (as long as bounds are consistent) */
7332 			dst_reg->umin_value = umin_ptr - umax_val;
7333 			dst_reg->umax_value = umax_ptr - umin_val;
7334 		}
7335 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
7336 		dst_reg->off = ptr_reg->off;
7337 		dst_reg->raw = ptr_reg->raw;
7338 		if (reg_is_pkt_pointer(ptr_reg)) {
7339 			dst_reg->id = ++env->id_gen;
7340 			/* a possibly negative subtrahend may have advanced pkt_ptr; clear the range */
7341 			if (smin_val < 0)
7342 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
7343 		}
7344 		break;
7345 	case BPF_AND:
7346 	case BPF_OR:
7347 	case BPF_XOR:
7348 		/* bitwise ops on pointers are troublesome, prohibit. */
7349 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
7350 			dst, bpf_alu_string[opcode >> 4]);
7351 		return -EACCES;
7352 	default:
7353 		/* other operators (e.g. MUL, LSH) produce non-pointer results */
7354 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
7355 			dst, bpf_alu_string[opcode >> 4]);
7356 		return -EACCES;
7357 	}
7358 
7359 	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
7360 		return -EINVAL;
7361 
7362 	__update_reg_bounds(dst_reg);
7363 	__reg_deduce_bounds(dst_reg);
7364 	__reg_bound_offset(dst_reg);
7365 
7366 	if (sanitize_check_bounds(env, insn, dst_reg) < 0)
7367 		return -EACCES;
7368 	if (sanitize_needed(opcode)) {
7369 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
7370 				       &info, true);
7371 		if (ret < 0)
7372 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
7373 	}
7374 
7375 	return 0;
7376 }
7377 
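/* Example for the add helpers below: dst in [10, 20] plus src in [3, 5]
 * tightens to [13, 25] in both the signed and unsigned domains; if either
 * boundary addition can overflow, the corresponding bounds are reset to the
 * full range instead.
 */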
7378 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
7379 				 struct bpf_reg_state *src_reg)
7380 {
7381 	s32 smin_val = src_reg->s32_min_value;
7382 	s32 smax_val = src_reg->s32_max_value;
7383 	u32 umin_val = src_reg->u32_min_value;
7384 	u32 umax_val = src_reg->u32_max_value;
7385 
7386 	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
7387 	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
7388 		dst_reg->s32_min_value = S32_MIN;
7389 		dst_reg->s32_max_value = S32_MAX;
7390 	} else {
7391 		dst_reg->s32_min_value += smin_val;
7392 		dst_reg->s32_max_value += smax_val;
7393 	}
7394 	if (dst_reg->u32_min_value + umin_val < umin_val ||
7395 	    dst_reg->u32_max_value + umax_val < umax_val) {
7396 		dst_reg->u32_min_value = 0;
7397 		dst_reg->u32_max_value = U32_MAX;
7398 	} else {
7399 		dst_reg->u32_min_value += umin_val;
7400 		dst_reg->u32_max_value += umax_val;
7401 	}
7402 }
7403 
7404 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
7405 			       struct bpf_reg_state *src_reg)
7406 {
7407 	s64 smin_val = src_reg->smin_value;
7408 	s64 smax_val = src_reg->smax_value;
7409 	u64 umin_val = src_reg->umin_value;
7410 	u64 umax_val = src_reg->umax_value;
7411 
7412 	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
7413 	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
7414 		dst_reg->smin_value = S64_MIN;
7415 		dst_reg->smax_value = S64_MAX;
7416 	} else {
7417 		dst_reg->smin_value += smin_val;
7418 		dst_reg->smax_value += smax_val;
7419 	}
7420 	if (dst_reg->umin_value + umin_val < umin_val ||
7421 	    dst_reg->umax_value + umax_val < umax_val) {
7422 		dst_reg->umin_value = 0;
7423 		dst_reg->umax_value = U64_MAX;
7424 	} else {
7425 		dst_reg->umin_value += umin_val;
7426 		dst_reg->umax_value += umax_val;
7427 	}
7428 }
7429 
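/* Example for the sub helpers below: dst in [10, 20] minus src in [3, 5]
 * gives [5, 17].  If the unsigned minimum of dst is smaller than the
 * unsigned maximum of src, the result may wrap, so the unsigned bounds are
 * reset to [0, U32_MAX] (or [0, U64_MAX] for the 64-bit variant).
 */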
7430 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
7431 				 struct bpf_reg_state *src_reg)
7432 {
7433 	s32 smin_val = src_reg->s32_min_value;
7434 	s32 smax_val = src_reg->s32_max_value;
7435 	u32 umin_val = src_reg->u32_min_value;
7436 	u32 umax_val = src_reg->u32_max_value;
7437 
7438 	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
7439 	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
7440 		/* Overflow possible, we know nothing */
7441 		dst_reg->s32_min_value = S32_MIN;
7442 		dst_reg->s32_max_value = S32_MAX;
7443 	} else {
7444 		dst_reg->s32_min_value -= smax_val;
7445 		dst_reg->s32_max_value -= smin_val;
7446 	}
7447 	if (dst_reg->u32_min_value < umax_val) {
7448 		/* Overflow possible, we know nothing */
7449 		dst_reg->u32_min_value = 0;
7450 		dst_reg->u32_max_value = U32_MAX;
7451 	} else {
7452 		/* Cannot overflow (as long as bounds are consistent) */
7453 		dst_reg->u32_min_value -= umax_val;
7454 		dst_reg->u32_max_value -= umin_val;
7455 	}
7456 }
7457 
7458 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
7459 			       struct bpf_reg_state *src_reg)
7460 {
7461 	s64 smin_val = src_reg->smin_value;
7462 	s64 smax_val = src_reg->smax_value;
7463 	u64 umin_val = src_reg->umin_value;
7464 	u64 umax_val = src_reg->umax_value;
7465 
7466 	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
7467 	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
7468 		/* Overflow possible, we know nothing */
7469 		dst_reg->smin_value = S64_MIN;
7470 		dst_reg->smax_value = S64_MAX;
7471 	} else {
7472 		dst_reg->smin_value -= smax_val;
7473 		dst_reg->smax_value -= smin_val;
7474 	}
7475 	if (dst_reg->umin_value < umax_val) {
7476 		/* Overflow possible, we know nothing */
7477 		dst_reg->umin_value = 0;
7478 		dst_reg->umax_value = U64_MAX;
7479 	} else {
7480 		/* Cannot overflow (as long as bounds are consistent) */
7481 		dst_reg->umin_value -= umax_val;
7482 		dst_reg->umax_value -= umin_val;
7483 	}
7484 }
7485 
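/* Example for the mul helpers below: dst in [2, 10] times src in [3, 4]
 * gives [6, 40].  If either operand may be negative, or either unsigned
 * maximum exceeds the half-width limit (U16_MAX here, U32_MAX for the
 * 64-bit variant), the bounds are simply marked unbounded.
 */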
7486 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
7487 				 struct bpf_reg_state *src_reg)
7488 {
7489 	s32 smin_val = src_reg->s32_min_value;
7490 	u32 umin_val = src_reg->u32_min_value;
7491 	u32 umax_val = src_reg->u32_max_value;
7492 
7493 	if (smin_val < 0 || dst_reg->s32_min_value < 0) {
7494 		/* Ain't nobody got time to multiply that sign */
7495 		__mark_reg32_unbounded(dst_reg);
7496 		return;
7497 	}
7498 	/* Both values are positive, so we can work with unsigned and
7499 	 * copy the result to signed (unless it exceeds S32_MAX).
7500 	 */
7501 	if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
7502 		/* Potential overflow, we know nothing */
7503 		__mark_reg32_unbounded(dst_reg);
7504 		return;
7505 	}
7506 	dst_reg->u32_min_value *= umin_val;
7507 	dst_reg->u32_max_value *= umax_val;
7508 	if (dst_reg->u32_max_value > S32_MAX) {
7509 		/* Overflow possible, we know nothing */
7510 		dst_reg->s32_min_value = S32_MIN;
7511 		dst_reg->s32_max_value = S32_MAX;
7512 	} else {
7513 		dst_reg->s32_min_value = dst_reg->u32_min_value;
7514 		dst_reg->s32_max_value = dst_reg->u32_max_value;
7515 	}
7516 }
7517 
7518 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
7519 			       struct bpf_reg_state *src_reg)
7520 {
7521 	s64 smin_val = src_reg->smin_value;
7522 	u64 umin_val = src_reg->umin_value;
7523 	u64 umax_val = src_reg->umax_value;
7524 
7525 	if (smin_val < 0 || dst_reg->smin_value < 0) {
7526 		/* Ain't nobody got time to multiply that sign */
7527 		__mark_reg64_unbounded(dst_reg);
7528 		return;
7529 	}
7530 	/* Both values are positive, so we can work with unsigned and
7531 	 * copy the result to signed (unless it exceeds S64_MAX).
7532 	 */
7533 	if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
7534 		/* Potential overflow, we know nothing */
7535 		__mark_reg64_unbounded(dst_reg);
7536 		return;
7537 	}
7538 	dst_reg->umin_value *= umin_val;
7539 	dst_reg->umax_value *= umax_val;
7540 	if (dst_reg->umax_value > S64_MAX) {
7541 		/* Overflow possible, we know nothing */
7542 		dst_reg->smin_value = S64_MIN;
7543 		dst_reg->smax_value = S64_MAX;
7544 	} else {
7545 		dst_reg->smin_value = dst_reg->umin_value;
7546 		dst_reg->smax_value = dst_reg->umax_value;
7547 	}
7548 }
7549 
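/* Example for the and helpers below: a scalar in [0, 255] with a fully
 * unknown low byte ANDed with constant 0xf0 ends up with umin 0 (the known
 * bits of the result tnum) and umax min(255, 0xf0) = 240; both operands
 * being nonnegative, the signed bounds follow the unsigned ones.
 */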
7550 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
7551 				 struct bpf_reg_state *src_reg)
7552 {
7553 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
7554 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7555 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7556 	s32 smin_val = src_reg->s32_min_value;
7557 	u32 umax_val = src_reg->u32_max_value;
7558 
7559 	if (src_known && dst_known) {
7560 		__mark_reg32_known(dst_reg, var32_off.value);
7561 		return;
7562 	}
7563 
7564 	/* We get our minimum from the var_off, since that's inherently
7565 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
7566 	 */
7567 	dst_reg->u32_min_value = var32_off.value;
7568 	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
7569 	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
7570 		/* Lose signed bounds when ANDing negative numbers,
7571 		 * ain't nobody got time for that.
7572 		 */
7573 		dst_reg->s32_min_value = S32_MIN;
7574 		dst_reg->s32_max_value = S32_MAX;
7575 	} else {
7576 		/* ANDing two positives gives a positive, so safe to
7577 		 * cast result into s32.
7578 		 */
7579 		dst_reg->s32_min_value = dst_reg->u32_min_value;
7580 		dst_reg->s32_max_value = dst_reg->u32_max_value;
7581 	}
7582 }
7583 
7584 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
7585 			       struct bpf_reg_state *src_reg)
7586 {
7587 	bool src_known = tnum_is_const(src_reg->var_off);
7588 	bool dst_known = tnum_is_const(dst_reg->var_off);
7589 	s64 smin_val = src_reg->smin_value;
7590 	u64 umax_val = src_reg->umax_value;
7591 
7592 	if (src_known && dst_known) {
7593 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
7594 		return;
7595 	}
7596 
7597 	/* We get our minimum from the var_off, since that's inherently
7598 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
7599 	 */
7600 	dst_reg->umin_value = dst_reg->var_off.value;
7601 	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
7602 	if (dst_reg->smin_value < 0 || smin_val < 0) {
7603 		/* Lose signed bounds when ANDing negative numbers,
7604 		 * ain't nobody got time for that.
7605 		 */
7606 		dst_reg->smin_value = S64_MIN;
7607 		dst_reg->smax_value = S64_MAX;
7608 	} else {
7609 		/* ANDing two positives gives a positive, so safe to
7610 		 * cast result into s64.
7611 		 */
7612 		dst_reg->smin_value = dst_reg->umin_value;
7613 		dst_reg->smax_value = dst_reg->umax_value;
7614 	}
7615 	/* We may learn something more from the var_off */
7616 	__update_reg_bounds(dst_reg);
7617 }
7618 
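/* Example for the or helpers below: a scalar in [0, 15] with a fully
 * unknown low nibble ORed with constant 0x10 gets umin max(0, 0x10) = 16
 * and umax 0x10 | 0xf = 31, i.e. the result is known to lie in [16, 31].
 */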
7619 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
7620 				struct bpf_reg_state *src_reg)
7621 {
7622 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
7623 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7624 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7625 	s32 smin_val = src_reg->s32_min_value;
7626 	u32 umin_val = src_reg->u32_min_value;
7627 
7628 	if (src_known && dst_known) {
7629 		__mark_reg32_known(dst_reg, var32_off.value);
7630 		return;
7631 	}
7632 
7633 	/* We get our maximum from the var_off, and our minimum is the
7634 	 * maximum of the operands' minima
7635 	 */
7636 	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
7637 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
7638 	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
7639 		/* Lose signed bounds when ORing negative numbers,
7640 		 * ain't nobody got time for that.
7641 		 */
7642 		dst_reg->s32_min_value = S32_MIN;
7643 		dst_reg->s32_max_value = S32_MAX;
7644 	} else {
7645 		/* ORing two positives gives a positive, so safe to
7646 		 * cast result into s32.
7647 		 */
7648 		dst_reg->s32_min_value = dst_reg->u32_min_value;
7649 		dst_reg->s32_max_value = dst_reg->u32_max_value;
7650 	}
7651 }
7652 
7653 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
7654 			      struct bpf_reg_state *src_reg)
7655 {
7656 	bool src_known = tnum_is_const(src_reg->var_off);
7657 	bool dst_known = tnum_is_const(dst_reg->var_off);
7658 	s64 smin_val = src_reg->smin_value;
7659 	u64 umin_val = src_reg->umin_value;
7660 
7661 	if (src_known && dst_known) {
7662 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
7663 		return;
7664 	}
7665 
7666 	/* We get our maximum from the var_off, and our minimum is the
7667 	 * maximum of the operands' minima
7668 	 */
7669 	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
7670 	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
7671 	if (dst_reg->smin_value < 0 || smin_val < 0) {
7672 		/* Lose signed bounds when ORing negative numbers,
7673 		 * ain't nobody got time for that.
7674 		 */
7675 		dst_reg->smin_value = S64_MIN;
7676 		dst_reg->smax_value = S64_MAX;
7677 	} else {
7678 		/* ORing two positives gives a positive, so safe to
7679 		 * cast result into s64.
7680 		 */
7681 		dst_reg->smin_value = dst_reg->umin_value;
7682 		dst_reg->smax_value = dst_reg->umax_value;
7683 	}
7684 	/* We may learn something more from the var_off */
7685 	__update_reg_bounds(dst_reg);
7686 }
7687 
7688 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
7689 				 struct bpf_reg_state *src_reg)
7690 {
7691 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
7692 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7693 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7694 	s32 smin_val = src_reg->s32_min_value;
7695 
7696 	if (src_known && dst_known) {
7697 		__mark_reg32_known(dst_reg, var32_off.value);
7698 		return;
7699 	}
7700 
7701 	/* We get both minimum and maximum from the var32_off. */
7702 	dst_reg->u32_min_value = var32_off.value;
7703 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
7704 
7705 	if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
7706 		/* XORing two positive sign numbers gives a positive,
7707 		 * so safe to cast u32 result into s32.
7708 		 */
7709 		dst_reg->s32_min_value = dst_reg->u32_min_value;
7710 		dst_reg->s32_max_value = dst_reg->u32_max_value;
7711 	} else {
7712 		dst_reg->s32_min_value = S32_MIN;
7713 		dst_reg->s32_max_value = S32_MAX;
7714 	}
7715 }
7716 
7717 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
7718 			       struct bpf_reg_state *src_reg)
7719 {
7720 	bool src_known = tnum_is_const(src_reg->var_off);
7721 	bool dst_known = tnum_is_const(dst_reg->var_off);
7722 	s64 smin_val = src_reg->smin_value;
7723 
7724 	if (src_known && dst_known) {
7725 		/* dst_reg->var_off.value has been updated earlier */
7726 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
7727 		return;
7728 	}
7729 
7730 	/* We get both minimum and maximum from the var_off. */
7731 	dst_reg->umin_value = dst_reg->var_off.value;
7732 	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
7733 
7734 	if (dst_reg->smin_value >= 0 && smin_val >= 0) {
7735 		/* XORing two positive sign numbers gives a positive,
7736 		 * so safe to cast u64 result into s64.
7737 		 */
7738 		dst_reg->smin_value = dst_reg->umin_value;
7739 		dst_reg->smax_value = dst_reg->umax_value;
7740 	} else {
7741 		dst_reg->smin_value = S64_MIN;
7742 		dst_reg->smax_value = S64_MAX;
7743 	}
7744 
7745 	__update_reg_bounds(dst_reg);
7746 }
7747 
7748 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
7749 				   u64 umin_val, u64 umax_val)
7750 {
7751 	/* We lose all sign bit information (except what we can pick
7752 	 * up from var_off)
7753 	 */
7754 	dst_reg->s32_min_value = S32_MIN;
7755 	dst_reg->s32_max_value = S32_MAX;
7756 	/* If we might shift our top bit out, then we know nothing */
7757 	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
7758 		dst_reg->u32_min_value = 0;
7759 		dst_reg->u32_max_value = U32_MAX;
7760 	} else {
7761 		dst_reg->u32_min_value <<= umin_val;
7762 		dst_reg->u32_max_value <<= umax_val;
7763 	}
7764 }
7765 
7766 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
7767 				 struct bpf_reg_state *src_reg)
7768 {
7769 	u32 umax_val = src_reg->u32_max_value;
7770 	u32 umin_val = src_reg->u32_min_value;
7771 	/* u32 alu operation will zext upper bits */
7772 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
7773 
7774 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
7775 	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
7776 	/* Not required, but to be careful mark the reg64 bounds as unknown so
7777 	 * that we are forced to pick them up from the tnum and zext later;
7778 	 * if some path skips this step we are still safe.
7779 	 */
7780 	__mark_reg64_unbounded(dst_reg);
7781 	__update_reg32_bounds(dst_reg);
7782 }
7783 
7784 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
7785 				   u64 umin_val, u64 umax_val)
7786 {
7787 	/* Special case <<32 because it is a common compiler pattern to sign
7788 	 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
7789 	 * positive we know this shift will also be positive so we can track
7790 	 * bounds correctly. Otherwise we lose all sign bit information except
7791 	 * what we can pick up from var_off. Perhaps we can generalize this
7792 	 * later to shifts of any length.
7793 	 */
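	/* For instance, a register with 32-bit bounds [0, 100] shifted left
	 * by a constant 32 gets smin = 0 and smax = 100 << 32; the matching
	 * s>>32 then restores [0, 100] as its 64-bit signed bounds.
	 */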
7794 	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
7795 		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
7796 	else
7797 		dst_reg->smax_value = S64_MAX;
7798 
7799 	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
7800 		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
7801 	else
7802 		dst_reg->smin_value = S64_MIN;
7803 
7804 	/* If we might shift our top bit out, then we know nothing */
7805 	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
7806 		dst_reg->umin_value = 0;
7807 		dst_reg->umax_value = U64_MAX;
7808 	} else {
7809 		dst_reg->umin_value <<= umin_val;
7810 		dst_reg->umax_value <<= umax_val;
7811 	}
7812 }
7813 
7814 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
7815 			       struct bpf_reg_state *src_reg)
7816 {
7817 	u64 umax_val = src_reg->umax_value;
7818 	u64 umin_val = src_reg->umin_value;
7819 
7820 	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
7821 	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
7822 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
7823 
7824 	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
7825 	/* We may learn something more from the var_off */
7826 	__update_reg_bounds(dst_reg);
7827 }
7828 
7829 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
7830 				 struct bpf_reg_state *src_reg)
7831 {
7832 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
7833 	u32 umax_val = src_reg->u32_max_value;
7834 	u32 umin_val = src_reg->u32_min_value;
7835 
7836 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
7837 	 * be negative, then either:
7838 	 * 1) src_reg might be zero, so the sign bit of the result is
7839 	 *    unknown, so we lose our signed bounds
7840 	 * 2) it's known negative, thus the unsigned bounds capture the
7841 	 *    signed bounds
7842 	 * 3) the signed bounds cross zero, so they tell us nothing
7843 	 *    about the result
7844 	 * If the value in dst_reg is known nonnegative, then again the
7845 	 * unsigned bounds capture the signed bounds.
7846 	 * Thus, in all cases it suffices to blow away our signed bounds
7847 	 * and rely on inferring new ones from the unsigned bounds and
7848 	 * var_off of the result.
7849 	 */
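	/* For instance, dst in [256, 511] shifted right by a constant 4 ends
	 * up with unsigned bounds [16, 31]; the signed bounds are then
	 * rederived from these and from the shifted var_off.
	 */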
7850 	dst_reg->s32_min_value = S32_MIN;
7851 	dst_reg->s32_max_value = S32_MAX;
7852 
7853 	dst_reg->var_off = tnum_rshift(subreg, umin_val);
7854 	dst_reg->u32_min_value >>= umax_val;
7855 	dst_reg->u32_max_value >>= umin_val;
7856 
7857 	__mark_reg64_unbounded(dst_reg);
7858 	__update_reg32_bounds(dst_reg);
7859 }
7860 
7861 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
7862 			       struct bpf_reg_state *src_reg)
7863 {
7864 	u64 umax_val = src_reg->umax_value;
7865 	u64 umin_val = src_reg->umin_value;
7866 
7867 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
7868 	 * be negative, then either:
7869 	 * 1) src_reg might be zero, so the sign bit of the result is
7870 	 *    unknown, so we lose our signed bounds
7871 	 * 2) it's known negative, thus the unsigned bounds capture the
7872 	 *    signed bounds
7873 	 * 3) the signed bounds cross zero, so they tell us nothing
7874 	 *    about the result
7875 	 * If the value in dst_reg is known nonnegative, then again the
7876 	 * unsigned bounds capture the signed bounds.
7877 	 * Thus, in all cases it suffices to blow away our signed bounds
7878 	 * and rely on inferring new ones from the unsigned bounds and
7879 	 * var_off of the result.
7880 	 */
7881 	dst_reg->smin_value = S64_MIN;
7882 	dst_reg->smax_value = S64_MAX;
7883 	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
7884 	dst_reg->umin_value >>= umax_val;
7885 	dst_reg->umax_value >>= umin_val;
7886 
7887 	/* It's not easy to operate on alu32 bounds here because it depends
7888 	 * on bits being shifted in. Take the easy way out and mark unbounded
7889 	 * so we can recalculate later from tnum.
7890 	 */
7891 	__mark_reg32_unbounded(dst_reg);
7892 	__update_reg_bounds(dst_reg);
7893 }
7894 
7895 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
7896 				  struct bpf_reg_state *src_reg)
7897 {
7898 	u64 umin_val = src_reg->u32_min_value;
7899 
7900 	/* Upon reaching here, src_known is true and
7901 	 * umax_val is equal to umin_val.
7902 	 */
7903 	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
7904 	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
7905 
7906 	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
7907 
7908 	/* blow away the dst_reg umin_value/umax_value and rely on
7909 	 * dst_reg var_off to refine the result.
7910 	 */
7911 	dst_reg->u32_min_value = 0;
7912 	dst_reg->u32_max_value = U32_MAX;
7913 
7914 	__mark_reg64_unbounded(dst_reg);
7915 	__update_reg32_bounds(dst_reg);
7916 }
7917 
7918 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
7919 				struct bpf_reg_state *src_reg)
7920 {
7921 	u64 umin_val = src_reg->umin_value;
7922 
7923 	/* Upon reaching here, src_known is true and umax_val is equal
7924 	 * to umin_val.
7925 	 */
7926 	dst_reg->smin_value >>= umin_val;
7927 	dst_reg->smax_value >>= umin_val;
7928 
7929 	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
7930 
7931 	/* blow away the dst_reg umin_value/umax_value and rely on
7932 	 * dst_reg var_off to refine the result.
7933 	 */
7934 	dst_reg->umin_value = 0;
7935 	dst_reg->umax_value = U64_MAX;
7936 
7937 	/* It's not easy to operate on alu32 bounds here because it depends
7938 	 * on bits being shifted in from the upper 32 bits. Take the easy way out
7939 	 * and mark unbounded so we can recalculate later from tnum.
7940 	 */
7941 	__mark_reg32_unbounded(dst_reg);
7942 	__update_reg_bounds(dst_reg);
7943 }
7944 
7945 /* WARNING: This function does calculations on 64-bit values, but the actual
7946  * execution may occur on 32-bit values. Therefore, things like bitshifts
7947  * need extra checks in the 32-bit case.
7948  */
7949 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
7950 				      struct bpf_insn *insn,
7951 				      struct bpf_reg_state *dst_reg,
7952 				      struct bpf_reg_state src_reg)
7953 {
7954 	struct bpf_reg_state *regs = cur_regs(env);
7955 	u8 opcode = BPF_OP(insn->code);
7956 	bool src_known;
7957 	s64 smin_val, smax_val;
7958 	u64 umin_val, umax_val;
7959 	s32 s32_min_val, s32_max_val;
7960 	u32 u32_min_val, u32_max_val;
7961 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
7962 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
7963 	int ret;
7964 
7965 	smin_val = src_reg.smin_value;
7966 	smax_val = src_reg.smax_value;
7967 	umin_val = src_reg.umin_value;
7968 	umax_val = src_reg.umax_value;
7969 
7970 	s32_min_val = src_reg.s32_min_value;
7971 	s32_max_val = src_reg.s32_max_value;
7972 	u32_min_val = src_reg.u32_min_value;
7973 	u32_max_val = src_reg.u32_max_value;
7974 
7975 	if (alu32) {
7976 		src_known = tnum_subreg_is_const(src_reg.var_off);
7977 		if ((src_known &&
7978 		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
7979 		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
7980 			/* Taint dst register if offset had invalid bounds
7981 			 * derived from e.g. dead branches.
7982 			 */
7983 			__mark_reg_unknown(env, dst_reg);
7984 			return 0;
7985 		}
7986 	} else {
7987 		src_known = tnum_is_const(src_reg.var_off);
7988 		if ((src_known &&
7989 		     (smin_val != smax_val || umin_val != umax_val)) ||
7990 		    smin_val > smax_val || umin_val > umax_val) {
7991 			/* Taint dst register if offset had invalid bounds
7992 			 * derived from e.g. dead branches.
7993 			 */
7994 			__mark_reg_unknown(env, dst_reg);
7995 			return 0;
7996 		}
7997 	}
7998 
7999 	if (!src_known &&
8000 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
8001 		__mark_reg_unknown(env, dst_reg);
8002 		return 0;
8003 	}
8004 
8005 	if (sanitize_needed(opcode)) {
8006 		ret = sanitize_val_alu(env, insn);
8007 		if (ret < 0)
8008 			return sanitize_err(env, insn, ret, NULL, NULL);
8009 	}
8010 
8011 	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
8012 	 * There are two classes of instructions: for the first class we track both
8013 	 * alu32 and alu64 sign/unsigned bounds independently; this provides the
8014 	 * greatest amount of precision when alu operations are mixed with jmp32
8015 	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
8016 	 * and BPF_OR. This is possible because these ops have fairly easy to
8017 	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
8018 	 * See alu32 verifier tests for examples. The second class of
8019 	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however, are not so easy
8020 	 * with regard to tracking sign/unsigned bounds because the bits may
8021 	 * cross subreg boundaries in the alu64 case. When this happens we mark
8022 	 * the reg unbounded in the subreg bound space and use the resulting
8023 	 * tnum to calculate an approximation of the sign/unsigned bounds.
8024 	 */
8025 	switch (opcode) {
8026 	case BPF_ADD:
8027 		scalar32_min_max_add(dst_reg, &src_reg);
8028 		scalar_min_max_add(dst_reg, &src_reg);
8029 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
8030 		break;
8031 	case BPF_SUB:
8032 		scalar32_min_max_sub(dst_reg, &src_reg);
8033 		scalar_min_max_sub(dst_reg, &src_reg);
8034 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
8035 		break;
8036 	case BPF_MUL:
8037 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
8038 		scalar32_min_max_mul(dst_reg, &src_reg);
8039 		scalar_min_max_mul(dst_reg, &src_reg);
8040 		break;
8041 	case BPF_AND:
8042 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
8043 		scalar32_min_max_and(dst_reg, &src_reg);
8044 		scalar_min_max_and(dst_reg, &src_reg);
8045 		break;
8046 	case BPF_OR:
8047 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
8048 		scalar32_min_max_or(dst_reg, &src_reg);
8049 		scalar_min_max_or(dst_reg, &src_reg);
8050 		break;
8051 	case BPF_XOR:
8052 		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
8053 		scalar32_min_max_xor(dst_reg, &src_reg);
8054 		scalar_min_max_xor(dst_reg, &src_reg);
8055 		break;
8056 	case BPF_LSH:
8057 		if (umax_val >= insn_bitness) {
8058 			/* Shifts greater than 31 or 63 are undefined.
8059 			 * This includes shifts by a negative number.
8060 			 */
8061 			mark_reg_unknown(env, regs, insn->dst_reg);
8062 			break;
8063 		}
8064 		if (alu32)
8065 			scalar32_min_max_lsh(dst_reg, &src_reg);
8066 		else
8067 			scalar_min_max_lsh(dst_reg, &src_reg);
8068 		break;
8069 	case BPF_RSH:
8070 		if (umax_val >= insn_bitness) {
8071 			/* Shifts greater than 31 or 63 are undefined.
8072 			 * This includes shifts by a negative number.
8073 			 */
8074 			mark_reg_unknown(env, regs, insn->dst_reg);
8075 			break;
8076 		}
8077 		if (alu32)
8078 			scalar32_min_max_rsh(dst_reg, &src_reg);
8079 		else
8080 			scalar_min_max_rsh(dst_reg, &src_reg);
8081 		break;
8082 	case BPF_ARSH:
8083 		if (umax_val >= insn_bitness) {
8084 			/* Shifts greater than 31 or 63 are undefined.
8085 			 * This includes shifts by a negative number.
8086 			 */
8087 			mark_reg_unknown(env, regs, insn->dst_reg);
8088 			break;
8089 		}
8090 		if (alu32)
8091 			scalar32_min_max_arsh(dst_reg, &src_reg);
8092 		else
8093 			scalar_min_max_arsh(dst_reg, &src_reg);
8094 		break;
8095 	default:
8096 		mark_reg_unknown(env, regs, insn->dst_reg);
8097 		break;
8098 	}
8099 
8100 	/* ALU32 ops are zero extended into 64bit register */
8101 	if (alu32)
8102 		zext_32_to_64(dst_reg);
8103 
8104 	__update_reg_bounds(dst_reg);
8105 	__reg_deduce_bounds(dst_reg);
8106 	__reg_bound_offset(dst_reg);
8107 	return 0;
8108 }
8109 
8110 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
8111  * and var_off.
8112  */
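/* e.g. for BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2) with R1 a scalar and
 * R2 = PTR_TO_MAP_VALUE, the src/dst roles are swapped below and
 * adjust_ptr_min_max_vals() is called with ptr_reg = R2 and off_reg = R1,
 * so R1, the destination, ends up carrying the pointer type.
 */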
8113 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
8114 				   struct bpf_insn *insn)
8115 {
8116 	struct bpf_verifier_state *vstate = env->cur_state;
8117 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
8118 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
8119 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
8120 	u8 opcode = BPF_OP(insn->code);
8121 	int err;
8122 
8123 	dst_reg = &regs[insn->dst_reg];
8124 	src_reg = NULL;
8125 	if (dst_reg->type != SCALAR_VALUE)
8126 		ptr_reg = dst_reg;
8127 	else
8128 		/* Make sure ID is cleared otherwise dst_reg min/max could be
8129 		 * incorrectly propagated into other registers by find_equal_scalars()
8130 		 */
8131 		dst_reg->id = 0;
8132 	if (BPF_SRC(insn->code) == BPF_X) {
8133 		src_reg = &regs[insn->src_reg];
8134 		if (src_reg->type != SCALAR_VALUE) {
8135 			if (dst_reg->type != SCALAR_VALUE) {
8136 				/* Combining two pointers by any ALU op yields
8137 				 * an arbitrary scalar. Disallow all math except
8138 				 * pointer subtraction
8139 				 */
8140 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
8141 					mark_reg_unknown(env, regs, insn->dst_reg);
8142 					return 0;
8143 				}
8144 				verbose(env, "R%d pointer %s pointer prohibited\n",
8145 					insn->dst_reg,
8146 					bpf_alu_string[opcode >> 4]);
8147 				return -EACCES;
8148 			} else {
8149 				/* scalar += pointer
8150 				 * This is legal, but we have to reverse our
8151 				 * src/dest handling in computing the range
8152 				 */
8153 				err = mark_chain_precision(env, insn->dst_reg);
8154 				if (err)
8155 					return err;
8156 				return adjust_ptr_min_max_vals(env, insn,
8157 							       src_reg, dst_reg);
8158 			}
8159 		} else if (ptr_reg) {
8160 			/* pointer += scalar */
8161 			err = mark_chain_precision(env, insn->src_reg);
8162 			if (err)
8163 				return err;
8164 			return adjust_ptr_min_max_vals(env, insn,
8165 						       dst_reg, src_reg);
8166 		}
8167 	} else {
8168 		/* Pretend the src is a reg with a known value, since we only
8169 		 * need to be able to read from this state.
8170 		 */
8171 		off_reg.type = SCALAR_VALUE;
8172 		__mark_reg_known(&off_reg, insn->imm);
8173 		src_reg = &off_reg;
8174 		if (ptr_reg) /* pointer += K */
8175 			return adjust_ptr_min_max_vals(env, insn,
8176 						       ptr_reg, src_reg);
8177 	}
8178 
8179 	/* Got here implies adding two SCALAR_VALUEs */
8180 	if (WARN_ON_ONCE(ptr_reg)) {
8181 		print_verifier_state(env, state);
8182 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
8183 		return -EINVAL;
8184 	}
8185 	if (WARN_ON(!src_reg)) {
8186 		print_verifier_state(env, state);
8187 		verbose(env, "verifier internal error: no src_reg\n");
8188 		return -EINVAL;
8189 	}
8190 	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
8191 }
8192 
8193 /* check validity of 32-bit and 64-bit arithmetic operations */
8194 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
8195 {
8196 	struct bpf_reg_state *regs = cur_regs(env);
8197 	u8 opcode = BPF_OP(insn->code);
8198 	int err;
8199 
8200 	if (opcode == BPF_END || opcode == BPF_NEG) {
8201 		if (opcode == BPF_NEG) {
8202 			if (BPF_SRC(insn->code) != 0 ||
8203 			    insn->src_reg != BPF_REG_0 ||
8204 			    insn->off != 0 || insn->imm != 0) {
8205 				verbose(env, "BPF_NEG uses reserved fields\n");
8206 				return -EINVAL;
8207 			}
8208 		} else {
8209 			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
8210 			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
8211 			    BPF_CLASS(insn->code) == BPF_ALU64) {
8212 				verbose(env, "BPF_END uses reserved fields\n");
8213 				return -EINVAL;
8214 			}
8215 		}
8216 
8217 		/* check src operand */
8218 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8219 		if (err)
8220 			return err;
8221 
8222 		if (is_pointer_value(env, insn->dst_reg)) {
8223 			verbose(env, "R%d pointer arithmetic prohibited\n",
8224 				insn->dst_reg);
8225 			return -EACCES;
8226 		}
8227 
8228 		/* check dest operand */
8229 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
8230 		if (err)
8231 			return err;
8232 
8233 	} else if (opcode == BPF_MOV) {
8234 
8235 		if (BPF_SRC(insn->code) == BPF_X) {
8236 			if (insn->imm != 0 || insn->off != 0) {
8237 				verbose(env, "BPF_MOV uses reserved fields\n");
8238 				return -EINVAL;
8239 			}
8240 
8241 			/* check src operand */
8242 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
8243 			if (err)
8244 				return err;
8245 		} else {
8246 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
8247 				verbose(env, "BPF_MOV uses reserved fields\n");
8248 				return -EINVAL;
8249 			}
8250 		}
8251 
8252 		/* check dest operand, mark as required later */
8253 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
8254 		if (err)
8255 			return err;
8256 
8257 		if (BPF_SRC(insn->code) == BPF_X) {
8258 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
8259 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
8260 
8261 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
8262 				/* case: R1 = R2
8263 				 * copy register state to dest reg
8264 				 */
8265 				if (src_reg->type == SCALAR_VALUE && !src_reg->id)
8266 					/* Assign src and dst registers the same ID
8267 					 * that will be used by find_equal_scalars()
8268 					 * to propagate min/max range.
8269 					 */
8270 					src_reg->id = ++env->id_gen;
8271 				*dst_reg = *src_reg;
8272 				dst_reg->live |= REG_LIVE_WRITTEN;
8273 				dst_reg->subreg_def = DEF_NOT_SUBREG;
8274 			} else {
8275 				/* R1 = (u32) R2 */
8276 				if (is_pointer_value(env, insn->src_reg)) {
8277 					verbose(env,
8278 						"R%d partial copy of pointer\n",
8279 						insn->src_reg);
8280 					return -EACCES;
8281 				} else if (src_reg->type == SCALAR_VALUE) {
8282 					*dst_reg = *src_reg;
8283 					/* Make sure ID is cleared otherwise
8284 					 * dst_reg min/max could be incorrectly
8285 					 * propagated into src_reg by find_equal_scalars()
8286 					 */
8287 					dst_reg->id = 0;
8288 					dst_reg->live |= REG_LIVE_WRITTEN;
8289 					dst_reg->subreg_def = env->insn_idx + 1;
8290 				} else {
8291 					mark_reg_unknown(env, regs,
8292 							 insn->dst_reg);
8293 				}
8294 				zext_32_to_64(dst_reg);
8295 			}
8296 		} else {
8297 			/* case: R = imm
8298 			 * remember the value we stored into this reg
8299 			 */
8300 			/* clear any state __mark_reg_known doesn't set */
8301 			mark_reg_unknown(env, regs, insn->dst_reg);
8302 			regs[insn->dst_reg].type = SCALAR_VALUE;
8303 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
8304 				__mark_reg_known(regs + insn->dst_reg,
8305 						 insn->imm);
8306 			} else {
8307 				__mark_reg_known(regs + insn->dst_reg,
8308 						 (u32)insn->imm);
8309 			}
8310 		}
8311 
8312 	} else if (opcode > BPF_END) {
8313 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
8314 		return -EINVAL;
8315 
8316 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
8317 
8318 		if (BPF_SRC(insn->code) == BPF_X) {
8319 			if (insn->imm != 0 || insn->off != 0) {
8320 				verbose(env, "BPF_ALU uses reserved fields\n");
8321 				return -EINVAL;
8322 			}
8323 			/* check src1 operand */
8324 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
8325 			if (err)
8326 				return err;
8327 		} else {
8328 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
8329 				verbose(env, "BPF_ALU uses reserved fields\n");
8330 				return -EINVAL;
8331 			}
8332 		}
8333 
8334 		/* check src2 operand */
8335 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8336 		if (err)
8337 			return err;
8338 
8339 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
8340 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
8341 			verbose(env, "div by zero\n");
8342 			return -EINVAL;
8343 		}
8344 
8345 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
8346 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
8347 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
8348 
8349 			if (insn->imm < 0 || insn->imm >= size) {
8350 				verbose(env, "invalid shift %d\n", insn->imm);
8351 				return -EINVAL;
8352 			}
8353 		}
8354 
8355 		/* check dest operand */
8356 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
8357 		if (err)
8358 			return err;
8359 
8360 		return adjust_reg_min_max_vals(env, insn);
8361 	}
8362 
8363 	return 0;
8364 }
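
/* For illustration only, a few insns the checks above would reject (a
 * sketch using the standard BPF_* insn macros; the register choice is
 * made up):
 *
 *   BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 64)  -> "invalid shift 64"
 *   BPF_ALU64_IMM(BPF_DIV, BPF_REG_1, 0)   -> "div by zero"
 *   BPF_ENDIAN(BPF_TO_LE, BPF_REG_1, 24)   -> "BPF_END uses reserved fields"
 */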
8365 
8366 static void __find_good_pkt_pointers(struct bpf_func_state *state,
8367 				     struct bpf_reg_state *dst_reg,
8368 				     enum bpf_reg_type type, int new_range)
8369 {
8370 	struct bpf_reg_state *reg;
8371 	int i;
8372 
8373 	for (i = 0; i < MAX_BPF_REG; i++) {
8374 		reg = &state->regs[i];
8375 		if (reg->type == type && reg->id == dst_reg->id)
8376 			/* keep the maximum range already checked */
8377 			reg->range = max(reg->range, new_range);
8378 	}
8379 
8380 	bpf_for_each_spilled_reg(i, state, reg) {
8381 		if (!reg)
8382 			continue;
8383 		if (reg->type == type && reg->id == dst_reg->id)
8384 			reg->range = max(reg->range, new_range);
8385 	}
8386 }
8387 
8388 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
8389 				   struct bpf_reg_state *dst_reg,
8390 				   enum bpf_reg_type type,
8391 				   bool range_right_open)
8392 {
8393 	int new_range, i;
8394 
8395 	if (dst_reg->off < 0 ||
8396 	    (dst_reg->off == 0 && range_right_open))
8397 		/* This doesn't give us any range */
8398 		return;
8399 
8400 	if (dst_reg->umax_value > MAX_PACKET_OFF ||
8401 	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
8402 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
8403 		 * than pkt_end, but that's because it's also less than pkt.
8404 		 */
8405 		return;
8406 
8407 	new_range = dst_reg->off;
8408 	if (range_right_open)
8409 		new_range--;
8410 
8411 	/* Examples for register markings:
8412 	 *
8413 	 * pkt_data in dst register:
8414 	 *
8415 	 *   r2 = r3;
8416 	 *   r2 += 8;
8417 	 *   if (r2 > pkt_end) goto <handle exception>
8418 	 *   <access okay>
8419 	 *
8420 	 *   r2 = r3;
8421 	 *   r2 += 8;
8422 	 *   if (r2 < pkt_end) goto <access okay>
8423 	 *   <handle exception>
8424 	 *
8425 	 *   Where:
8426 	 *     r2 == dst_reg, pkt_end == src_reg
8427 	 *     r2=pkt(id=n,off=8,r=0)
8428 	 *     r3=pkt(id=n,off=0,r=0)
8429 	 *
8430 	 * pkt_data in src register:
8431 	 *
8432 	 *   r2 = r3;
8433 	 *   r2 += 8;
8434 	 *   if (pkt_end >= r2) goto <access okay>
8435 	 *   <handle exception>
8436 	 *
8437 	 *   r2 = r3;
8438 	 *   r2 += 8;
8439 	 *   if (pkt_end <= r2) goto <handle exception>
8440 	 *   <access okay>
8441 	 *
8442 	 *   Where:
8443 	 *     pkt_end == dst_reg, r2 == src_reg
8444 	 *     r2=pkt(id=n,off=8,r=0)
8445 	 *     r3=pkt(id=n,off=0,r=0)
8446 	 *
8447 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
8448 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
8449 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
8450 	 * the check.
8451 	 */
8452 
8453 	/* If our ids match, then we must have the same max_value.  And we
8454 	 * don't care about the other reg's fixed offset, since if it's too big
8455 	 * the range won't allow anything.
8456 	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
8457 	 */
8458 	for (i = 0; i <= vstate->curframe; i++)
8459 		__find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
8460 					 new_range);
8461 }
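
/* A minimal sketch of the new_range computation above, assuming dst_reg
 * came from "r2 = r3; r2 += 8;" (so dst_reg->off == 8); the marking lands
 * in the branch where the access is provably safe:
 *
 *   if (r2 > pkt_end) goto err;   -> range_right_open == false,
 *                                    new_range = 8, r3 is marked r=8
 *   if (r2 >= pkt_end) goto err;  -> range_right_open == true,
 *                                    new_range = 8 - 1, r3 is marked r=7
 */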
8462 
8463 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
8464 {
8465 	struct tnum subreg = tnum_subreg(reg->var_off);
8466 	s32 sval = (s32)val;
8467 
8468 	switch (opcode) {
8469 	case BPF_JEQ:
8470 		if (tnum_is_const(subreg))
8471 			return !!tnum_equals_const(subreg, val);
8472 		break;
8473 	case BPF_JNE:
8474 		if (tnum_is_const(subreg))
8475 			return !tnum_equals_const(subreg, val);
8476 		break;
8477 	case BPF_JSET:
8478 		if ((~subreg.mask & subreg.value) & val)
8479 			return 1;
8480 		if (!((subreg.mask | subreg.value) & val))
8481 			return 0;
8482 		break;
8483 	case BPF_JGT:
8484 		if (reg->u32_min_value > val)
8485 			return 1;
8486 		else if (reg->u32_max_value <= val)
8487 			return 0;
8488 		break;
8489 	case BPF_JSGT:
8490 		if (reg->s32_min_value > sval)
8491 			return 1;
8492 		else if (reg->s32_max_value <= sval)
8493 			return 0;
8494 		break;
8495 	case BPF_JLT:
8496 		if (reg->u32_max_value < val)
8497 			return 1;
8498 		else if (reg->u32_min_value >= val)
8499 			return 0;
8500 		break;
8501 	case BPF_JSLT:
8502 		if (reg->s32_max_value < sval)
8503 			return 1;
8504 		else if (reg->s32_min_value >= sval)
8505 			return 0;
8506 		break;
8507 	case BPF_JGE:
8508 		if (reg->u32_min_value >= val)
8509 			return 1;
8510 		else if (reg->u32_max_value < val)
8511 			return 0;
8512 		break;
8513 	case BPF_JSGE:
8514 		if (reg->s32_min_value >= sval)
8515 			return 1;
8516 		else if (reg->s32_max_value < sval)
8517 			return 0;
8518 		break;
8519 	case BPF_JLE:
8520 		if (reg->u32_max_value <= val)
8521 			return 1;
8522 		else if (reg->u32_min_value > val)
8523 			return 0;
8524 		break;
8525 	case BPF_JSLE:
8526 		if (reg->s32_max_value <= sval)
8527 			return 1;
8528 		else if (reg->s32_min_value > sval)
8529 			return 0;
8530 		break;
8531 	}
8532 
8533 	return -1;
8534 }
8535 
8536 
8537 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
8538 {
8539 	s64 sval = (s64)val;
8540 
8541 	switch (opcode) {
8542 	case BPF_JEQ:
8543 		if (tnum_is_const(reg->var_off))
8544 			return !!tnum_equals_const(reg->var_off, val);
8545 		break;
8546 	case BPF_JNE:
8547 		if (tnum_is_const(reg->var_off))
8548 			return !tnum_equals_const(reg->var_off, val);
8549 		break;
8550 	case BPF_JSET:
8551 		if ((~reg->var_off.mask & reg->var_off.value) & val)
8552 			return 1;
8553 		if (!((reg->var_off.mask | reg->var_off.value) & val))
8554 			return 0;
8555 		break;
8556 	case BPF_JGT:
8557 		if (reg->umin_value > val)
8558 			return 1;
8559 		else if (reg->umax_value <= val)
8560 			return 0;
8561 		break;
8562 	case BPF_JSGT:
8563 		if (reg->smin_value > sval)
8564 			return 1;
8565 		else if (reg->smax_value <= sval)
8566 			return 0;
8567 		break;
8568 	case BPF_JLT:
8569 		if (reg->umax_value < val)
8570 			return 1;
8571 		else if (reg->umin_value >= val)
8572 			return 0;
8573 		break;
8574 	case BPF_JSLT:
8575 		if (reg->smax_value < sval)
8576 			return 1;
8577 		else if (reg->smin_value >= sval)
8578 			return 0;
8579 		break;
8580 	case BPF_JGE:
8581 		if (reg->umin_value >= val)
8582 			return 1;
8583 		else if (reg->umax_value < val)
8584 			return 0;
8585 		break;
8586 	case BPF_JSGE:
8587 		if (reg->smin_value >= sval)
8588 			return 1;
8589 		else if (reg->smax_value < sval)
8590 			return 0;
8591 		break;
8592 	case BPF_JLE:
8593 		if (reg->umax_value <= val)
8594 			return 1;
8595 		else if (reg->umin_value > val)
8596 			return 0;
8597 		break;
8598 	case BPF_JSLE:
8599 		if (reg->smax_value <= sval)
8600 			return 1;
8601 		else if (reg->smin_value > sval)
8602 			return 0;
8603 		break;
8604 	}
8605 
8606 	return -1;
8607 }
8608 
8609 /* compute branch direction of the expression "if (reg opcode val) goto target;"
8610  * and return:
8611  *  1 - branch will be taken and "goto target" will be executed
8612  *  0 - branch will not be taken and fall-through to next insn
8613  * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
8614  *      value range is [0,10]
8615  */
8616 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
8617 			   bool is_jmp32)
8618 {
8619 	if (__is_pointer_value(false, reg)) {
8620 		if (!reg_type_not_null(reg->type))
8621 			return -1;
8622 
8623 		/* If the pointer is valid, tests against zero will fail, so we
8624 		 * can use this to decide the branch taken.
8625 		 */
8626 		if (val != 0)
8627 			return -1;
8628 
8629 		switch (opcode) {
8630 		case BPF_JEQ:
8631 			return 0;
8632 		case BPF_JNE:
8633 			return 1;
8634 		default:
8635 			return -1;
8636 		}
8637 	}
8638 
8639 	if (is_jmp32)
8640 		return is_branch32_taken(reg, val, opcode);
8641 	return is_branch64_taken(reg, val, opcode);
8642 }
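
/* A few illustrative outcomes (bounds are made up):
 *
 *   reg range [0,10], "if (reg < 5)"  -> -1 (either outcome possible)
 *   reg range [6,10], "if (reg < 5)"  ->  0 (never taken, umin 6 >= 5)
 *   reg range [0,4],  "if (reg < 5)"  ->  1 (always taken, umax 4 < 5)
 *   reg is a non-NULL pointer type such as PTR_TO_SOCKET,
 *   "if (reg == 0)"                   ->  0
 */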
8643 
8644 static int flip_opcode(u32 opcode)
8645 {
8646 	/* How can we transform "a <op> b" into "b <op> a"? */
8647 	static const u8 opcode_flip[16] = {
8648 		/* these stay the same */
8649 		[BPF_JEQ  >> 4] = BPF_JEQ,
8650 		[BPF_JNE  >> 4] = BPF_JNE,
8651 		[BPF_JSET >> 4] = BPF_JSET,
8652 		/* these swap "lesser" and "greater" (L and G in the opcodes) */
8653 		[BPF_JGE  >> 4] = BPF_JLE,
8654 		[BPF_JGT  >> 4] = BPF_JLT,
8655 		[BPF_JLE  >> 4] = BPF_JGE,
8656 		[BPF_JLT  >> 4] = BPF_JGT,
8657 		[BPF_JSGE >> 4] = BPF_JSLE,
8658 		[BPF_JSGT >> 4] = BPF_JSLT,
8659 		[BPF_JSLE >> 4] = BPF_JSGE,
8660 		[BPF_JSLT >> 4] = BPF_JSGT
8661 	};
8662 	return opcode_flip[opcode >> 4];
8663 }
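
/* E.g. "if (5 < r1)" can be treated as "if (r1 > 5)":
 * flip_opcode(BPF_JLT) == BPF_JGT and flip_opcode(BPF_JSGE) == BPF_JSLE.
 */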
8664 
8665 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
8666 				   struct bpf_reg_state *src_reg,
8667 				   u8 opcode)
8668 {
8669 	struct bpf_reg_state *pkt;
8670 
8671 	if (src_reg->type == PTR_TO_PACKET_END) {
8672 		pkt = dst_reg;
8673 	} else if (dst_reg->type == PTR_TO_PACKET_END) {
8674 		pkt = src_reg;
8675 		opcode = flip_opcode(opcode);
8676 	} else {
8677 		return -1;
8678 	}
8679 
8680 	if (pkt->range >= 0)
8681 		return -1;
8682 
8683 	switch (opcode) {
8684 	case BPF_JLE:
8685 		/* pkt <= pkt_end */
8686 		fallthrough;
8687 	case BPF_JGT:
8688 		/* pkt > pkt_end */
8689 		if (pkt->range == BEYOND_PKT_END)
8690 			/* pkt has at least one extra byte beyond pkt_end */
8691 			return opcode == BPF_JGT;
8692 		break;
8693 	case BPF_JLT:
8694 		/* pkt < pkt_end */
8695 		fallthrough;
8696 	case BPF_JGE:
8697 		/* pkt >= pkt_end */
8698 		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
8699 			return opcode == BPF_JGE;
8700 		break;
8701 	}
8702 	return -1;
8703 }
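
/* E.g. once mark_pkt_end() has recorded pkt->range == BEYOND_PKT_END on a
 * path, a later "if (pkt > pkt_end)" on that path is known taken (returns 1)
 * and "if (pkt <= pkt_end)" is known not taken (returns 0).
 */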
8704 
8705 /* Adjusts the register min/max values in the case that the dst_reg is the
8706  * variable register that we are working on, and src_reg is a constant or we're
8707  * simply doing a BPF_K check.
8708  * In JEQ/JNE cases we also adjust the var_off values.
8709  */
8710 static void reg_set_min_max(struct bpf_reg_state *true_reg,
8711 			    struct bpf_reg_state *false_reg,
8712 			    u64 val, u32 val32,
8713 			    u8 opcode, bool is_jmp32)
8714 {
8715 	struct tnum false_32off = tnum_subreg(false_reg->var_off);
8716 	struct tnum false_64off = false_reg->var_off;
8717 	struct tnum true_32off = tnum_subreg(true_reg->var_off);
8718 	struct tnum true_64off = true_reg->var_off;
8719 	s64 sval = (s64)val;
8720 	s32 sval32 = (s32)val32;
8721 
8722 	/* If the dst_reg is a pointer, we can't learn anything about its
8723 	 * variable offset from the compare (unless src_reg were a pointer into
8724 	 * the same object, but we don't bother with that.
8725 	 * Since false_reg and true_reg have the same type by construction, we
8726 	 * only need to check one of them for pointerness.
8727 	 */
8728 	if (__is_pointer_value(false, false_reg))
8729 		return;
8730 
8731 	switch (opcode) {
8732 	case BPF_JEQ:
8733 	case BPF_JNE:
8734 	{
8735 		struct bpf_reg_state *reg =
8736 			opcode == BPF_JEQ ? true_reg : false_reg;
8737 
8738 		/* JEQ/JNE comparison doesn't change the register equivalence.
8739 		 * r1 = r2;
8740 		 * if (r1 == 42) goto label;
8741 		 * ...
8742 		 * label: // here both r1 and r2 are known to be 42.
8743 		 *
8744 		 * Hence when marking a register as known, preserve its ID.
8745 		 */
8746 		if (is_jmp32)
8747 			__mark_reg32_known(reg, val32);
8748 		else
8749 			___mark_reg_known(reg, val);
8750 		break;
8751 	}
8752 	case BPF_JSET:
8753 		if (is_jmp32) {
8754 			false_32off = tnum_and(false_32off, tnum_const(~val32));
8755 			if (is_power_of_2(val32))
8756 				true_32off = tnum_or(true_32off,
8757 						     tnum_const(val32));
8758 		} else {
8759 			false_64off = tnum_and(false_64off, tnum_const(~val));
8760 			if (is_power_of_2(val))
8761 				true_64off = tnum_or(true_64off,
8762 						     tnum_const(val));
8763 		}
8764 		break;
8765 	case BPF_JGE:
8766 	case BPF_JGT:
8767 	{
8768 		if (is_jmp32) {
8769 			u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
8770 			u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
8771 
8772 			false_reg->u32_max_value = min(false_reg->u32_max_value,
8773 						       false_umax);
8774 			true_reg->u32_min_value = max(true_reg->u32_min_value,
8775 						      true_umin);
8776 		} else {
8777 			u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
8778 			u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
8779 
8780 			false_reg->umax_value = min(false_reg->umax_value, false_umax);
8781 			true_reg->umin_value = max(true_reg->umin_value, true_umin);
8782 		}
8783 		break;
8784 	}
8785 	case BPF_JSGE:
8786 	case BPF_JSGT:
8787 	{
8788 		if (is_jmp32) {
8789 			s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
8790 			s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
8791 
8792 			false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
8793 			true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
8794 		} else {
8795 			s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
8796 			s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
8797 
8798 			false_reg->smax_value = min(false_reg->smax_value, false_smax);
8799 			true_reg->smin_value = max(true_reg->smin_value, true_smin);
8800 		}
8801 		break;
8802 	}
8803 	case BPF_JLE:
8804 	case BPF_JLT:
8805 	{
8806 		if (is_jmp32) {
8807 			u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
8808 			u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
8809 
8810 			false_reg->u32_min_value = max(false_reg->u32_min_value,
8811 						       false_umin);
8812 			true_reg->u32_max_value = min(true_reg->u32_max_value,
8813 						      true_umax);
8814 		} else {
8815 			u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
8816 			u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
8817 
8818 			false_reg->umin_value = max(false_reg->umin_value, false_umin);
8819 			true_reg->umax_value = min(true_reg->umax_value, true_umax);
8820 		}
8821 		break;
8822 	}
8823 	case BPF_JSLE:
8824 	case BPF_JSLT:
8825 	{
8826 		if (is_jmp32) {
8827 			s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
8828 			s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
8829 
8830 			false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
8831 			true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
8832 		} else {
8833 			s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
8834 			s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
8835 
8836 			false_reg->smin_value = max(false_reg->smin_value, false_smin);
8837 			true_reg->smax_value = min(true_reg->smax_value, true_smax);
8838 		}
8839 		break;
8840 	}
8841 	default:
8842 		return;
8843 	}
8844 
8845 	if (is_jmp32) {
8846 		false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
8847 					     tnum_subreg(false_32off));
8848 		true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
8849 					    tnum_subreg(true_32off));
8850 		__reg_combine_32_into_64(false_reg);
8851 		__reg_combine_32_into_64(true_reg);
8852 	} else {
8853 		false_reg->var_off = false_64off;
8854 		true_reg->var_off = true_64off;
8855 		__reg_combine_64_into_32(false_reg);
8856 		__reg_combine_64_into_32(true_reg);
8857 	}
8858 }
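
/* A worked example with made-up bounds: dst_reg has umin=0/umax=100 and the
 * insn is "if (dst > 10)" (BPF_JGT, val == 10, !is_jmp32):
 *
 *   false branch: umax_value = min(100, 10)     = 10  (dst <= 10 here)
 *   true branch:  umin_value = max(0,   10 + 1) = 11  (dst >= 11 here)
 */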
8859 
8860 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
8861  * the variable reg.
8862  */
8863 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
8864 				struct bpf_reg_state *false_reg,
8865 				u64 val, u32 val32,
8866 				u8 opcode, bool is_jmp32)
8867 {
8868 	opcode = flip_opcode(opcode);
8869 	/* This uses zero as "not present in table"; luckily the zero opcode,
8870 	 * BPF_JA, can't get here.
8871 	 */
8872 	if (opcode)
8873 		reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
8874 }
8875 
8876 /* Regs are known to be equal, so intersect their min/max/var_off */
8877 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
8878 				  struct bpf_reg_state *dst_reg)
8879 {
8880 	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
8881 							dst_reg->umin_value);
8882 	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
8883 							dst_reg->umax_value);
8884 	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
8885 							dst_reg->smin_value);
8886 	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
8887 							dst_reg->smax_value);
8888 	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
8889 							     dst_reg->var_off);
8890 	/* We might have learned new bounds from the var_off. */
8891 	__update_reg_bounds(src_reg);
8892 	__update_reg_bounds(dst_reg);
8893 	/* We might have learned something about the sign bit. */
8894 	__reg_deduce_bounds(src_reg);
8895 	__reg_deduce_bounds(dst_reg);
8896 	/* We might have learned some bits from the bounds. */
8897 	__reg_bound_offset(src_reg);
8898 	__reg_bound_offset(dst_reg);
8899 	/* Intersecting with the old var_off might have improved our bounds
8900 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
8901 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
8902 	 */
8903 	__update_reg_bounds(src_reg);
8904 	__update_reg_bounds(dst_reg);
8905 }
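
/* E.g. (made-up bounds): if src has umin=0/umax=50 and dst has
 * umin=10/umax=100, then after a taken JEQ both end up with umin=10/umax=50.
 */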
8906 
8907 static void reg_combine_min_max(struct bpf_reg_state *true_src,
8908 				struct bpf_reg_state *true_dst,
8909 				struct bpf_reg_state *false_src,
8910 				struct bpf_reg_state *false_dst,
8911 				u8 opcode)
8912 {
8913 	switch (opcode) {
8914 	case BPF_JEQ:
8915 		__reg_combine_min_max(true_src, true_dst);
8916 		break;
8917 	case BPF_JNE:
8918 		__reg_combine_min_max(false_src, false_dst);
8919 		break;
8920 	}
8921 }
8922 
8923 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
8924 				 struct bpf_reg_state *reg, u32 id,
8925 				 bool is_null)
8926 {
8927 	if (reg_type_may_be_null(reg->type) && reg->id == id &&
8928 	    !WARN_ON_ONCE(!reg->id)) {
8929 		/* Old offset (both fixed and variable parts) should
8930 		 * have been known-zero, because we don't allow pointer
8931 		 * arithmetic on pointers that might be NULL.
8932 		 */
8933 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
8934 				 !tnum_equals_const(reg->var_off, 0) ||
8935 				 reg->off)) {
8936 			__mark_reg_known_zero(reg);
8937 			reg->off = 0;
8938 		}
8939 		if (is_null) {
8940 			reg->type = SCALAR_VALUE;
8941 			/* We don't need id and ref_obj_id from this point
8942 			 * onwards anymore, so reset them to give state
8943 			 * pruning a chance to take effect.
8944 			 */
8945 			reg->id = 0;
8946 			reg->ref_obj_id = 0;
8947 
8948 			return;
8949 		}
8950 
8951 		mark_ptr_not_null_reg(reg);
8952 
8953 		if (!reg_may_point_to_spin_lock(reg)) {
8954 			/* For not-NULL ptr, reg->ref_obj_id will be reset
8955 			 * in release_reg_references().
8956 			 *
8957 			 * reg->id is still used by spin_lock ptr. Other
8958 			 * than spin_lock ptr type, reg->id can be reset.
8959 			 */
8960 			reg->id = 0;
8961 		}
8962 	}
8963 }
8964 
8965 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
8966 				    bool is_null)
8967 {
8968 	struct bpf_reg_state *reg;
8969 	int i;
8970 
8971 	for (i = 0; i < MAX_BPF_REG; i++)
8972 		mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
8973 
8974 	bpf_for_each_spilled_reg(i, state, reg) {
8975 		if (!reg)
8976 			continue;
8977 		mark_ptr_or_null_reg(state, reg, id, is_null);
8978 	}
8979 }
8980 
8981 /* The logic is similar to find_good_pkt_pointers(), both could eventually
8982  * be folded together at some point.
8983  */
8984 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
8985 				  bool is_null)
8986 {
8987 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
8988 	struct bpf_reg_state *regs = state->regs;
8989 	u32 ref_obj_id = regs[regno].ref_obj_id;
8990 	u32 id = regs[regno].id;
8991 	int i;
8992 
8993 	if (ref_obj_id && ref_obj_id == id && is_null)
8994 		/* regs[regno] is in the " == NULL" branch.
8995 		 * No one could have freed the reference state before
8996 		 * doing the NULL check.
8997 		 */
8998 		WARN_ON_ONCE(release_reference_state(state, id));
8999 
9000 	for (i = 0; i <= vstate->curframe; i++)
9001 		__mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
9002 }
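
/* The typical pattern this handles, as a sketch:
 *
 *   r0 = bpf_map_lookup_elem(...);  // PTR_TO_MAP_VALUE_OR_NULL, id=N
 *   r6 = r0;                        // r6 shares id=N
 *   if (r0 == 0) goto out;          // "== NULL" branch: r0 and r6 -> SCALAR
 *   *(u64 *)(r6 + 0) = 1;           // here r0 and r6 -> PTR_TO_MAP_VALUE
 */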
9003 
9004 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
9005 				   struct bpf_reg_state *dst_reg,
9006 				   struct bpf_reg_state *src_reg,
9007 				   struct bpf_verifier_state *this_branch,
9008 				   struct bpf_verifier_state *other_branch)
9009 {
9010 	if (BPF_SRC(insn->code) != BPF_X)
9011 		return false;
9012 
9013 	/* Pointers are always 64-bit. */
9014 	if (BPF_CLASS(insn->code) == BPF_JMP32)
9015 		return false;
9016 
9017 	switch (BPF_OP(insn->code)) {
9018 	case BPF_JGT:
9019 		if ((dst_reg->type == PTR_TO_PACKET &&
9020 		     src_reg->type == PTR_TO_PACKET_END) ||
9021 		    (dst_reg->type == PTR_TO_PACKET_META &&
9022 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9023 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
9024 			find_good_pkt_pointers(this_branch, dst_reg,
9025 					       dst_reg->type, false);
9026 			mark_pkt_end(other_branch, insn->dst_reg, true);
9027 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
9028 			    src_reg->type == PTR_TO_PACKET) ||
9029 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9030 			    src_reg->type == PTR_TO_PACKET_META)) {
9031 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
9032 			find_good_pkt_pointers(other_branch, src_reg,
9033 					       src_reg->type, true);
9034 			mark_pkt_end(this_branch, insn->src_reg, false);
9035 		} else {
9036 			return false;
9037 		}
9038 		break;
9039 	case BPF_JLT:
9040 		if ((dst_reg->type == PTR_TO_PACKET &&
9041 		     src_reg->type == PTR_TO_PACKET_END) ||
9042 		    (dst_reg->type == PTR_TO_PACKET_META &&
9043 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9044 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
9045 			find_good_pkt_pointers(other_branch, dst_reg,
9046 					       dst_reg->type, true);
9047 			mark_pkt_end(this_branch, insn->dst_reg, false);
9048 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
9049 			    src_reg->type == PTR_TO_PACKET) ||
9050 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9051 			    src_reg->type == PTR_TO_PACKET_META)) {
9052 			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
9053 			find_good_pkt_pointers(this_branch, src_reg,
9054 					       src_reg->type, false);
9055 			mark_pkt_end(other_branch, insn->src_reg, true);
9056 		} else {
9057 			return false;
9058 		}
9059 		break;
9060 	case BPF_JGE:
9061 		if ((dst_reg->type == PTR_TO_PACKET &&
9062 		     src_reg->type == PTR_TO_PACKET_END) ||
9063 		    (dst_reg->type == PTR_TO_PACKET_META &&
9064 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9065 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
9066 			find_good_pkt_pointers(this_branch, dst_reg,
9067 					       dst_reg->type, true);
9068 			mark_pkt_end(other_branch, insn->dst_reg, false);
9069 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
9070 			    src_reg->type == PTR_TO_PACKET) ||
9071 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9072 			    src_reg->type == PTR_TO_PACKET_META)) {
9073 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
9074 			find_good_pkt_pointers(other_branch, src_reg,
9075 					       src_reg->type, false);
9076 			mark_pkt_end(this_branch, insn->src_reg, true);
9077 		} else {
9078 			return false;
9079 		}
9080 		break;
9081 	case BPF_JLE:
9082 		if ((dst_reg->type == PTR_TO_PACKET &&
9083 		     src_reg->type == PTR_TO_PACKET_END) ||
9084 		    (dst_reg->type == PTR_TO_PACKET_META &&
9085 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9086 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
9087 			find_good_pkt_pointers(other_branch, dst_reg,
9088 					       dst_reg->type, false);
9089 			mark_pkt_end(this_branch, insn->dst_reg, true);
9090 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
9091 			    src_reg->type == PTR_TO_PACKET) ||
9092 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9093 			    src_reg->type == PTR_TO_PACKET_META)) {
9094 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
9095 			find_good_pkt_pointers(this_branch, src_reg,
9096 					       src_reg->type, true);
9097 			mark_pkt_end(other_branch, insn->src_reg, false);
9098 		} else {
9099 			return false;
9100 		}
9101 		break;
9102 	default:
9103 		return false;
9104 	}
9105 
9106 	return true;
9107 }
9108 
9109 static void find_equal_scalars(struct bpf_verifier_state *vstate,
9110 			       struct bpf_reg_state *known_reg)
9111 {
9112 	struct bpf_func_state *state;
9113 	struct bpf_reg_state *reg;
9114 	int i, j;
9115 
9116 	for (i = 0; i <= vstate->curframe; i++) {
9117 		state = vstate->frame[i];
9118 		for (j = 0; j < MAX_BPF_REG; j++) {
9119 			reg = &state->regs[j];
9120 			if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9121 				*reg = *known_reg;
9122 		}
9123 
9124 		bpf_for_each_spilled_reg(j, state, reg) {
9125 			if (!reg)
9126 				continue;
9127 			if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9128 				*reg = *known_reg;
9129 		}
9130 	}
9131 }
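
/* E.g. (sketch): after "r6 = r7;" both registers share an id; when a later
 * "if (r6 > 10) goto l;" narrows r6 to umin=11 on the taken branch, this
 * helper copies the narrowed state into r7 in that branch as well.
 */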
9132 
9133 static int check_cond_jmp_op(struct bpf_verifier_env *env,
9134 			     struct bpf_insn *insn, int *insn_idx)
9135 {
9136 	struct bpf_verifier_state *this_branch = env->cur_state;
9137 	struct bpf_verifier_state *other_branch;
9138 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
9139 	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
9140 	u8 opcode = BPF_OP(insn->code);
9141 	bool is_jmp32;
9142 	int pred = -1;
9143 	int err;
9144 
9145 	/* Only conditional jumps are expected to reach here. */
9146 	if (opcode == BPF_JA || opcode > BPF_JSLE) {
9147 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
9148 		return -EINVAL;
9149 	}
9150 
9151 	if (BPF_SRC(insn->code) == BPF_X) {
9152 		if (insn->imm != 0) {
9153 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
9154 			return -EINVAL;
9155 		}
9156 
9157 		/* check src1 operand */
9158 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
9159 		if (err)
9160 			return err;
9161 
9162 		if (is_pointer_value(env, insn->src_reg)) {
9163 			verbose(env, "R%d pointer comparison prohibited\n",
9164 				insn->src_reg);
9165 			return -EACCES;
9166 		}
9167 		src_reg = &regs[insn->src_reg];
9168 	} else {
9169 		if (insn->src_reg != BPF_REG_0) {
9170 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
9171 			return -EINVAL;
9172 		}
9173 	}
9174 
9175 	/* check src2 operand */
9176 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9177 	if (err)
9178 		return err;
9179 
9180 	dst_reg = &regs[insn->dst_reg];
9181 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
9182 
9183 	if (BPF_SRC(insn->code) == BPF_K) {
9184 		pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
9185 	} else if (src_reg->type == SCALAR_VALUE &&
9186 		   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
9187 		pred = is_branch_taken(dst_reg,
9188 				       tnum_subreg(src_reg->var_off).value,
9189 				       opcode,
9190 				       is_jmp32);
9191 	} else if (src_reg->type == SCALAR_VALUE &&
9192 		   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
9193 		pred = is_branch_taken(dst_reg,
9194 				       src_reg->var_off.value,
9195 				       opcode,
9196 				       is_jmp32);
9197 	} else if (reg_is_pkt_pointer_any(dst_reg) &&
9198 		   reg_is_pkt_pointer_any(src_reg) &&
9199 		   !is_jmp32) {
9200 		pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
9201 	}
9202 
9203 	if (pred >= 0) {
9204 		/* If we get here with a dst_reg pointer type it is because
9205 		 * above is_branch_taken() special cased the 0 comparison.
9206 		 */
9207 		if (!__is_pointer_value(false, dst_reg))
9208 			err = mark_chain_precision(env, insn->dst_reg);
9209 		if (BPF_SRC(insn->code) == BPF_X && !err &&
9210 		    !__is_pointer_value(false, src_reg))
9211 			err = mark_chain_precision(env, insn->src_reg);
9212 		if (err)
9213 			return err;
9214 	}
9215 
9216 	if (pred == 1) {
9217 		/* Only follow the goto, ignore fall-through. If needed, push
9218 		 * the fall-through branch for simulation under speculative
9219 		 * execution.
9220 		 */
9221 		if (!env->bypass_spec_v1 &&
9222 		    !sanitize_speculative_path(env, insn, *insn_idx + 1,
9223 					       *insn_idx))
9224 			return -EFAULT;
9225 		*insn_idx += insn->off;
9226 		return 0;
9227 	} else if (pred == 0) {
9228 		/* Only follow the fall-through branch, since that's where the
9229 		 * program will go. If needed, push the goto branch for
9230 		 * simulation under speculative execution.
9231 		 */
9232 		if (!env->bypass_spec_v1 &&
9233 		    !sanitize_speculative_path(env, insn,
9234 					       *insn_idx + insn->off + 1,
9235 					       *insn_idx))
9236 			return -EFAULT;
9237 		return 0;
9238 	}
9239 
9240 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
9241 				  false);
9242 	if (!other_branch)
9243 		return -EFAULT;
9244 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
9245 
9246 	/* detect if we are comparing against a constant value so we can adjust
9247 	 * our min/max values for our dst register.
9248 	 * this is only legit if both are scalars (or pointers to the same
9249 	 * object, I suppose, but we don't support that right now), because
9250 	 * otherwise the different base pointers mean the offsets aren't
9251 	 * comparable.
9252 	 */
9253 	if (BPF_SRC(insn->code) == BPF_X) {
9254 		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
9255 
9256 		if (dst_reg->type == SCALAR_VALUE &&
9257 		    src_reg->type == SCALAR_VALUE) {
9258 			if (tnum_is_const(src_reg->var_off) ||
9259 			    (is_jmp32 &&
9260 			     tnum_is_const(tnum_subreg(src_reg->var_off))))
9261 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
9262 						dst_reg,
9263 						src_reg->var_off.value,
9264 						tnum_subreg(src_reg->var_off).value,
9265 						opcode, is_jmp32);
9266 			else if (tnum_is_const(dst_reg->var_off) ||
9267 				 (is_jmp32 &&
9268 				  tnum_is_const(tnum_subreg(dst_reg->var_off))))
9269 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
9270 						    src_reg,
9271 						    dst_reg->var_off.value,
9272 						    tnum_subreg(dst_reg->var_off).value,
9273 						    opcode, is_jmp32);
9274 			else if (!is_jmp32 &&
9275 				 (opcode == BPF_JEQ || opcode == BPF_JNE))
9276 				/* Comparing for equality, we can combine knowledge */
9277 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
9278 						    &other_branch_regs[insn->dst_reg],
9279 						    src_reg, dst_reg, opcode);
9280 			if (src_reg->id &&
9281 			    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
9282 				find_equal_scalars(this_branch, src_reg);
9283 				find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
9284 			}
9285 
9286 		}
9287 	} else if (dst_reg->type == SCALAR_VALUE) {
9288 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
9289 					dst_reg, insn->imm, (u32)insn->imm,
9290 					opcode, is_jmp32);
9291 	}
9292 
9293 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
9294 	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
9295 		find_equal_scalars(this_branch, dst_reg);
9296 		find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
9297 	}
9298 
9299 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
9300 	 * NOTE: these optimizations below are related to pointer comparisons,
9301 	 *       which will never be JMP32.
9302 	 */
9303 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
9304 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
9305 	    reg_type_may_be_null(dst_reg->type)) {
9306 		/* Mark all identical registers in each branch as either
9307 		 * safe or unknown depending on the R == 0 or R != 0 conditional.
9308 		 */
9309 		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
9310 				      opcode == BPF_JNE);
9311 		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
9312 				      opcode == BPF_JEQ);
9313 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
9314 					   this_branch, other_branch) &&
9315 		   is_pointer_value(env, insn->dst_reg)) {
9316 		verbose(env, "R%d pointer comparison prohibited\n",
9317 			insn->dst_reg);
9318 		return -EACCES;
9319 	}
9320 	if (env->log.level & BPF_LOG_LEVEL)
9321 		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
9322 	return 0;
9323 }
9324 
9325 /* verify BPF_LD_IMM64 instruction */
9326 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
9327 {
9328 	struct bpf_insn_aux_data *aux = cur_aux(env);
9329 	struct bpf_reg_state *regs = cur_regs(env);
9330 	struct bpf_reg_state *dst_reg;
9331 	struct bpf_map *map;
9332 	int err;
9333 
9334 	if (BPF_SIZE(insn->code) != BPF_DW) {
9335 		verbose(env, "invalid BPF_LD_IMM insn\n");
9336 		return -EINVAL;
9337 	}
9338 	if (insn->off != 0) {
9339 		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
9340 		return -EINVAL;
9341 	}
9342 
9343 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
9344 	if (err)
9345 		return err;
9346 
9347 	dst_reg = &regs[insn->dst_reg];
9348 	if (insn->src_reg == 0) {
9349 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
9350 
9351 		dst_reg->type = SCALAR_VALUE;
9352 		__mark_reg_known(&regs[insn->dst_reg], imm);
9353 		return 0;
9354 	}
9355 
9356 	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
9357 		mark_reg_known_zero(env, regs, insn->dst_reg);
9358 
9359 		dst_reg->type = aux->btf_var.reg_type;
9360 		switch (dst_reg->type) {
9361 		case PTR_TO_MEM:
9362 			dst_reg->mem_size = aux->btf_var.mem_size;
9363 			break;
9364 		case PTR_TO_BTF_ID:
9365 		case PTR_TO_PERCPU_BTF_ID:
9366 			dst_reg->btf = aux->btf_var.btf;
9367 			dst_reg->btf_id = aux->btf_var.btf_id;
9368 			break;
9369 		default:
9370 			verbose(env, "bpf verifier is misconfigured\n");
9371 			return -EFAULT;
9372 		}
9373 		return 0;
9374 	}
9375 
9376 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
9377 		struct bpf_prog_aux *aux = env->prog->aux;
9378 		u32 subprogno = find_subprog(env,
9379 					     env->insn_idx + insn->imm + 1);
9380 
9381 		if (!aux->func_info) {
9382 			verbose(env, "missing btf func_info\n");
9383 			return -EINVAL;
9384 		}
9385 		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
9386 			verbose(env, "callback function not static\n");
9387 			return -EINVAL;
9388 		}
9389 
9390 		dst_reg->type = PTR_TO_FUNC;
9391 		dst_reg->subprogno = subprogno;
9392 		return 0;
9393 	}
9394 
9395 	map = env->used_maps[aux->map_index];
9396 	mark_reg_known_zero(env, regs, insn->dst_reg);
9397 	dst_reg->map_ptr = map;
9398 
9399 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
9400 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
9401 		dst_reg->type = PTR_TO_MAP_VALUE;
9402 		dst_reg->off = aux->map_off;
9403 		if (map_value_has_spin_lock(map))
9404 			dst_reg->id = ++env->id_gen;
9405 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
9406 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
9407 		dst_reg->type = CONST_PTR_TO_MAP;
9408 	} else {
9409 		verbose(env, "bpf verifier is misconfigured\n");
9410 		return -EINVAL;
9411 	}
9412 
9413 	return 0;
9414 }
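
/* E.g. loading the plain constant 0x1122334455667788 into r1 (src_reg == 0)
 * spans two insns, matching the reconstruction above (a sketch):
 *
 *   insn[0]: BPF_LD | BPF_DW | BPF_IMM, dst_reg = 1, imm = 0x55667788
 *   insn[1]: imm = 0x11223344
 */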
9415 
9416 static bool may_access_skb(enum bpf_prog_type type)
9417 {
9418 	switch (type) {
9419 	case BPF_PROG_TYPE_SOCKET_FILTER:
9420 	case BPF_PROG_TYPE_SCHED_CLS:
9421 	case BPF_PROG_TYPE_SCHED_ACT:
9422 		return true;
9423 	default:
9424 		return false;
9425 	}
9426 }
9427 
9428 /* verify safety of LD_ABS|LD_IND instructions:
9429  * - they can only appear in programs where ctx == skb
9430  * - since they are wrappers of function calls, they scratch R1-R5 registers,
9431  *   preserve R6-R9, and store return value into R0
9432  *
9433  * Implicit input:
9434  *   ctx == skb == R6 == CTX
9435  *
9436  * Explicit input:
9437  *   SRC == any register
9438  *   IMM == 32-bit immediate
9439  *
9440  * Output:
9441  *   R0 - 8/16/32-bit skb data converted to cpu endianness
9442  */
9443 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
9444 {
9445 	struct bpf_reg_state *regs = cur_regs(env);
9446 	static const int ctx_reg = BPF_REG_6;
9447 	u8 mode = BPF_MODE(insn->code);
9448 	int i, err;
9449 
9450 	if (!may_access_skb(resolve_prog_type(env->prog))) {
9451 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
9452 		return -EINVAL;
9453 	}
9454 
9455 	if (!env->ops->gen_ld_abs) {
9456 		verbose(env, "bpf verifier is misconfigured\n");
9457 		return -EINVAL;
9458 	}
9459 
9460 	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
9461 	    BPF_SIZE(insn->code) == BPF_DW ||
9462 	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
9463 		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
9464 		return -EINVAL;
9465 	}
9466 
9467 	/* check whether implicit source operand (register R6) is readable */
9468 	err = check_reg_arg(env, ctx_reg, SRC_OP);
9469 	if (err)
9470 		return err;
9471 
9472 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
9473 	 * gen_ld_abs() may terminate the program at runtime, leading to
9474 	 * reference leak.
9475 	 */
9476 	err = check_reference_leak(env);
9477 	if (err) {
9478 		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
9479 		return err;
9480 	}
9481 
9482 	if (env->cur_state->active_spin_lock) {
9483 		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
9484 		return -EINVAL;
9485 	}
9486 
9487 	if (regs[ctx_reg].type != PTR_TO_CTX) {
9488 		verbose(env,
9489 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
9490 		return -EINVAL;
9491 	}
9492 
9493 	if (mode == BPF_IND) {
9494 		/* check explicit source operand */
9495 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
9496 		if (err)
9497 			return err;
9498 	}
9499 
9500 	err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
9501 	if (err < 0)
9502 		return err;
9503 
9504 	/* reset caller saved regs to unreadable */
9505 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9506 		mark_reg_not_init(env, regs, caller_saved[i]);
9507 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
9508 	}
9509 
9510 	/* mark destination R0 register as readable, since it contains
9511 	 * the value fetched from the packet.
9512 	 * Already marked as written above.
9513 	 */
9514 	mark_reg_unknown(env, regs, BPF_REG_0);
9515 	/* ld_abs loads up to 32 bits of skb data. */
9516 	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
9517 	return 0;
9518 }
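
/* E.g. BPF_LD_ABS(BPF_H, 12) in a socket filter loads the 16-bit value at
 * skb byte offset 12 into R0 in CPU endianness and scratches R1-R5 like a
 * helper call; on a packet socket that offset would be the EtherType (an
 * illustrative assumption about the skb layout).
 */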
9519 
9520 static int check_return_code(struct bpf_verifier_env *env)
9521 {
9522 	struct tnum enforce_attach_type_range = tnum_unknown;
9523 	const struct bpf_prog *prog = env->prog;
9524 	struct bpf_reg_state *reg;
9525 	struct tnum range = tnum_range(0, 1);
9526 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9527 	int err;
9528 	struct bpf_func_state *frame = env->cur_state->frame[0];
9529 	const bool is_subprog = frame->subprogno;
9530 
9531 	/* LSM and struct_ops func-ptr's return type could be "void" */
9532 	if (!is_subprog &&
9533 	    (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
9534 	     prog_type == BPF_PROG_TYPE_LSM) &&
9535 	    !prog->aux->attach_func_proto->type)
9536 		return 0;
9537 
9538 	/* eBPF calling convention is such that R0 is used
9539 	 * to return the value from eBPF program.
9540 	 * Make sure that it's readable at this time
9541 	 * of bpf_exit, which means that program wrote
9542 	 * something into it earlier
9543 	 */
9544 	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
9545 	if (err)
9546 		return err;
9547 
9548 	if (is_pointer_value(env, BPF_REG_0)) {
9549 		verbose(env, "R0 leaks addr as return value\n");
9550 		return -EACCES;
9551 	}
9552 
9553 	reg = cur_regs(env) + BPF_REG_0;
9554 
9555 	if (frame->in_async_callback_fn) {
9556 		/* enforce return zero from async callbacks like timer */
9557 		if (reg->type != SCALAR_VALUE) {
9558 			verbose(env, "In async callback the register R0 is not a known value (%s)\n",
9559 				reg_type_str[reg->type]);
9560 			return -EINVAL;
9561 		}
9562 
9563 		if (!tnum_in(tnum_const(0), reg->var_off)) {
9564 			verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
9565 			return -EINVAL;
9566 		}
9567 		return 0;
9568 	}
9569 
9570 	if (is_subprog) {
9571 		if (reg->type != SCALAR_VALUE) {
9572 			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
9573 				reg_type_str[reg->type]);
9574 			return -EINVAL;
9575 		}
9576 		return 0;
9577 	}
9578 
9579 	switch (prog_type) {
9580 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
9581 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
9582 		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
9583 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
9584 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
9585 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
9586 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
9587 			range = tnum_range(1, 1);
9588 		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
9589 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
9590 			range = tnum_range(0, 3);
9591 		break;
9592 	case BPF_PROG_TYPE_CGROUP_SKB:
9593 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
9594 			range = tnum_range(0, 3);
9595 			enforce_attach_type_range = tnum_range(2, 3);
9596 		}
9597 		break;
9598 	case BPF_PROG_TYPE_CGROUP_SOCK:
9599 	case BPF_PROG_TYPE_SOCK_OPS:
9600 	case BPF_PROG_TYPE_CGROUP_DEVICE:
9601 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
9602 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
9603 		break;
9604 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
9605 		if (!env->prog->aux->attach_btf_id)
9606 			return 0;
9607 		range = tnum_const(0);
9608 		break;
9609 	case BPF_PROG_TYPE_TRACING:
9610 		switch (env->prog->expected_attach_type) {
9611 		case BPF_TRACE_FENTRY:
9612 		case BPF_TRACE_FEXIT:
9613 			range = tnum_const(0);
9614 			break;
9615 		case BPF_TRACE_RAW_TP:
9616 		case BPF_MODIFY_RETURN:
9617 			return 0;
9618 		case BPF_TRACE_ITER:
9619 			break;
9620 		default:
9621 			return -ENOTSUPP;
9622 		}
9623 		break;
9624 	case BPF_PROG_TYPE_SK_LOOKUP:
9625 		range = tnum_range(SK_DROP, SK_PASS);
9626 		break;
9627 	case BPF_PROG_TYPE_EXT:
9628 		/* freplace program can return anything as its return value
9629 		 * depends on the to-be-replaced kernel func or bpf program.
9630 		 */
9631 	default:
9632 		return 0;
9633 	}
9634 
9635 	if (reg->type != SCALAR_VALUE) {
9636 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
9637 			reg_type_str[reg->type]);
9638 		return -EINVAL;
9639 	}
9640 
9641 	if (!tnum_in(range, reg->var_off)) {
9642 		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
9643 		return -EINVAL;
9644 	}
9645 
9646 	if (!tnum_is_unknown(enforce_attach_type_range) &&
9647 	    tnum_in(enforce_attach_type_range, reg->var_off))
9648 		env->prog->enforce_expected_attach_type = 1;
9649 	return 0;
9650 }
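
/* E.g. a BPF_PROG_TYPE_CGROUP_SKB program attached to BPF_CGROUP_INET_EGRESS
 * must leave R0 in [0, 3]; a program ending in "r0 = 4; exit;" is rejected
 * at program exit by the range check above.
 */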
9651 
9652 /* non-recursive DFS pseudo code
9653  * 1  procedure DFS-iterative(G,v):
9654  * 2      label v as discovered
9655  * 3      let S be a stack
9656  * 4      S.push(v)
9657  * 5      while S is not empty
9658  * 6            t <- S.pop()
9659  * 7            if t is what we're looking for:
9660  * 8                return t
9661  * 9            for all edges e in G.adjacentEdges(t) do
9662  * 10               if edge e is already labelled
9663  * 11                   continue with the next edge
9664  * 12               w <- G.adjacentVertex(t,e)
9665  * 13               if vertex w is not discovered and not explored
9666  * 14                   label e as tree-edge
9667  * 15                   label w as discovered
9668  * 16                   S.push(w)
9669  * 17                   continue at 5
9670  * 18               else if vertex w is discovered
9671  * 19                   label e as back-edge
9672  * 20               else
9673  * 21                   // vertex w is explored
9674  * 22                   label e as forward- or cross-edge
9675  * 23           label t as explored
9676  * 24           S.pop()
9677  *
9678  * convention:
9679  * 0x10 - discovered
9680  * 0x11 - discovered and fall-through edge labelled
9681  * 0x12 - discovered and fall-through and branch edges labelled
9682  * 0x20 - explored
9683  */
9684 
9685 enum {
9686 	DISCOVERED = 0x10,
9687 	EXPLORED = 0x20,
9688 	FALLTHROUGH = 1,
9689 	BRANCH = 2,
9690 };
9691 
9692 static u32 state_htab_size(struct bpf_verifier_env *env)
9693 {
9694 	return env->prog->len;
9695 }
9696 
9697 static struct bpf_verifier_state_list **explored_state(
9698 					struct bpf_verifier_env *env,
9699 					int idx)
9700 {
9701 	struct bpf_verifier_state *cur = env->cur_state;
9702 	struct bpf_func_state *state = cur->frame[cur->curframe];
9703 
9704 	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
9705 }
9706 
9707 static void init_explored_state(struct bpf_verifier_env *env, int idx)
9708 {
9709 	env->insn_aux_data[idx].prune_point = true;
9710 }
9711 
9712 enum {
9713 	DONE_EXPLORING = 0,
9714 	KEEP_EXPLORING = 1,
9715 };
9716 
9717 /* t, w, e - match pseudo-code above:
9718  * t - index of current instruction
9719  * w - next instruction
9720  * e - edge
9721  */
9722 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
9723 		     bool loop_ok)
9724 {
9725 	int *insn_stack = env->cfg.insn_stack;
9726 	int *insn_state = env->cfg.insn_state;
9727 
9728 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
9729 		return DONE_EXPLORING;
9730 
9731 	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
9732 		return DONE_EXPLORING;
9733 
9734 	if (w < 0 || w >= env->prog->len) {
9735 		verbose_linfo(env, t, "%d: ", t);
9736 		verbose(env, "jump out of range from insn %d to %d\n", t, w);
9737 		return -EINVAL;
9738 	}
9739 
9740 	if (e == BRANCH)
9741 		/* mark branch target for state pruning */
9742 		init_explored_state(env, w);
9743 
9744 	if (insn_state[w] == 0) {
9745 		/* tree-edge */
9746 		insn_state[t] = DISCOVERED | e;
9747 		insn_state[w] = DISCOVERED;
9748 		if (env->cfg.cur_stack >= env->prog->len)
9749 			return -E2BIG;
9750 		insn_stack[env->cfg.cur_stack++] = w;
9751 		return KEEP_EXPLORING;
9752 	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
9753 		if (loop_ok && env->bpf_capable)
9754 			return DONE_EXPLORING;
9755 		verbose_linfo(env, t, "%d: ", t);
9756 		verbose_linfo(env, w, "%d: ", w);
9757 		verbose(env, "back-edge from insn %d to %d\n", t, w);
9758 		return -EINVAL;
9759 	} else if (insn_state[w] == EXPLORED) {
9760 		/* forward- or cross-edge */
9761 		insn_state[t] = DISCOVERED | e;
9762 	} else {
9763 		verbose(env, "insn state internal bug\n");
9764 		return -EFAULT;
9765 	}
9766 	return DONE_EXPLORING;
9767 }
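
/* E.g. a conditional jump at insn t may be visited up to three times while
 * it sits on the stack (a sketch): the first visit labels the fall-through
 * edge (insn_state[t] == 0x11) and pushes t + 1; a later visit labels the
 * branch edge (0x12) and pushes the jump target; the final visit finds both
 * edges labelled, so check_cfg() marks t EXPLORED (0x20) and pops it.
 */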
9768 
9769 static int visit_func_call_insn(int t, int insn_cnt,
9770 				struct bpf_insn *insns,
9771 				struct bpf_verifier_env *env,
9772 				bool visit_callee)
9773 {
9774 	int ret;
9775 
9776 	ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
9777 	if (ret)
9778 		return ret;
9779 
9780 	if (t + 1 < insn_cnt)
9781 		init_explored_state(env, t + 1);
9782 	if (visit_callee) {
9783 		init_explored_state(env, t);
9784 		ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
9785 				/* It's ok to allow recursion from CFG point of
9786 				 * view. __check_func_call() will do the actual
9787 				 * check.
9788 				 */
9789 				bpf_pseudo_func(insns + t));
9790 	}
9791 	return ret;
9792 }
9793 
9794 /* Visits the instruction at index t and returns one of the following:
9795  *  < 0 - an error occurred
9796  *  DONE_EXPLORING - the instruction was fully explored
9797  *  KEEP_EXPLORING - there is still work to be done before it is fully explored
9798  */
9799 static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
9800 {
9801 	struct bpf_insn *insns = env->prog->insnsi;
9802 	int ret;
9803 
9804 	if (bpf_pseudo_func(insns + t))
9805 		return visit_func_call_insn(t, insn_cnt, insns, env, true);
9806 
9807 	/* All non-branch instructions have a single fall-through edge. */
9808 	if (BPF_CLASS(insns[t].code) != BPF_JMP &&
9809 	    BPF_CLASS(insns[t].code) != BPF_JMP32)
9810 		return push_insn(t, t + 1, FALLTHROUGH, env, false);
9811 
9812 	switch (BPF_OP(insns[t].code)) {
9813 	case BPF_EXIT:
9814 		return DONE_EXPLORING;
9815 
9816 	case BPF_CALL:
9817 		if (insns[t].imm == BPF_FUNC_timer_set_callback)
9818 			/* Mark this call insn to trigger is_state_visited() check
9819 			 * before call itself is processed by __check_func_call().
9820 			 * Otherwise new async state will be pushed for further
9821 			 * exploration.
9822 			 */
9823 			init_explored_state(env, t);
9824 		return visit_func_call_insn(t, insn_cnt, insns, env,
9825 					    insns[t].src_reg == BPF_PSEUDO_CALL);
9826 
9827 	case BPF_JA:
9828 		if (BPF_SRC(insns[t].code) != BPF_K)
9829 			return -EINVAL;
9830 
9831 		/* unconditional jump with single edge */
9832 		ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
9833 				true);
9834 		if (ret)
9835 			return ret;
9836 
9837 		/* unconditional jmp is not a good pruning point,
9838 		 * but it's marked, since backtracking needs
9839 		 * to record jmp history in is_state_visited().
9840 		 */
9841 		init_explored_state(env, t + insns[t].off + 1);
9842 		/* tell verifier to check for equivalent states
9843 		 * after every call and jump
9844 		 */
9845 		if (t + 1 < insn_cnt)
9846 			init_explored_state(env, t + 1);
9847 
9848 		return ret;
9849 
9850 	default:
9851 		/* conditional jump with two edges */
9852 		init_explored_state(env, t);
9853 		ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
9854 		if (ret)
9855 			return ret;
9856 
9857 		return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
9858 	}
9859 }
9860 
9861 /* non-recursive depth-first-search to detect loops in BPF program
9862  * loop == back-edge in directed graph
9863  */
9864 static int check_cfg(struct bpf_verifier_env *env)
9865 {
9866 	int insn_cnt = env->prog->len;
9867 	int *insn_stack, *insn_state;
9868 	int ret = 0;
9869 	int i;
9870 
9871 	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
9872 	if (!insn_state)
9873 		return -ENOMEM;
9874 
9875 	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
9876 	if (!insn_stack) {
9877 		kvfree(insn_state);
9878 		return -ENOMEM;
9879 	}
9880 
9881 	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
9882 	insn_stack[0] = 0; /* 0 is the first instruction */
9883 	env->cfg.cur_stack = 1;
9884 
9885 	while (env->cfg.cur_stack > 0) {
9886 		int t = insn_stack[env->cfg.cur_stack - 1];
9887 
9888 		ret = visit_insn(t, insn_cnt, env);
9889 		switch (ret) {
9890 		case DONE_EXPLORING:
9891 			insn_state[t] = EXPLORED;
9892 			env->cfg.cur_stack--;
9893 			break;
9894 		case KEEP_EXPLORING:
9895 			break;
9896 		default:
9897 			if (ret > 0) {
9898 				verbose(env, "visit_insn internal bug\n");
9899 				ret = -EFAULT;
9900 			}
9901 			goto err_free;
9902 		}
9903 	}
9904 
9905 	if (env->cfg.cur_stack < 0) {
9906 		verbose(env, "pop stack internal bug\n");
9907 		ret = -EFAULT;
9908 		goto err_free;
9909 	}
9910 
9911 	for (i = 0; i < insn_cnt; i++) {
9912 		if (insn_state[i] != EXPLORED) {
9913 			verbose(env, "unreachable insn %d\n", i);
9914 			ret = -EINVAL;
9915 			goto err_free;
9916 		}
9917 	}
9918 	ret = 0; /* cfg looks good */
9919 
9920 err_free:
9921 	kvfree(insn_state);
9922 	kvfree(insn_stack);
9923 	env->cfg.insn_state = env->cfg.insn_stack = NULL;
9924 	return ret;
9925 }
9926 
9927 static int check_abnormal_return(struct bpf_verifier_env *env)
9928 {
9929 	int i;
9930 
9931 	for (i = 1; i < env->subprog_cnt; i++) {
9932 		if (env->subprog_info[i].has_ld_abs) {
9933 			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
9934 			return -EINVAL;
9935 		}
9936 		if (env->subprog_info[i].has_tail_call) {
9937 			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
9938 			return -EINVAL;
9939 		}
9940 	}
9941 	return 0;
9942 }
9943 
9944 /* The minimum supported BTF func info size */
9945 #define MIN_BPF_FUNCINFO_SIZE	8
9946 #define MAX_FUNCINFO_REC_SIZE	252
9947 
9948 static int check_btf_func(struct bpf_verifier_env *env,
9949 			  const union bpf_attr *attr,
9950 			  bpfptr_t uattr)
9951 {
9952 	const struct btf_type *type, *func_proto, *ret_type;
9953 	u32 i, nfuncs, urec_size, min_size;
9954 	u32 krec_size = sizeof(struct bpf_func_info);
9955 	struct bpf_func_info *krecord;
9956 	struct bpf_func_info_aux *info_aux = NULL;
9957 	struct bpf_prog *prog;
9958 	const struct btf *btf;
9959 	bpfptr_t urecord;
9960 	u32 prev_offset = 0;
9961 	bool scalar_return;
9962 	int ret = -ENOMEM;
9963 
9964 	nfuncs = attr->func_info_cnt;
9965 	if (!nfuncs) {
9966 		if (check_abnormal_return(env))
9967 			return -EINVAL;
9968 		return 0;
9969 	}
9970 
9971 	if (nfuncs != env->subprog_cnt) {
9972 		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
9973 		return -EINVAL;
9974 	}
9975 
9976 	urec_size = attr->func_info_rec_size;
9977 	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
9978 	    urec_size > MAX_FUNCINFO_REC_SIZE ||
9979 	    urec_size % sizeof(u32)) {
9980 		verbose(env, "invalid func info rec size %u\n", urec_size);
9981 		return -EINVAL;
9982 	}
9983 
9984 	prog = env->prog;
9985 	btf = prog->aux->btf;
9986 
9987 	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
9988 	min_size = min_t(u32, krec_size, urec_size);
9989 
9990 	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
9991 	if (!krecord)
9992 		return -ENOMEM;
9993 	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
9994 	if (!info_aux)
9995 		goto err_free;
9996 
9997 	for (i = 0; i < nfuncs; i++) {
9998 		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
9999 		if (ret) {
10000 			if (ret == -E2BIG) {
10001 				verbose(env, "nonzero tailing record in func info");
10002 				/* set the size kernel expects so loader can zero
10003 				 * out the rest of the record.
10004 				 */
10005 				if (copy_to_bpfptr_offset(uattr,
10006 							  offsetof(union bpf_attr, func_info_rec_size),
10007 							  &min_size, sizeof(min_size)))
10008 					ret = -EFAULT;
10009 			}
10010 			goto err_free;
10011 		}
10012 
10013 		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
10014 			ret = -EFAULT;
10015 			goto err_free;
10016 		}
10017 
10018 		/* check insn_off */
10019 		ret = -EINVAL;
10020 		if (i == 0) {
10021 			if (krecord[i].insn_off) {
10022 				verbose(env,
10023 					"nonzero insn_off %u for the first func info record",
10024 					krecord[i].insn_off);
10025 				goto err_free;
10026 			}
10027 		} else if (krecord[i].insn_off <= prev_offset) {
10028 			verbose(env,
10029 				"same or smaller insn offset (%u) than previous func info record (%u)",
10030 				krecord[i].insn_off, prev_offset);
10031 			goto err_free;
10032 		}
10033 
10034 		if (env->subprog_info[i].start != krecord[i].insn_off) {
10035 			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
10036 			goto err_free;
10037 		}
10038 
10039 		/* check type_id */
10040 		type = btf_type_by_id(btf, krecord[i].type_id);
10041 		if (!type || !btf_type_is_func(type)) {
10042 			verbose(env, "invalid type id %d in func info",
10043 				krecord[i].type_id);
10044 			goto err_free;
10045 		}
10046 		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
10047 
10048 		func_proto = btf_type_by_id(btf, type->type);
10049 		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
10050 			/* btf_func_check() already verified it during BTF load */
10051 			goto err_free;
10052 		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
10053 		scalar_return =
10054 			btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
10055 		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
10056 			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
10057 			goto err_free;
10058 		}
10059 		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
10060 			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
10061 			goto err_free;
10062 		}
10063 
10064 		prev_offset = krecord[i].insn_off;
10065 		bpfptr_add(&urecord, urec_size);
10066 	}
10067 
10068 	prog->aux->func_info = krecord;
10069 	prog->aux->func_info_cnt = nfuncs;
10070 	prog->aux->func_info_aux = info_aux;
10071 	return 0;
10072 
10073 err_free:
10074 	kvfree(krecord);
10075 	kfree(info_aux);
10076 	return ret;
10077 }
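/* Illustrative sketch (not kernel code): a loader built against a newer UAPI
 * may pass a larger bpf_func_info record than this kernel understands. The
 * -E2BIG path above writes the record size the kernel expects back into
 * func_info_rec_size, so such a loader could react roughly like below.
 * zero_record_tails() and my_rec_size are hypothetical; only the union
 * bpf_attr fields used here are real.
 *
 *	fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *	if (fd < 0 && errno == E2BIG) {
 *		// the kernel wrote the record size it understands into
 *		// attr.func_info_rec_size; zero everything past that size
 *		// in each record (keeping the original stride) and retry.
 *		zero_record_tails(func_info, attr.func_info_cnt,
 *				  my_rec_size, attr.func_info_rec_size);
 *		attr.func_info_rec_size = my_rec_size;
 *		fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *	}
 */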
10078 
10079 static void adjust_btf_func(struct bpf_verifier_env *env)
10080 {
10081 	struct bpf_prog_aux *aux = env->prog->aux;
10082 	int i;
10083 
10084 	if (!aux->func_info)
10085 		return;
10086 
10087 	for (i = 0; i < env->subprog_cnt; i++)
10088 		aux->func_info[i].insn_off = env->subprog_info[i].start;
10089 }
10090 
10091 #define MIN_BPF_LINEINFO_SIZE	(offsetof(struct bpf_line_info, line_col) + \
10092 		sizeof(((struct bpf_line_info *)(0))->line_col))
10093 #define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
10094 
10095 static int check_btf_line(struct bpf_verifier_env *env,
10096 			  const union bpf_attr *attr,
10097 			  bpfptr_t uattr)
10098 {
10099 	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
10100 	struct bpf_subprog_info *sub;
10101 	struct bpf_line_info *linfo;
10102 	struct bpf_prog *prog;
10103 	const struct btf *btf;
10104 	bpfptr_t ulinfo;
10105 	int err;
10106 
10107 	nr_linfo = attr->line_info_cnt;
10108 	if (!nr_linfo)
10109 		return 0;
10110 	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
10111 		return -EINVAL;
10112 
10113 	rec_size = attr->line_info_rec_size;
10114 	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
10115 	    rec_size > MAX_LINEINFO_REC_SIZE ||
10116 	    rec_size & (sizeof(u32) - 1))
10117 		return -EINVAL;
10118 
10119 	/* Need to zero it in case userspace passes in
10120 	 * a smaller bpf_line_info object.
10121 	 */
10122 	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
10123 			 GFP_KERNEL | __GFP_NOWARN);
10124 	if (!linfo)
10125 		return -ENOMEM;
10126 
10127 	prog = env->prog;
10128 	btf = prog->aux->btf;
10129 
10130 	s = 0;
10131 	sub = env->subprog_info;
10132 	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
10133 	expected_size = sizeof(struct bpf_line_info);
10134 	ncopy = min_t(u32, expected_size, rec_size);
10135 	for (i = 0; i < nr_linfo; i++) {
10136 		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
10137 		if (err) {
10138 			if (err == -E2BIG) {
10139 				verbose(env, "nonzero trailing record in line_info");
10140 				if (copy_to_bpfptr_offset(uattr,
10141 							  offsetof(union bpf_attr, line_info_rec_size),
10142 							  &expected_size, sizeof(expected_size)))
10143 					err = -EFAULT;
10144 			}
10145 			goto err_free;
10146 		}
10147 
10148 		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
10149 			err = -EFAULT;
10150 			goto err_free;
10151 		}
10152 
10153 		/*
10154 		 * Check insn_off to ensure
10155 		 * 1) strictly increasing AND
10156 		 * 2) bounded by prog->len
10157 		 *
10158 		 * The linfo[0].insn_off == 0 check logically falls into
10159 		 * the later "missing bpf_line_info for func..." case
10160 		 * because linfo[0].insn_off must also belong to the
10161 		 * first sub, and the first sub must have
10162 		 * subprog_info[0].start == 0.
10163 		 */
10164 		if ((i && linfo[i].insn_off <= prev_offset) ||
10165 		    linfo[i].insn_off >= prog->len) {
10166 			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
10167 				i, linfo[i].insn_off, prev_offset,
10168 				prog->len);
10169 			err = -EINVAL;
10170 			goto err_free;
10171 		}
10172 
10173 		if (!prog->insnsi[linfo[i].insn_off].code) {
10174 			verbose(env,
10175 				"Invalid insn code at line_info[%u].insn_off\n",
10176 				i);
10177 			err = -EINVAL;
10178 			goto err_free;
10179 		}
10180 
10181 		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
10182 		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
10183 			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
10184 			err = -EINVAL;
10185 			goto err_free;
10186 		}
10187 
10188 		if (s != env->subprog_cnt) {
10189 			if (linfo[i].insn_off == sub[s].start) {
10190 				sub[s].linfo_idx = i;
10191 				s++;
10192 			} else if (sub[s].start < linfo[i].insn_off) {
10193 				verbose(env, "missing bpf_line_info for func#%u\n", s);
10194 				err = -EINVAL;
10195 				goto err_free;
10196 			}
10197 		}
10198 
10199 		prev_offset = linfo[i].insn_off;
10200 		bpfptr_add(&ulinfo, rec_size);
10201 	}
10202 
10203 	if (s != env->subprog_cnt) {
10204 		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
10205 			env->subprog_cnt - s, s);
10206 		err = -EINVAL;
10207 		goto err_free;
10208 	}
10209 
10210 	prog->aux->linfo = linfo;
10211 	prog->aux->nr_linfo = nr_linfo;
10212 
10213 	return 0;
10214 
10215 err_free:
10216 	kvfree(linfo);
10217 	return err;
10218 }
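/* Illustrative example (summarizing the checks above, not extra rules): for
 * a prog with two subprogs starting at insns 0 and 5, a valid line_info
 * array could be
 *
 *	line_info[0].insn_off = 0	// matches subprog_info[0].start
 *	line_info[1].insn_off = 3
 *	line_info[2].insn_off = 5	// matches subprog_info[1].start
 *
 * insn_off must be strictly increasing and below prog->len, and every
 * subprog start needs a matching record, otherwise the loop above reports
 * "missing bpf_line_info for func#N".
 */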
10219 
10220 static int check_btf_info(struct bpf_verifier_env *env,
10221 			  const union bpf_attr *attr,
10222 			  bpfptr_t uattr)
10223 {
10224 	struct btf *btf;
10225 	int err;
10226 
10227 	if (!attr->func_info_cnt && !attr->line_info_cnt) {
10228 		if (check_abnormal_return(env))
10229 			return -EINVAL;
10230 		return 0;
10231 	}
10232 
10233 	btf = btf_get_by_fd(attr->prog_btf_fd);
10234 	if (IS_ERR(btf))
10235 		return PTR_ERR(btf);
10236 	if (btf_is_kernel(btf)) {
10237 		btf_put(btf);
10238 		return -EACCES;
10239 	}
10240 	env->prog->aux->btf = btf;
10241 
10242 	err = check_btf_func(env, attr, uattr);
10243 	if (err)
10244 		return err;
10245 
10246 	err = check_btf_line(env, attr, uattr);
10247 	if (err)
10248 		return err;
10249 
10250 	return 0;
10251 }
10252 
10253 /* check %cur's range satisfies %old's */
10254 static bool range_within(struct bpf_reg_state *old,
10255 			 struct bpf_reg_state *cur)
10256 {
10257 	return old->umin_value <= cur->umin_value &&
10258 	       old->umax_value >= cur->umax_value &&
10259 	       old->smin_value <= cur->smin_value &&
10260 	       old->smax_value >= cur->smax_value &&
10261 	       old->u32_min_value <= cur->u32_min_value &&
10262 	       old->u32_max_value >= cur->u32_max_value &&
10263 	       old->s32_min_value <= cur->s32_min_value &&
10264 	       old->s32_max_value >= cur->s32_max_value;
10265 }
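/* Worked example (illustration only, 64-bit bounds shown; the 32-bit bounds
 * are checked the same way): if the old state proved umin=0, umax=100,
 * smin=0, smax=100 for a register, a current register with bounds [16, 32]
 * is within that range and passes; bounds [0, 200] are not, because an
 * access that was safe for [0, 100] may be out of bounds for [0, 200].
 */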
10266 
10267 /* If in the old state two registers had the same id, then they need to have
10268  * the same id in the new state as well.  But that id could be different from
10269  * the old state, so we need to track the mapping from old to new ids.
10270  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
10271  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
10272  * regs with a different old id could still have new id 9, we don't care about
10273  * that.
10274  * So we look through our idmap to see if this old id has been seen before.  If
10275  * so, we require the new id to match; otherwise, we add the id pair to the map.
10276  */
10277 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
10278 {
10279 	unsigned int i;
10280 
10281 	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
10282 		if (!idmap[i].old) {
10283 			/* Reached an empty slot; haven't seen this id before */
10284 			idmap[i].old = old_id;
10285 			idmap[i].cur = cur_id;
10286 			return true;
10287 		}
10288 		if (idmap[i].old == old_id)
10289 			return idmap[i].cur == cur_id;
10290 	}
10291 	/* We ran out of idmap slots, which should be impossible */
10292 	WARN_ON_ONCE(1);
10293 	return false;
10294 }
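/* Illustrative example: if the old state had two registers sharing id 5 and
 * the current state has them both with id 9, the first call records the
 * 5 -> 9 pair and the second call matches it, so check_ids() returns true.
 * If the second register instead had cur id 8, check_ids() returns false:
 * a NULL check on one register would no longer cover the other one in the
 * current state.
 */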
10295 
10296 static void clean_func_state(struct bpf_verifier_env *env,
10297 			     struct bpf_func_state *st)
10298 {
10299 	enum bpf_reg_liveness live;
10300 	int i, j;
10301 
10302 	for (i = 0; i < BPF_REG_FP; i++) {
10303 		live = st->regs[i].live;
10304 		/* liveness must not touch this register anymore */
10305 		st->regs[i].live |= REG_LIVE_DONE;
10306 		if (!(live & REG_LIVE_READ))
10307 			/* since the register is unused, clear its state
10308 			 * to make further comparison simpler
10309 			 */
10310 			__mark_reg_not_init(env, &st->regs[i]);
10311 	}
10312 
10313 	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
10314 		live = st->stack[i].spilled_ptr.live;
10315 		/* liveness must not touch this stack slot anymore */
10316 		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
10317 		if (!(live & REG_LIVE_READ)) {
10318 			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
10319 			for (j = 0; j < BPF_REG_SIZE; j++)
10320 				st->stack[i].slot_type[j] = STACK_INVALID;
10321 		}
10322 	}
10323 }
10324 
10325 static void clean_verifier_state(struct bpf_verifier_env *env,
10326 				 struct bpf_verifier_state *st)
10327 {
10328 	int i;
10329 
10330 	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
10331 		/* all regs in this state in all frames were already marked */
10332 		return;
10333 
10334 	for (i = 0; i <= st->curframe; i++)
10335 		clean_func_state(env, st->frame[i]);
10336 }
10337 
10338 /* the parentage chains form a tree.
10339  * the verifier states are added to state lists at given insn and
10340  * pushed into state stack for future exploration.
10341  * when the verifier reaches the bpf_exit insn some of the verifier states
10342  * stored in the state lists have their final liveness state already,
10343  * but a lot of states will get revised from a liveness point of view when
10344  * the verifier explores other branches.
10345  * Example:
10346  * 1: r0 = 1
10347  * 2: if r1 == 100 goto pc+1
10348  * 3: r0 = 2
10349  * 4: exit
10350  * when the verifier reaches exit insn the register r0 in the state list of
10351  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
10352  * of insn 2 and goes exploring further. At the insn 4 it will walk the
10353  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
10354  *
10355  * Since the verifier pushes the branch states as it sees them while exploring
10356  * the program, the fact that we walk the branch instruction for the second
10357  * time means that all states below this branch were already explored and
10358  * their final liveness marks are already propagated.
10359  * Hence when the verifier completes the search of state list in is_state_visited()
10360  * we can call this clean_live_states() function to mark all liveness states
10361  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
10362  * will not be used.
10363  * This function also clears the registers and stack for states that !READ
10364  * to simplify state merging.
10365  *
10366  * An important note here is that walking the same branch instruction in the
10367  * callee doesn't mean that the states are DONE. The verifier also has to
10368  * compare the callsites.
10369  */
10370 static void clean_live_states(struct bpf_verifier_env *env, int insn,
10371 			      struct bpf_verifier_state *cur)
10372 {
10373 	struct bpf_verifier_state_list *sl;
10374 	int i;
10375 
10376 	sl = *explored_state(env, insn);
10377 	while (sl) {
10378 		if (sl->state.branches)
10379 			goto next;
10380 		if (sl->state.insn_idx != insn ||
10381 		    sl->state.curframe != cur->curframe)
10382 			goto next;
10383 		for (i = 0; i <= cur->curframe; i++)
10384 			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
10385 				goto next;
10386 		clean_verifier_state(env, &sl->state);
10387 next:
10388 		sl = sl->next;
10389 	}
10390 }
10391 
10392 /* Returns true if (rold safe implies rcur safe) */
10393 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
10394 		    struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
10395 {
10396 	bool equal;
10397 
10398 	if (!(rold->live & REG_LIVE_READ))
10399 		/* explored state didn't use this */
10400 		return true;
10401 
10402 	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
10403 
10404 	if (rold->type == PTR_TO_STACK)
10405 		/* two stack pointers are equal only if they're pointing to
10406 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
10407 		 */
10408 		return equal && rold->frameno == rcur->frameno;
10409 
10410 	if (equal)
10411 		return true;
10412 
10413 	if (rold->type == NOT_INIT)
10414 		/* explored state can't have used this */
10415 		return true;
10416 	if (rcur->type == NOT_INIT)
10417 		return false;
10418 	switch (rold->type) {
10419 	case SCALAR_VALUE:
10420 		if (env->explore_alu_limits)
10421 			return false;
10422 		if (rcur->type == SCALAR_VALUE) {
10423 			if (!rold->precise && !rcur->precise)
10424 				return true;
10425 			/* new val must satisfy old val knowledge */
10426 			return range_within(rold, rcur) &&
10427 			       tnum_in(rold->var_off, rcur->var_off);
10428 		} else {
10429 			/* We're trying to use a pointer in place of a scalar.
10430 			 * Even if the scalar was unbounded, this could lead to
10431 			 * pointer leaks because scalars are allowed to leak
10432 			 * while pointers are not. We could make this safe in
10433 			 * special cases if root is calling us, but it's
10434 			 * probably not worth the hassle.
10435 			 */
10436 			return false;
10437 		}
10438 	case PTR_TO_MAP_KEY:
10439 	case PTR_TO_MAP_VALUE:
10440 		/* If the new min/max/var_off satisfy the old ones and
10441 		 * everything else matches, we are OK.
10442 		 * 'id' is not compared, since it's only used for maps with
10443 		 * bpf_spin_lock inside map element and in such cases if
10444 		 * the rest of the prog is valid for one map element then
10445 		 * it's valid for all map elements regardless of the key
10446 		 * used in bpf_map_lookup()
10447 		 */
10448 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
10449 		       range_within(rold, rcur) &&
10450 		       tnum_in(rold->var_off, rcur->var_off);
10451 	case PTR_TO_MAP_VALUE_OR_NULL:
10452 		/* a PTR_TO_MAP_VALUE could be safe to use as a
10453 		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
10454 		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
10455 		 * checked, doing so could have affected others with the same
10456 		 * id, and we can't check for that because we lost the id when
10457 		 * we converted to a PTR_TO_MAP_VALUE.
10458 		 */
10459 		if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
10460 			return false;
10461 		if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
10462 			return false;
10463 		/* Check our ids match any regs they're supposed to */
10464 		return check_ids(rold->id, rcur->id, idmap);
10465 	case PTR_TO_PACKET_META:
10466 	case PTR_TO_PACKET:
10467 		if (rcur->type != rold->type)
10468 			return false;
10469 		/* We must have at least as much range as the old ptr
10470 		 * did, so that any accesses which were safe before are
10471 		 * still safe.  This is true even if old range < old off,
10472 		 * since someone could have accessed through (ptr - k), or
10473 		 * even done ptr -= k in a register, to get a safe access.
10474 		 */
10475 		if (rold->range > rcur->range)
10476 			return false;
10477 		/* If the offsets don't match, we can't trust our alignment;
10478 		 * nor can we be sure that we won't fall out of range.
10479 		 */
10480 		if (rold->off != rcur->off)
10481 			return false;
10482 		/* id relations must be preserved */
10483 		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
10484 			return false;
10485 		/* new val must satisfy old val knowledge */
10486 		return range_within(rold, rcur) &&
10487 		       tnum_in(rold->var_off, rcur->var_off);
10488 	case PTR_TO_CTX:
10489 	case CONST_PTR_TO_MAP:
10490 	case PTR_TO_PACKET_END:
10491 	case PTR_TO_FLOW_KEYS:
10492 	case PTR_TO_SOCKET:
10493 	case PTR_TO_SOCKET_OR_NULL:
10494 	case PTR_TO_SOCK_COMMON:
10495 	case PTR_TO_SOCK_COMMON_OR_NULL:
10496 	case PTR_TO_TCP_SOCK:
10497 	case PTR_TO_TCP_SOCK_OR_NULL:
10498 	case PTR_TO_XDP_SOCK:
10499 		/* Only valid matches are exact, which memcmp() above
10500 		 * would have accepted
10501 		 */
10502 	default:
10503 		/* Don't know what's going on, just say it's not safe */
10504 		return false;
10505 	}
10506 
10507 	/* Shouldn't get here; if we do, say it's not safe */
10508 	WARN_ON_ONCE(1);
10509 	return false;
10510 }
10511 
10512 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
10513 		      struct bpf_func_state *cur, struct bpf_id_pair *idmap)
10514 {
10515 	int i, spi;
10516 
10517 	/* walk slots of the explored stack and ignore any additional
10518 	 * slots in the current stack, since explored(safe) state
10519 	 * didn't use them
10520 	 */
10521 	for (i = 0; i < old->allocated_stack; i++) {
10522 		spi = i / BPF_REG_SIZE;
10523 
10524 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
10525 			i += BPF_REG_SIZE - 1;
10526 			/* explored state didn't use this */
10527 			continue;
10528 		}
10529 
10530 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
10531 			continue;
10532 
10533 		/* explored stack has more populated slots than current stack
10534 		 * and these slots were used
10535 		 */
10536 		if (i >= cur->allocated_stack)
10537 			return false;
10538 
10539 		/* if old state was safe with misc data in the stack
10540 		 * it will be safe with zero-initialized stack.
10541 		 * The opposite is not true
10542 		 */
10543 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
10544 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
10545 			continue;
10546 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
10547 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
10548 			/* Ex: old explored (safe) state has STACK_SPILL in
10549 			 * this stack slot, but current has STACK_MISC ->
10550 			 * these verifier states are not equivalent,
10551 			 * return false to continue verification of this path
10552 			 */
10553 			return false;
10554 		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
10555 			continue;
10556 		if (!is_spilled_reg(&old->stack[spi]))
10557 			continue;
10558 		if (!regsafe(env, &old->stack[spi].spilled_ptr,
10559 			     &cur->stack[spi].spilled_ptr, idmap))
10560 			/* when explored and current stack slot are both storing
10561 			 * spilled registers, check that stored pointer types
10562 			 * are the same as well.
10563 			 * Ex: explored safe path could have stored
10564 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
10565 			 * but current path has stored:
10566 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
10567 			 * such verifier states are not equivalent.
10568 			 * return false to continue verification of this path
10569 			 */
10570 			return false;
10571 	}
10572 	return true;
10573 }
10574 
10575 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
10576 {
10577 	if (old->acquired_refs != cur->acquired_refs)
10578 		return false;
10579 	return !memcmp(old->refs, cur->refs,
10580 		       sizeof(*old->refs) * old->acquired_refs);
10581 }
10582 
10583 /* compare two verifier states
10584  *
10585  * all states stored in state_list are known to be valid, since
10586  * verifier reached 'bpf_exit' instruction through them
10587  *
10588  * this function is called when the verifier explores different branches of
10589  * execution popped from the state stack. If it sees an old state that has
10590  * a more strict register state and a more strict stack state, then this
10591  * execution branch doesn't need to be explored further, since the verifier
10592  * already concluded that the more strict state leads to a valid finish.
10593  *
10594  * Therefore two states are equivalent if register state is more conservative
10595  * and explored stack state is more conservative than the current one.
10596  * Example:
10597  *       explored                   current
10598  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
10599  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
10600  *
10601  * In other words, if the current stack state (the one being explored) has
10602  * more valid slots than the old one that already passed validation, it means
10603  * the verifier can stop exploring and conclude that the current state is valid too
10604  *
10605  * Similarly with registers. If the explored state has a register type marked
10606  * invalid whereas the register type in the current state is meaningful, it
10607  * means that the current state will reach the 'bpf_exit' instruction safely
10608  */
10609 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
10610 			      struct bpf_func_state *cur)
10611 {
10612 	int i;
10613 
10614 	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
10615 	for (i = 0; i < MAX_BPF_REG; i++)
10616 		if (!regsafe(env, &old->regs[i], &cur->regs[i],
10617 			     env->idmap_scratch))
10618 			return false;
10619 
10620 	if (!stacksafe(env, old, cur, env->idmap_scratch))
10621 		return false;
10622 
10623 	if (!refsafe(old, cur))
10624 		return false;
10625 
10626 	return true;
10627 }
10628 
10629 static bool states_equal(struct bpf_verifier_env *env,
10630 			 struct bpf_verifier_state *old,
10631 			 struct bpf_verifier_state *cur)
10632 {
10633 	int i;
10634 
10635 	if (old->curframe != cur->curframe)
10636 		return false;
10637 
10638 	/* Verification state from speculative execution simulation
10639 	 * must never prune a non-speculative execution one.
10640 	 */
10641 	if (old->speculative && !cur->speculative)
10642 		return false;
10643 
10644 	if (old->active_spin_lock != cur->active_spin_lock)
10645 		return false;
10646 
10647 	/* for states to be equal callsites have to be the same
10648 	 * and all frame states need to be equivalent
10649 	 */
10650 	for (i = 0; i <= old->curframe; i++) {
10651 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
10652 			return false;
10653 		if (!func_states_equal(env, old->frame[i], cur->frame[i]))
10654 			return false;
10655 	}
10656 	return true;
10657 }
10658 
10659 /* Return 0 if no propagation happened. Return negative error code if error
10660  * happened. Otherwise, return the propagated bit.
10661  */
10662 static int propagate_liveness_reg(struct bpf_verifier_env *env,
10663 				  struct bpf_reg_state *reg,
10664 				  struct bpf_reg_state *parent_reg)
10665 {
10666 	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
10667 	u8 flag = reg->live & REG_LIVE_READ;
10668 	int err;
10669 
10670 	/* When we come here, the read flags of PARENT_REG or REG could be any of
10671 	 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
10672 	 * for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
10673 	 */
10674 	if (parent_flag == REG_LIVE_READ64 ||
10675 	    /* Or if there is no read flag from REG. */
10676 	    !flag ||
10677 	    /* Or if the read flag from REG is the same as PARENT_REG. */
10678 	    parent_flag == flag)
10679 		return 0;
10680 
10681 	err = mark_reg_read(env, reg, parent_reg, flag);
10682 	if (err)
10683 		return err;
10684 
10685 	return flag;
10686 }
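/* Example of the return contract (illustration): if REG was read as a full
 * 64-bit value (REG_LIVE_READ64) while PARENT_REG so far only carries
 * REG_LIVE_READ32, the 64-bit read is propagated and REG_LIVE_READ64 is
 * returned, letting the caller request zero-extension via mark_insn_zext().
 * If PARENT_REG already has READ64, or REG carries no read flag at all,
 * nothing is propagated and 0 is returned.
 */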
10687 
10688 /* A write screens off any subsequent reads; but write marks come from the
10689  * straight-line code between a state and its parent.  When we arrive at an
10690  * equivalent state (jump target or such) we didn't arrive by the straight-line
10691  * code, so read marks in the state must propagate to the parent regardless
10692  * of the state's write marks. That's what 'parent == state->parent' comparison
10693  * in mark_reg_read() is for.
10694  */
10695 static int propagate_liveness(struct bpf_verifier_env *env,
10696 			      const struct bpf_verifier_state *vstate,
10697 			      struct bpf_verifier_state *vparent)
10698 {
10699 	struct bpf_reg_state *state_reg, *parent_reg;
10700 	struct bpf_func_state *state, *parent;
10701 	int i, frame, err = 0;
10702 
10703 	if (vparent->curframe != vstate->curframe) {
10704 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
10705 		     vparent->curframe, vstate->curframe);
10706 		return -EFAULT;
10707 	}
10708 	/* Propagate read liveness of registers... */
10709 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
10710 	for (frame = 0; frame <= vstate->curframe; frame++) {
10711 		parent = vparent->frame[frame];
10712 		state = vstate->frame[frame];
10713 		parent_reg = parent->regs;
10714 		state_reg = state->regs;
10715 		/* We don't need to worry about FP liveness, it's read-only */
10716 		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
10717 			err = propagate_liveness_reg(env, &state_reg[i],
10718 						     &parent_reg[i]);
10719 			if (err < 0)
10720 				return err;
10721 			if (err == REG_LIVE_READ64)
10722 				mark_insn_zext(env, &parent_reg[i]);
10723 		}
10724 
10725 		/* Propagate stack slots. */
10726 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
10727 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
10728 			parent_reg = &parent->stack[i].spilled_ptr;
10729 			state_reg = &state->stack[i].spilled_ptr;
10730 			err = propagate_liveness_reg(env, state_reg,
10731 						     parent_reg);
10732 			if (err < 0)
10733 				return err;
10734 		}
10735 	}
10736 	return 0;
10737 }
10738 
10739 /* find precise scalars in the previous equivalent state and
10740  * propagate them into the current state
10741  */
10742 static int propagate_precision(struct bpf_verifier_env *env,
10743 			       const struct bpf_verifier_state *old)
10744 {
10745 	struct bpf_reg_state *state_reg;
10746 	struct bpf_func_state *state;
10747 	int i, err = 0;
10748 
10749 	state = old->frame[old->curframe];
10750 	state_reg = state->regs;
10751 	for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
10752 		if (state_reg->type != SCALAR_VALUE ||
10753 		    !state_reg->precise)
10754 			continue;
10755 		if (env->log.level & BPF_LOG_LEVEL2)
10756 			verbose(env, "propagating r%d\n", i);
10757 		err = mark_chain_precision(env, i);
10758 		if (err < 0)
10759 			return err;
10760 	}
10761 
10762 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
10763 		if (!is_spilled_reg(&state->stack[i]))
10764 			continue;
10765 		state_reg = &state->stack[i].spilled_ptr;
10766 		if (state_reg->type != SCALAR_VALUE ||
10767 		    !state_reg->precise)
10768 			continue;
10769 		if (env->log.level & BPF_LOG_LEVEL2)
10770 			verbose(env, "propagating fp%d\n",
10771 				(-i - 1) * BPF_REG_SIZE);
10772 		err = mark_chain_precision_stack(env, i);
10773 		if (err < 0)
10774 			return err;
10775 	}
10776 	return 0;
10777 }
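/* Note on the log output above (illustration): stack slot index i counts up
 * from fp-8, so slot 0 is printed as "propagating fp-8", slot 1 as
 * "propagating fp-16", and so on, via (-i - 1) * BPF_REG_SIZE.
 */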
10778 
10779 static bool states_maybe_looping(struct bpf_verifier_state *old,
10780 				 struct bpf_verifier_state *cur)
10781 {
10782 	struct bpf_func_state *fold, *fcur;
10783 	int i, fr = cur->curframe;
10784 
10785 	if (old->curframe != fr)
10786 		return false;
10787 
10788 	fold = old->frame[fr];
10789 	fcur = cur->frame[fr];
10790 	for (i = 0; i < MAX_BPF_REG; i++)
10791 		if (memcmp(&fold->regs[i], &fcur->regs[i],
10792 			   offsetof(struct bpf_reg_state, parent)))
10793 			return false;
10794 	return true;
10795 }
10796 
10797 
10798 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
10799 {
10800 	struct bpf_verifier_state_list *new_sl;
10801 	struct bpf_verifier_state_list *sl, **pprev;
10802 	struct bpf_verifier_state *cur = env->cur_state, *new;
10803 	int i, j, err, states_cnt = 0;
10804 	bool add_new_state = env->test_state_freq ? true : false;
10805 
10806 	cur->last_insn_idx = env->prev_insn_idx;
10807 	if (!env->insn_aux_data[insn_idx].prune_point)
10808 		/* this 'insn_idx' instruction wasn't marked, so we will not
10809 		 * be doing state search here
10810 		 */
10811 		return 0;
10812 
10813 	/* bpf progs typically have a pruning point every 4 instructions
10814 	 * http://vger.kernel.org/bpfconf2019.html#session-1
10815 	 * Do not add new state for future pruning if the verifier hasn't seen
10816 	 * at least 2 jumps and at least 8 instructions.
10817 	 * This heuristic helps decrease 'total_states' and 'peak_states' metrics.
10818 	 * In tests that amounts to up to a 50% reduction in total verifier
10819 	 * memory consumption and 20% verifier time speedup.
10820 	 */
10821 	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
10822 	    env->insn_processed - env->prev_insn_processed >= 8)
10823 		add_new_state = true;
10824 
10825 	pprev = explored_state(env, insn_idx);
10826 	sl = *pprev;
10827 
10828 	clean_live_states(env, insn_idx, cur);
10829 
10830 	while (sl) {
10831 		states_cnt++;
10832 		if (sl->state.insn_idx != insn_idx)
10833 			goto next;
10834 
10835 		if (sl->state.branches) {
10836 			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
10837 
10838 			if (frame->in_async_callback_fn &&
10839 			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
10840 				/* Different async_entry_cnt means that the verifier is
10841 				 * processing another entry into async callback.
10842 				 * Seeing the same state is not an indication of infinite
10843 				 * loop or infinite recursion.
10844 				 * But finding the same state doesn't mean that it's safe
10845 				 * to stop processing the current state. The previous state
10846 				 * hasn't yet reached bpf_exit, since state.branches > 0.
10847 				 * Checking in_async_callback_fn alone is not enough either,
10848 				 * since the verifier still needs to catch infinite loops
10849 				 * inside async callbacks.
10850 				 */
10851 			} else if (states_maybe_looping(&sl->state, cur) &&
10852 				   states_equal(env, &sl->state, cur)) {
10853 				verbose_linfo(env, insn_idx, "; ");
10854 				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
10855 				return -EINVAL;
10856 			}
10857 			/* if the verifier is processing a loop, avoid adding new state
10858 			 * too often, since different loop iterations have distinct
10859 			 * states and may not help future pruning.
10860 			 * This threshold shouldn't be too low to make sure that
10861 			 * a loop with large bound will be rejected quickly.
10862 			 * The most abusive loop will be:
10863 			 * r1 += 1
10864 			 * if r1 < 1000000 goto pc-2
10865 			 * 1M insn_processed limit / 100 == 10k peak states.
10866 			 * This threshold shouldn't be too high either, since states
10867 			 * at the end of the loop are likely to be useful in pruning.
10868 			 */
10869 			if (env->jmps_processed - env->prev_jmps_processed < 20 &&
10870 			    env->insn_processed - env->prev_insn_processed < 100)
10871 				add_new_state = false;
10872 			goto miss;
10873 		}
10874 		if (states_equal(env, &sl->state, cur)) {
10875 			sl->hit_cnt++;
10876 			/* reached equivalent register/stack state,
10877 			 * prune the search.
10878 			 * Registers read by the continuation are read by us.
10879 			 * If we have any write marks in env->cur_state, they
10880 			 * will prevent corresponding reads in the continuation
10881 			 * from reaching our parent (an explored_state).  Our
10882 			 * own state will get the read marks recorded, but
10883 			 * they'll be immediately forgotten as we're pruning
10884 			 * this state and will pop a new one.
10885 			 */
10886 			err = propagate_liveness(env, &sl->state, cur);
10887 
10888 			/* if previous state reached the exit with precision and
10889 			 * current state is equivalent to it (except precision marks)
10890 			 * the precision needs to be propagated back in
10891 			 * the current state.
10892 			 */
10893 			err = err ? : push_jmp_history(env, cur);
10894 			err = err ? : propagate_precision(env, &sl->state);
10895 			if (err)
10896 				return err;
10897 			return 1;
10898 		}
10899 miss:
10900 		/* when new state is not going to be added do not increase miss count.
10901 		 * Otherwise several loop iterations will remove the state
10902 		 * recorded earlier. The goal of these heuristics is to have
10903 		 * states from some iterations of the loop (some in the beginning
10904 		 * and some at the end) to help pruning.
10905 		 */
10906 		if (add_new_state)
10907 			sl->miss_cnt++;
10908 		/* heuristic to determine whether this state is beneficial
10909 		 * to keep checking from state equivalence point of view.
10910 		 * Higher numbers increase max_states_per_insn and verification time,
10911 		 * but do not meaningfully decrease insn_processed.
10912 		 */
10913 		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
10914 			/* the state is unlikely to be useful. Remove it to
10915 			 * speed up verification
10916 			 */
10917 			*pprev = sl->next;
10918 			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
10919 				u32 br = sl->state.branches;
10920 
10921 				WARN_ONCE(br,
10922 					  "BUG live_done but branches_to_explore %d\n",
10923 					  br);
10924 				free_verifier_state(&sl->state, false);
10925 				kfree(sl);
10926 				env->peak_states--;
10927 			} else {
10928 				/* cannot free this state, since parentage chain may
10929 				 * walk it later. Add it to free_list instead to
10930 				 * be freed at the end of verification
10931 				 */
10932 				sl->next = env->free_list;
10933 				env->free_list = sl;
10934 			}
10935 			sl = *pprev;
10936 			continue;
10937 		}
10938 next:
10939 		pprev = &sl->next;
10940 		sl = *pprev;
10941 	}
10942 
10943 	if (env->max_states_per_insn < states_cnt)
10944 		env->max_states_per_insn = states_cnt;
10945 
10946 	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
10947 		return push_jmp_history(env, cur);
10948 
10949 	if (!add_new_state)
10950 		return push_jmp_history(env, cur);
10951 
10952 	/* There were no equivalent states, remember the current one.
10953 	 * Technically the current state is not proven to be safe yet,
10954 	 * but it will either reach the outermost bpf_exit (which means it's safe)
10955 	 * or it will be rejected. When there are no loops the verifier won't be
10956 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
10957 	 * again on the way to bpf_exit.
10958 	 * When looping the sl->state.branches will be > 0 and this state
10959 	 * will not be considered for equivalence until branches == 0.
10960 	 */
10961 	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
10962 	if (!new_sl)
10963 		return -ENOMEM;
10964 	env->total_states++;
10965 	env->peak_states++;
10966 	env->prev_jmps_processed = env->jmps_processed;
10967 	env->prev_insn_processed = env->insn_processed;
10968 
10969 	/* add new state to the head of linked list */
10970 	new = &new_sl->state;
10971 	err = copy_verifier_state(new, cur);
10972 	if (err) {
10973 		free_verifier_state(new, false);
10974 		kfree(new_sl);
10975 		return err;
10976 	}
10977 	new->insn_idx = insn_idx;
10978 	WARN_ONCE(new->branches != 1,
10979 		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
10980 
10981 	cur->parent = new;
10982 	cur->first_insn_idx = insn_idx;
10983 	clear_jmp_history(cur);
10984 	new_sl->next = *explored_state(env, insn_idx);
10985 	*explored_state(env, insn_idx) = new_sl;
10986 	/* connect new state to parentage chain. Current frame needs all
10987 	 * registers connected. Only r6 - r9 of the callers are alive (pushed
10988 	 * to the stack implicitly by JITs) so in callers' frames connect just
10989 	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
10990 	 * the state of the call instruction (with WRITTEN set), and r0 comes
10991 	 * from callee with its full parentage chain, anyway.
10992 	 */
10993 	/* clear write marks in current state: the writes we did are not writes
10994 	 * our child did, so they don't screen off its reads from us.
10995 	 * (There are no read marks in current state, because reads always mark
10996 	 * their parent and current state never has children yet.  Only
10997 	 * explored_states can get read marks.)
10998 	 */
10999 	for (j = 0; j <= cur->curframe; j++) {
11000 		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
11001 			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
11002 		for (i = 0; i < BPF_REG_FP; i++)
11003 			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
11004 	}
11005 
11006 	/* all stack frames are accessible from callee, clear them all */
11007 	for (j = 0; j <= cur->curframe; j++) {
11008 		struct bpf_func_state *frame = cur->frame[j];
11009 		struct bpf_func_state *newframe = new->frame[j];
11010 
11011 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
11012 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
11013 			frame->stack[i].spilled_ptr.parent =
11014 						&newframe->stack[i].spilled_ptr;
11015 		}
11016 	}
11017 	return 0;
11018 }
11019 
11020 /* Return true if it's OK to have the same insn return a different type. */
11021 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
11022 {
11023 	switch (type) {
11024 	case PTR_TO_CTX:
11025 	case PTR_TO_SOCKET:
11026 	case PTR_TO_SOCKET_OR_NULL:
11027 	case PTR_TO_SOCK_COMMON:
11028 	case PTR_TO_SOCK_COMMON_OR_NULL:
11029 	case PTR_TO_TCP_SOCK:
11030 	case PTR_TO_TCP_SOCK_OR_NULL:
11031 	case PTR_TO_XDP_SOCK:
11032 	case PTR_TO_BTF_ID:
11033 	case PTR_TO_BTF_ID_OR_NULL:
11034 		return false;
11035 	default:
11036 		return true;
11037 	}
11038 }
11039 
11040 /* If an instruction was previously used with particular pointer types, then we
11041  * need to be careful to avoid cases such as the below, where it may be ok
11042  * for one branch accessing the pointer, but not ok for the other branch:
11043  *
11044  * R1 = sock_ptr
11045  * goto X;
11046  * ...
11047  * R1 = some_other_valid_ptr;
11048  * goto X;
11049  * ...
11050  * R2 = *(u32 *)(R1 + 0);
11051  */
11052 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
11053 {
11054 	return src != prev && (!reg_type_mismatch_ok(src) ||
11055 			       !reg_type_mismatch_ok(prev));
11056 }
11057 
11058 static int do_check(struct bpf_verifier_env *env)
11059 {
11060 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11061 	struct bpf_verifier_state *state = env->cur_state;
11062 	struct bpf_insn *insns = env->prog->insnsi;
11063 	struct bpf_reg_state *regs;
11064 	int insn_cnt = env->prog->len;
11065 	bool do_print_state = false;
11066 	int prev_insn_idx = -1;
11067 
11068 	for (;;) {
11069 		struct bpf_insn *insn;
11070 		u8 class;
11071 		int err;
11072 
11073 		env->prev_insn_idx = prev_insn_idx;
11074 		if (env->insn_idx >= insn_cnt) {
11075 			verbose(env, "invalid insn idx %d insn_cnt %d\n",
11076 				env->insn_idx, insn_cnt);
11077 			return -EFAULT;
11078 		}
11079 
11080 		insn = &insns[env->insn_idx];
11081 		class = BPF_CLASS(insn->code);
11082 
11083 		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
11084 			verbose(env,
11085 				"BPF program is too large. Processed %d insn\n",
11086 				env->insn_processed);
11087 			return -E2BIG;
11088 		}
11089 
11090 		err = is_state_visited(env, env->insn_idx);
11091 		if (err < 0)
11092 			return err;
11093 		if (err == 1) {
11094 			/* found equivalent state, can prune the search */
11095 			if (env->log.level & BPF_LOG_LEVEL) {
11096 				if (do_print_state)
11097 					verbose(env, "\nfrom %d to %d%s: safe\n",
11098 						env->prev_insn_idx, env->insn_idx,
11099 						env->cur_state->speculative ?
11100 						" (speculative execution)" : "");
11101 				else
11102 					verbose(env, "%d: safe\n", env->insn_idx);
11103 			}
11104 			goto process_bpf_exit;
11105 		}
11106 
11107 		if (signal_pending(current))
11108 			return -EAGAIN;
11109 
11110 		if (need_resched())
11111 			cond_resched();
11112 
11113 		if (env->log.level & BPF_LOG_LEVEL2 ||
11114 		    (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
11115 			if (env->log.level & BPF_LOG_LEVEL2)
11116 				verbose(env, "%d:", env->insn_idx);
11117 			else
11118 				verbose(env, "\nfrom %d to %d%s:",
11119 					env->prev_insn_idx, env->insn_idx,
11120 					env->cur_state->speculative ?
11121 					" (speculative execution)" : "");
11122 			print_verifier_state(env, state->frame[state->curframe]);
11123 			do_print_state = false;
11124 		}
11125 
11126 		if (env->log.level & BPF_LOG_LEVEL) {
11127 			const struct bpf_insn_cbs cbs = {
11128 				.cb_call	= disasm_kfunc_name,
11129 				.cb_print	= verbose,
11130 				.private_data	= env,
11131 			};
11132 
11133 			verbose_linfo(env, env->insn_idx, "; ");
11134 			verbose(env, "%d: ", env->insn_idx);
11135 			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
11136 		}
11137 
11138 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
11139 			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
11140 							   env->prev_insn_idx);
11141 			if (err)
11142 				return err;
11143 		}
11144 
11145 		regs = cur_regs(env);
11146 		sanitize_mark_insn_seen(env);
11147 		prev_insn_idx = env->insn_idx;
11148 
11149 		if (class == BPF_ALU || class == BPF_ALU64) {
11150 			err = check_alu_op(env, insn);
11151 			if (err)
11152 				return err;
11153 
11154 		} else if (class == BPF_LDX) {
11155 			enum bpf_reg_type *prev_src_type, src_reg_type;
11156 
11157 			/* check for reserved fields is already done */
11158 
11159 			/* check src operand */
11160 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
11161 			if (err)
11162 				return err;
11163 
11164 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
11165 			if (err)
11166 				return err;
11167 
11168 			src_reg_type = regs[insn->src_reg].type;
11169 
11170 			/* check that memory (src_reg + off) is readable,
11171 			 * the state of dst_reg will be updated by this func
11172 			 */
11173 			err = check_mem_access(env, env->insn_idx, insn->src_reg,
11174 					       insn->off, BPF_SIZE(insn->code),
11175 					       BPF_READ, insn->dst_reg, false);
11176 			if (err)
11177 				return err;
11178 
11179 			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
11180 
11181 			if (*prev_src_type == NOT_INIT) {
11182 				/* saw a valid insn
11183 				 * dst_reg = *(u32 *)(src_reg + off)
11184 				 * save type to validate intersecting paths
11185 				 */
11186 				*prev_src_type = src_reg_type;
11187 
11188 			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
11189 				/* Abuser program is trying to use the same insn
11190 				 * dst_reg = *(u32*) (src_reg + off)
11191 				 * with different pointer types:
11192 				 * src_reg == ctx in one branch and
11193 				 * src_reg == stack|map in some other branch.
11194 				 * Reject it.
11195 				 */
11196 				verbose(env, "same insn cannot be used with different pointers\n");
11197 				return -EINVAL;
11198 			}
11199 
11200 		} else if (class == BPF_STX) {
11201 			enum bpf_reg_type *prev_dst_type, dst_reg_type;
11202 
11203 			if (BPF_MODE(insn->code) == BPF_ATOMIC) {
11204 				err = check_atomic(env, env->insn_idx, insn);
11205 				if (err)
11206 					return err;
11207 				env->insn_idx++;
11208 				continue;
11209 			}
11210 
11211 			if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
11212 				verbose(env, "BPF_STX uses reserved fields\n");
11213 				return -EINVAL;
11214 			}
11215 
11216 			/* check src1 operand */
11217 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
11218 			if (err)
11219 				return err;
11220 			/* check src2 operand */
11221 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11222 			if (err)
11223 				return err;
11224 
11225 			dst_reg_type = regs[insn->dst_reg].type;
11226 
11227 			/* check that memory (dst_reg + off) is writeable */
11228 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11229 					       insn->off, BPF_SIZE(insn->code),
11230 					       BPF_WRITE, insn->src_reg, false);
11231 			if (err)
11232 				return err;
11233 
11234 			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
11235 
11236 			if (*prev_dst_type == NOT_INIT) {
11237 				*prev_dst_type = dst_reg_type;
11238 			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
11239 				verbose(env, "same insn cannot be used with different pointers\n");
11240 				return -EINVAL;
11241 			}
11242 
11243 		} else if (class == BPF_ST) {
11244 			if (BPF_MODE(insn->code) != BPF_MEM ||
11245 			    insn->src_reg != BPF_REG_0) {
11246 				verbose(env, "BPF_ST uses reserved fields\n");
11247 				return -EINVAL;
11248 			}
11249 			/* check src operand */
11250 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11251 			if (err)
11252 				return err;
11253 
11254 			if (is_ctx_reg(env, insn->dst_reg)) {
11255 				verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
11256 					insn->dst_reg,
11257 					reg_type_str[reg_state(env, insn->dst_reg)->type]);
11258 				return -EACCES;
11259 			}
11260 
11261 			/* check that memory (dst_reg + off) is writeable */
11262 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11263 					       insn->off, BPF_SIZE(insn->code),
11264 					       BPF_WRITE, -1, false);
11265 			if (err)
11266 				return err;
11267 
11268 		} else if (class == BPF_JMP || class == BPF_JMP32) {
11269 			u8 opcode = BPF_OP(insn->code);
11270 
11271 			env->jmps_processed++;
11272 			if (opcode == BPF_CALL) {
11273 				if (BPF_SRC(insn->code) != BPF_K ||
11274 				    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
11275 				     && insn->off != 0) ||
11276 				    (insn->src_reg != BPF_REG_0 &&
11277 				     insn->src_reg != BPF_PSEUDO_CALL &&
11278 				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
11279 				    insn->dst_reg != BPF_REG_0 ||
11280 				    class == BPF_JMP32) {
11281 					verbose(env, "BPF_CALL uses reserved fields\n");
11282 					return -EINVAL;
11283 				}
11284 
11285 				if (env->cur_state->active_spin_lock &&
11286 				    (insn->src_reg == BPF_PSEUDO_CALL ||
11287 				     insn->imm != BPF_FUNC_spin_unlock)) {
11288 					verbose(env, "function calls are not allowed while holding a lock\n");
11289 					return -EINVAL;
11290 				}
11291 				if (insn->src_reg == BPF_PSEUDO_CALL)
11292 					err = check_func_call(env, insn, &env->insn_idx);
11293 				else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
11294 					err = check_kfunc_call(env, insn);
11295 				else
11296 					err = check_helper_call(env, insn, &env->insn_idx);
11297 				if (err)
11298 					return err;
11299 			} else if (opcode == BPF_JA) {
11300 				if (BPF_SRC(insn->code) != BPF_K ||
11301 				    insn->imm != 0 ||
11302 				    insn->src_reg != BPF_REG_0 ||
11303 				    insn->dst_reg != BPF_REG_0 ||
11304 				    class == BPF_JMP32) {
11305 					verbose(env, "BPF_JA uses reserved fields\n");
11306 					return -EINVAL;
11307 				}
11308 
11309 				env->insn_idx += insn->off + 1;
11310 				continue;
11311 
11312 			} else if (opcode == BPF_EXIT) {
11313 				if (BPF_SRC(insn->code) != BPF_K ||
11314 				    insn->imm != 0 ||
11315 				    insn->src_reg != BPF_REG_0 ||
11316 				    insn->dst_reg != BPF_REG_0 ||
11317 				    class == BPF_JMP32) {
11318 					verbose(env, "BPF_EXIT uses reserved fields\n");
11319 					return -EINVAL;
11320 				}
11321 
11322 				if (env->cur_state->active_spin_lock) {
11323 					verbose(env, "bpf_spin_unlock is missing\n");
11324 					return -EINVAL;
11325 				}
11326 
11327 				if (state->curframe) {
11328 					/* exit from nested function */
11329 					err = prepare_func_exit(env, &env->insn_idx);
11330 					if (err)
11331 						return err;
11332 					do_print_state = true;
11333 					continue;
11334 				}
11335 
11336 				err = check_reference_leak(env);
11337 				if (err)
11338 					return err;
11339 
11340 				err = check_return_code(env);
11341 				if (err)
11342 					return err;
11343 process_bpf_exit:
11344 				update_branch_counts(env, env->cur_state);
11345 				err = pop_stack(env, &prev_insn_idx,
11346 						&env->insn_idx, pop_log);
11347 				if (err < 0) {
11348 					if (err != -ENOENT)
11349 						return err;
11350 					break;
11351 				} else {
11352 					do_print_state = true;
11353 					continue;
11354 				}
11355 			} else {
11356 				err = check_cond_jmp_op(env, insn, &env->insn_idx);
11357 				if (err)
11358 					return err;
11359 			}
11360 		} else if (class == BPF_LD) {
11361 			u8 mode = BPF_MODE(insn->code);
11362 
11363 			if (mode == BPF_ABS || mode == BPF_IND) {
11364 				err = check_ld_abs(env, insn);
11365 				if (err)
11366 					return err;
11367 
11368 			} else if (mode == BPF_IMM) {
11369 				err = check_ld_imm(env, insn);
11370 				if (err)
11371 					return err;
11372 
11373 				env->insn_idx++;
11374 				sanitize_mark_insn_seen(env);
11375 			} else {
11376 				verbose(env, "invalid BPF_LD mode\n");
11377 				return -EINVAL;
11378 			}
11379 		} else {
11380 			verbose(env, "unknown insn class %d\n", class);
11381 			return -EINVAL;
11382 		}
11383 
11384 		env->insn_idx++;
11385 	}
11386 
11387 	return 0;
11388 }
11389 
11390 static int find_btf_percpu_datasec(struct btf *btf)
11391 {
11392 	const struct btf_type *t;
11393 	const char *tname;
11394 	int i, n;
11395 
11396 	/*
11397 	 * Both vmlinux and each module have their own ".data..percpu"
11398 	 * DATASECs in BTF. So for the module case, we need to skip vmlinux BTF
11399 	 * types and look only at the module's own BTF types.
11400 	 */
11401 	n = btf_nr_types(btf);
11402 	if (btf_is_module(btf))
11403 		i = btf_nr_types(btf_vmlinux);
11404 	else
11405 		i = 1;
11406 
11407 	for (; i < n; i++) {
11408 		t = btf_type_by_id(btf, i);
11409 		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
11410 			continue;
11411 
11412 		tname = btf_name_by_offset(btf, t->name_off);
11413 		if (!strcmp(tname, ".data..percpu"))
11414 			return i;
11415 	}
11416 
11417 	return -ENOENT;
11418 }
11419 
11420 /* replace pseudo btf_id with kernel symbol address */
11421 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
11422 			       struct bpf_insn *insn,
11423 			       struct bpf_insn_aux_data *aux)
11424 {
11425 	const struct btf_var_secinfo *vsi;
11426 	const struct btf_type *datasec;
11427 	struct btf_mod_pair *btf_mod;
11428 	const struct btf_type *t;
11429 	const char *sym_name;
11430 	bool percpu = false;
11431 	u32 type, id = insn->imm;
11432 	struct btf *btf;
11433 	s32 datasec_id;
11434 	u64 addr;
11435 	int i, btf_fd, err;
11436 
11437 	btf_fd = insn[1].imm;
11438 	if (btf_fd) {
11439 		btf = btf_get_by_fd(btf_fd);
11440 		if (IS_ERR(btf)) {
11441 			verbose(env, "invalid module BTF object FD specified.\n");
11442 			return -EINVAL;
11443 		}
11444 	} else {
11445 		if (!btf_vmlinux) {
11446 			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
11447 			return -EINVAL;
11448 		}
11449 		btf = btf_vmlinux;
11450 		btf_get(btf);
11451 	}
11452 
11453 	t = btf_type_by_id(btf, id);
11454 	if (!t) {
11455 		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
11456 		err = -ENOENT;
11457 		goto err_put;
11458 	}
11459 
11460 	if (!btf_type_is_var(t)) {
11461 		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
11462 		err = -EINVAL;
11463 		goto err_put;
11464 	}
11465 
11466 	sym_name = btf_name_by_offset(btf, t->name_off);
11467 	addr = kallsyms_lookup_name(sym_name);
11468 	if (!addr) {
11469 		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
11470 			sym_name);
11471 		err = -ENOENT;
11472 		goto err_put;
11473 	}
11474 
11475 	datasec_id = find_btf_percpu_datasec(btf);
11476 	if (datasec_id > 0) {
11477 		datasec = btf_type_by_id(btf, datasec_id);
11478 		for_each_vsi(i, datasec, vsi) {
11479 			if (vsi->type == id) {
11480 				percpu = true;
11481 				break;
11482 			}
11483 		}
11484 	}
11485 
11486 	insn[0].imm = (u32)addr;
11487 	insn[1].imm = addr >> 32;
11488 
11489 	type = t->type;
11490 	t = btf_type_skip_modifiers(btf, type, NULL);
11491 	if (percpu) {
11492 		aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
11493 		aux->btf_var.btf = btf;
11494 		aux->btf_var.btf_id = type;
11495 	} else if (!btf_type_is_struct(t)) {
11496 		const struct btf_type *ret;
11497 		const char *tname;
11498 		u32 tsize;
11499 
11500 		/* resolve the type size of ksym. */
11501 		ret = btf_resolve_size(btf, t, &tsize);
11502 		if (IS_ERR(ret)) {
11503 			tname = btf_name_by_offset(btf, t->name_off);
11504 			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
11505 				tname, PTR_ERR(ret));
11506 			err = -EINVAL;
11507 			goto err_put;
11508 		}
11509 		aux->btf_var.reg_type = PTR_TO_MEM;
11510 		aux->btf_var.mem_size = tsize;
11511 	} else {
11512 		aux->btf_var.reg_type = PTR_TO_BTF_ID;
11513 		aux->btf_var.btf = btf;
11514 		aux->btf_var.btf_id = type;
11515 	}
11516 
11517 	/* check whether we recorded this BTF (and maybe module) already */
11518 	for (i = 0; i < env->used_btf_cnt; i++) {
11519 		if (env->used_btfs[i].btf == btf) {
11520 			btf_put(btf);
11521 			return 0;
11522 		}
11523 	}
11524 
11525 	if (env->used_btf_cnt >= MAX_USED_BTFS) {
11526 		err = -E2BIG;
11527 		goto err_put;
11528 	}
11529 
11530 	btf_mod = &env->used_btfs[env->used_btf_cnt];
11531 	btf_mod->btf = btf;
11532 	btf_mod->module = NULL;
11533 
11534 	/* if we reference variables from kernel module, bump its refcount */
11535 	if (btf_is_module(btf)) {
11536 		btf_mod->module = btf_try_get_module(btf);
11537 		if (!btf_mod->module) {
11538 			err = -ENXIO;
11539 			goto err_put;
11540 		}
11541 	}
11542 
11543 	env->used_btf_cnt++;
11544 
11545 	return 0;
11546 err_put:
11547 	btf_put(btf);
11548 	return err;
11549 }
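/* Illustrative encoding (a sketch, not extra semantics): before this pass a
 * ksym load arrives as a two-slot ldimm64 where, roughly,
 *
 *	insn[0]: BPF_LD | BPF_IMM | BPF_DW, src_reg = BPF_PSEUDO_BTF_ID,
 *	         imm = BTF id of the KIND_VAR (e.g. a per-cpu variable)
 *	insn[1]: imm = BTF object fd (0 means vmlinux BTF)
 *
 * and after it insn[0].imm / insn[1].imm hold the low / high 32 bits of the
 * resolved kernel address, while aux->btf_var records the register type the
 * load will produce (PTR_TO_PERCPU_BTF_ID, PTR_TO_BTF_ID or PTR_TO_MEM).
 */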
11550 
11551 static int check_map_prealloc(struct bpf_map *map)
11552 {
11553 	return (map->map_type != BPF_MAP_TYPE_HASH &&
11554 		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
11555 		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
11556 		!(map->map_flags & BPF_F_NO_PREALLOC);
11557 }
11558 
11559 static bool is_tracing_prog_type(enum bpf_prog_type type)
11560 {
11561 	switch (type) {
11562 	case BPF_PROG_TYPE_KPROBE:
11563 	case BPF_PROG_TYPE_TRACEPOINT:
11564 	case BPF_PROG_TYPE_PERF_EVENT:
11565 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
11566 		return true;
11567 	default:
11568 		return false;
11569 	}
11570 }
11571 
11572 static bool is_preallocated_map(struct bpf_map *map)
11573 {
11574 	if (!check_map_prealloc(map))
11575 		return false;
11576 	if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
11577 		return false;
11578 	return true;
11579 }
11580 
11581 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
11582 					struct bpf_map *map,
11583 					struct bpf_prog *prog)
11584 
11585 {
11586 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
11587 	/*
11588 	 * Validate that trace type programs use preallocated hash maps.
11589 	 *
11590 	 * For programs attached to PERF events this is mandatory as the
11591 	 * perf NMI can hit any arbitrary code sequence.
11592 	 *
11593 	 * All other trace types using preallocated hash maps are unsafe as
11594 	 * well because tracepoint or kprobes can be inside locked regions
11595 	 * of the memory allocator or at a place where a recursion into the
11596 	 * memory allocator would see inconsistent state.
11597 	 *
11598 	 * On RT enabled kernels run-time allocation of all trace type
11599 	 * programs is strictly prohibited due to lock type constraints. On
11600 	 * !RT kernels it is allowed for backwards compatibility reasons for
11601 	 * now, but warnings are emitted so developers are made aware of
11602 	 * the unsafety and can fix their programs before this is enforced.
11603 	 */
11604 	if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
11605 		if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
11606 			verbose(env, "perf_event programs can only use preallocated hash map\n");
11607 			return -EINVAL;
11608 		}
11609 		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
11610 			verbose(env, "trace type programs can only use preallocated hash map\n");
11611 			return -EINVAL;
11612 		}
11613 		WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
11614 		verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
11615 	}
11616 
11617 	if (map_value_has_spin_lock(map)) {
11618 		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
11619 			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
11620 			return -EINVAL;
11621 		}
11622 
11623 		if (is_tracing_prog_type(prog_type)) {
11624 			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
11625 			return -EINVAL;
11626 		}
11627 
11628 		if (prog->aux->sleepable) {
11629 			verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
11630 			return -EINVAL;
11631 		}
11632 	}
11633 
11634 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
11635 	    !bpf_offload_prog_map_match(prog, map)) {
11636 		verbose(env, "offload device mismatch between prog and map\n");
11637 		return -EINVAL;
11638 	}
11639 
11640 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
11641 		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
11642 		return -EINVAL;
11643 	}
11644 
11645 	if (prog->aux->sleepable)
11646 		switch (map->map_type) {
11647 		case BPF_MAP_TYPE_HASH:
11648 		case BPF_MAP_TYPE_LRU_HASH:
11649 		case BPF_MAP_TYPE_ARRAY:
11650 		case BPF_MAP_TYPE_PERCPU_HASH:
11651 		case BPF_MAP_TYPE_PERCPU_ARRAY:
11652 		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
11653 		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
11654 		case BPF_MAP_TYPE_HASH_OF_MAPS:
11655 			if (!is_preallocated_map(map)) {
11656 				verbose(env,
11657 					"Sleepable programs can only use preallocated maps\n");
11658 				return -EINVAL;
11659 			}
11660 			break;
11661 		case BPF_MAP_TYPE_RINGBUF:
11662 			break;
11663 		default:
11664 			verbose(env,
11665 				"Sleepable programs can only use array, hash, and ringbuf maps\n");
11666 			return -EINVAL;
11667 		}
11668 
11669 	return 0;
11670 }
11671 
11672 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
11673 {
11674 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
11675 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
11676 }
11677 
11678 /* find and rewrite pseudo imm in ld_imm64 instructions:
11679  *
11680  * 1. if it accesses map FD, replace it with actual map pointer.
11681  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
11682  *
11683  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
11684  */
11685 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
11686 {
11687 	struct bpf_insn *insn = env->prog->insnsi;
11688 	int insn_cnt = env->prog->len;
11689 	int i, j, err;
11690 
11691 	err = bpf_prog_calc_tag(env->prog);
11692 	if (err)
11693 		return err;
11694 
11695 	for (i = 0; i < insn_cnt; i++, insn++) {
11696 		if (BPF_CLASS(insn->code) == BPF_LDX &&
11697 		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
11698 			verbose(env, "BPF_LDX uses reserved fields\n");
11699 			return -EINVAL;
11700 		}
11701 
11702 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
11703 			struct bpf_insn_aux_data *aux;
11704 			struct bpf_map *map;
11705 			struct fd f;
11706 			u64 addr;
11707 			u32 fd;
11708 
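			/* ld_imm64 is a 16-byte instruction occupying two insn
			 * slots; the second slot must have every field except
			 * imm cleared to zero.
			 */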
11709 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
11710 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
11711 			    insn[1].off != 0) {
11712 				verbose(env, "invalid bpf_ld_imm64 insn\n");
11713 				return -EINVAL;
11714 			}
11715 
11716 			if (insn[0].src_reg == 0)
11717 				/* valid generic load 64-bit imm */
11718 				goto next_insn;
11719 
11720 			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
11721 				aux = &env->insn_aux_data[i];
11722 				err = check_pseudo_btf_id(env, insn, aux);
11723 				if (err)
11724 					return err;
11725 				goto next_insn;
11726 			}
11727 
11728 			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
11729 				aux = &env->insn_aux_data[i];
11730 				aux->ptr_type = PTR_TO_FUNC;
11731 				goto next_insn;
11732 			}
11733 
11734 			/* In final convert_pseudo_ld_imm64() step, this is
11735 			 * converted into regular 64-bit imm load insn.
11736 			 */
11737 			switch (insn[0].src_reg) {
11738 			case BPF_PSEUDO_MAP_VALUE:
11739 			case BPF_PSEUDO_MAP_IDX_VALUE:
11740 				break;
11741 			case BPF_PSEUDO_MAP_FD:
11742 			case BPF_PSEUDO_MAP_IDX:
11743 				if (insn[1].imm == 0)
11744 					break;
11745 				fallthrough;
11746 			default:
11747 				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
11748 				return -EINVAL;
11749 			}
11750 
11751 			switch (insn[0].src_reg) {
11752 			case BPF_PSEUDO_MAP_IDX_VALUE:
11753 			case BPF_PSEUDO_MAP_IDX:
11754 				if (bpfptr_is_null(env->fd_array)) {
11755 					verbose(env, "fd_idx without fd_array is invalid\n");
11756 					return -EPROTO;
11757 				}
11758 				if (copy_from_bpfptr_offset(&fd, env->fd_array,
11759 							    insn[0].imm * sizeof(fd),
11760 							    sizeof(fd)))
11761 					return -EFAULT;
11762 				break;
11763 			default:
11764 				fd = insn[0].imm;
11765 				break;
11766 			}
11767 
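			/* __bpf_map_get() does not take a reference of its own;
			 * the fd keeps the map alive until fdput(), and
			 * bpf_map_inc() below takes a long-term reference once
			 * the map is recorded in used_maps.
			 */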
11768 			f = fdget(fd);
11769 			map = __bpf_map_get(f);
11770 			if (IS_ERR(map)) {
11771 				verbose(env, "fd %d is not pointing to valid bpf_map\n",
11772 					insn[0].imm);
11773 				return PTR_ERR(map);
11774 			}
11775 
11776 			err = check_map_prog_compatibility(env, map, env->prog);
11777 			if (err) {
11778 				fdput(f);
11779 				return err;
11780 			}
11781 
11782 			aux = &env->insn_aux_data[i];
11783 			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
11784 			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
11785 				addr = (unsigned long)map;
11786 			} else {
11787 				u32 off = insn[1].imm;
11788 
11789 				if (off >= BPF_MAX_VAR_OFF) {
11790 					verbose(env, "direct value offset of %u is not allowed\n", off);
11791 					fdput(f);
11792 					return -EINVAL;
11793 				}
11794 
11795 				if (!map->ops->map_direct_value_addr) {
11796 					verbose(env, "no direct value access support for this map type\n");
11797 					fdput(f);
11798 					return -EINVAL;
11799 				}
11800 
11801 				err = map->ops->map_direct_value_addr(map, &addr, off);
11802 				if (err) {
11803 					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
11804 						map->value_size, off);
11805 					fdput(f);
11806 					return err;
11807 				}
11808 
11809 				aux->map_off = off;
11810 				addr += off;
11811 			}
11812 
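			/* Store the 64-bit kernel address across the two imm
			 * fields of the ld_imm64 pair: low 32 bits in insn[0],
			 * high 32 bits in insn[1].
			 */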
11813 			insn[0].imm = (u32)addr;
11814 			insn[1].imm = addr >> 32;
11815 
11816 			/* check whether we recorded this map already */
11817 			for (j = 0; j < env->used_map_cnt; j++) {
11818 				if (env->used_maps[j] == map) {
11819 					aux->map_index = j;
11820 					fdput(f);
11821 					goto next_insn;
11822 				}
11823 			}
11824 
11825 			if (env->used_map_cnt >= MAX_USED_MAPS) {
11826 				fdput(f);
11827 				return -E2BIG;
11828 			}
11829 
11830 			/* hold the map. If the program is rejected by verifier,
11831 			 * the map will be released by release_maps() or it
11832 			 * will be used by the valid program until it's unloaded
11833 			 * and all maps are released in free_used_maps()
11834 			 */
11835 			bpf_map_inc(map);
11836 
11837 			aux->map_index = env->used_map_cnt;
11838 			env->used_maps[env->used_map_cnt++] = map;
11839 
11840 			if (bpf_map_is_cgroup_storage(map) &&
11841 			    bpf_cgroup_storage_assign(env->prog->aux, map)) {
11842 				verbose(env, "only one cgroup storage of each type is allowed\n");
11843 				fdput(f);
11844 				return -EBUSY;
11845 			}
11846 
11847 			fdput(f);
11848 next_insn:
11849 			insn++;
11850 			i++;
11851 			continue;
11852 		}
11853 
11854 		/* Basic sanity check before we invest more work here. */
11855 		if (!bpf_opcode_in_insntable(insn->code)) {
11856 			verbose(env, "unknown opcode %02x\n", insn->code);
11857 			return -EINVAL;
11858 		}
11859 	}
11860 
11861 	/* now all pseudo BPF_LD_IMM64 instructions load valid
11862 	 * 'struct bpf_map *' into a register instead of user map_fd.
11863 	 * These pointers will be used later by verifier to validate map access.
11864 	 */
11865 	return 0;
11866 }
11867 
11868 /* drop refcnt of maps used by the rejected program */
11869 static void release_maps(struct bpf_verifier_env *env)
11870 {
11871 	__bpf_free_used_maps(env->prog->aux, env->used_maps,
11872 			     env->used_map_cnt);
11873 }
11874 
11875 /* drop refcnt of btfs used by the rejected program */
11876 static void release_btfs(struct bpf_verifier_env *env)
11877 {
11878 	__bpf_free_used_btfs(env->prog->aux, env->used_btfs,
11879 			     env->used_btf_cnt);
11880 }
11881 
11882 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
11883 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
11884 {
11885 	struct bpf_insn *insn = env->prog->insnsi;
11886 	int insn_cnt = env->prog->len;
11887 	int i;
11888 
11889 	for (i = 0; i < insn_cnt; i++, insn++) {
11890 		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
11891 			continue;
11892 		if (insn->src_reg == BPF_PSEUDO_FUNC)
11893 			continue;
11894 		insn->src_reg = 0;
11895 	}
11896 }
11897 
11898 /* single env->prog->insnsi[off] instruction was replaced with the range
11899  * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
11900  * [0, off) and [off, end) to new locations, so the patched range stays zero
11901  */
11902 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
11903 				 struct bpf_insn_aux_data *new_data,
11904 				 struct bpf_prog *new_prog, u32 off, u32 cnt)
11905 {
11906 	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
11907 	struct bpf_insn *insn = new_prog->insnsi;
11908 	u32 old_seen = old_data[off].seen;
11909 	u32 prog_len;
11910 	int i;
11911 
11912 	/* aux info at OFF always needs adjustment, no matter whether the fast
11913 	 * path (cnt == 1) is taken or not. There is no guarantee the insn at
11914 	 * OFF is the original insn of the old prog.
11915 	 */
11916 	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
11917 
11918 	if (cnt == 1)
11919 		return;
11920 	prog_len = new_prog->len;
11921 
11922 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
11923 	memcpy(new_data + off + cnt - 1, old_data + off,
11924 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
11925 	for (i = off; i < off + cnt - 1; i++) {
11926 		/* Expand insni[off]'s seen count to the patched range. */
11927 		new_data[i].seen = old_seen;
11928 		new_data[i].zext_dst = insn_has_def32(env, insn + i);
11929 	}
11930 	env->insn_aux_data = new_data;
11931 	vfree(old_data);
11932 }
11933 
11934 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
11935 {
11936 	int i;
11937 
11938 	if (len == 1)
11939 		return;
11940 	/* NOTE: fake 'exit' subprog should be updated as well. */
11941 	for (i = 0; i <= env->subprog_cnt; i++) {
11942 		if (env->subprog_info[i].start <= off)
11943 			continue;
11944 		env->subprog_info[i].start += len - 1;
11945 	}
11946 }
11947 
11948 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
11949 {
11950 	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
11951 	int i, sz = prog->aux->size_poke_tab;
11952 	struct bpf_jit_poke_descriptor *desc;
11953 
11954 	for (i = 0; i < sz; i++) {
11955 		desc = &tab[i];
11956 		if (desc->insn_idx <= off)
11957 			continue;
11958 		desc->insn_idx += len - 1;
11959 	}
11960 }
11961 
11962 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
11963 					    const struct bpf_insn *patch, u32 len)
11964 {
11965 	struct bpf_prog *new_prog;
11966 	struct bpf_insn_aux_data *new_data = NULL;
11967 
11968 	if (len > 1) {
11969 		new_data = vzalloc(array_size(env->prog->len + len - 1,
11970 					      sizeof(struct bpf_insn_aux_data)));
11971 		if (!new_data)
11972 			return NULL;
11973 	}
11974 
11975 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
11976 	if (IS_ERR(new_prog)) {
11977 		if (PTR_ERR(new_prog) == -ERANGE)
11978 			verbose(env,
11979 				"insn %d cannot be patched due to 16-bit range\n",
11980 				env->insn_aux_data[off].orig_idx);
11981 		vfree(new_data);
11982 		return NULL;
11983 	}
11984 	adjust_insn_aux_data(env, new_data, new_prog, off, len);
11985 	adjust_subprog_starts(env, off, len);
11986 	adjust_poke_descs(new_prog, off, len);
11987 	return new_prog;
11988 }
11989 
11990 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
11991 					      u32 off, u32 cnt)
11992 {
11993 	int i, j;
11994 
11995 	/* find first prog starting at or after off (first to remove) */
11996 	for (i = 0; i < env->subprog_cnt; i++)
11997 		if (env->subprog_info[i].start >= off)
11998 			break;
11999 	/* find first prog starting at or after off + cnt (first to stay) */
12000 	for (j = i; j < env->subprog_cnt; j++)
12001 		if (env->subprog_info[j].start >= off + cnt)
12002 			break;
12003 	/* if j doesn't start exactly at off + cnt, we are just removing
12004 	 * the front of previous prog
12005 	 */
12006 	if (env->subprog_info[j].start != off + cnt)
12007 		j--;
12008 
12009 	if (j > i) {
12010 		struct bpf_prog_aux *aux = env->prog->aux;
12011 		int move;
12012 
12013 		/* move fake 'exit' subprog as well */
12014 		move = env->subprog_cnt + 1 - j;
12015 
12016 		memmove(env->subprog_info + i,
12017 			env->subprog_info + j,
12018 			sizeof(*env->subprog_info) * move);
12019 		env->subprog_cnt -= j - i;
12020 
12021 		/* remove func_info */
12022 		if (aux->func_info) {
12023 			move = aux->func_info_cnt - j;
12024 
12025 			memmove(aux->func_info + i,
12026 				aux->func_info + j,
12027 				sizeof(*aux->func_info) * move);
12028 			aux->func_info_cnt -= j - i;
12029 			/* func_info->insn_off is set after all code rewrites,
12030 			 * in adjust_btf_func() - no need to adjust
12031 			 */
12032 		}
12033 	} else {
12034 		/* convert i from "first prog to remove" to "first to adjust" */
12035 		if (env->subprog_info[i].start == off)
12036 			i++;
12037 	}
12038 
12039 	/* update fake 'exit' subprog as well */
12040 	for (; i <= env->subprog_cnt; i++)
12041 		env->subprog_info[i].start -= cnt;
12042 
12043 	return 0;
12044 }
12045 
12046 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
12047 				      u32 cnt)
12048 {
12049 	struct bpf_prog *prog = env->prog;
12050 	u32 i, l_off, l_cnt, nr_linfo;
12051 	struct bpf_line_info *linfo;
12052 
12053 	nr_linfo = prog->aux->nr_linfo;
12054 	if (!nr_linfo)
12055 		return 0;
12056 
12057 	linfo = prog->aux->linfo;
12058 
12059 	/* find first line info to remove, count lines to be removed */
12060 	for (i = 0; i < nr_linfo; i++)
12061 		if (linfo[i].insn_off >= off)
12062 			break;
12063 
12064 	l_off = i;
12065 	l_cnt = 0;
12066 	for (; i < nr_linfo; i++)
12067 		if (linfo[i].insn_off < off + cnt)
12068 			l_cnt++;
12069 		else
12070 			break;
12071 
12072 	/* If the first live insn doesn't match the first live linfo, it needs
12073 	 * to "inherit" the last removed linfo.  prog is already modified, so
12074 	 * prog->len == off means no live insns remain (tail of the program was removed).
12075 	 */
12076 	if (prog->len != off && l_cnt &&
12077 	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
12078 		l_cnt--;
12079 		linfo[--i].insn_off = off + cnt;
12080 	}
12081 
12082 	/* remove the line info which refer to the removed instructions */
12083 	if (l_cnt) {
12084 		memmove(linfo + l_off, linfo + i,
12085 			sizeof(*linfo) * (nr_linfo - i));
12086 
12087 		prog->aux->nr_linfo -= l_cnt;
12088 		nr_linfo = prog->aux->nr_linfo;
12089 	}
12090 
12091 	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
12092 	for (i = l_off; i < nr_linfo; i++)
12093 		linfo[i].insn_off -= cnt;
12094 
12095 	/* fix up all subprogs (incl. 'exit') which start >= off */
12096 	for (i = 0; i <= env->subprog_cnt; i++)
12097 		if (env->subprog_info[i].linfo_idx > l_off) {
12098 			/* program may have started in the removed region but
12099 			 * may not be fully removed
12100 			 */
12101 			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
12102 				env->subprog_info[i].linfo_idx -= l_cnt;
12103 			else
12104 				env->subprog_info[i].linfo_idx = l_off;
12105 		}
12106 
12107 	return 0;
12108 }
12109 
12110 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
12111 {
12112 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12113 	unsigned int orig_prog_len = env->prog->len;
12114 	int err;
12115 
12116 	if (bpf_prog_is_dev_bound(env->prog->aux))
12117 		bpf_prog_offload_remove_insns(env, off, cnt);
12118 
12119 	err = bpf_remove_insns(env->prog, off, cnt);
12120 	if (err)
12121 		return err;
12122 
12123 	err = adjust_subprog_starts_after_remove(env, off, cnt);
12124 	if (err)
12125 		return err;
12126 
12127 	err = bpf_adj_linfo_after_remove(env, off, cnt);
12128 	if (err)
12129 		return err;
12130 
12131 	memmove(aux_data + off,	aux_data + off + cnt,
12132 		sizeof(*aux_data) * (orig_prog_len - off - cnt));
12133 
12134 	return 0;
12135 }
12136 
12137 /* The verifier does more data flow analysis than llvm and will not
12138  * explore branches that are dead at run time. Malicious programs can
12139  * have dead code too. Therefore replace all dead at-run-time code
12140  * with 'ja -1'.
12141  *
12142  * Plain nops would not be optimal: if they sat at the end of the
12143  * program and, through another bug, we managed to jump there, we
12144  * would execute beyond program memory. Returning an exception
12145  * code also wouldn't work since we can have subprogs where the dead
12146  * code could be located.
12147  */
12148 static void sanitize_dead_code(struct bpf_verifier_env *env)
12149 {
12150 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12151 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
12152 	struct bpf_insn *insn = env->prog->insnsi;
12153 	const int insn_cnt = env->prog->len;
12154 	int i;
12155 
12156 	for (i = 0; i < insn_cnt; i++) {
12157 		if (aux_data[i].seen)
12158 			continue;
12159 		memcpy(insn + i, &trap, sizeof(trap));
12160 		aux_data[i].zext_dst = false;
12161 	}
12162 }
12163 
12164 static bool insn_is_cond_jump(u8 code)
12165 {
12166 	u8 op;
12167 
12168 	if (BPF_CLASS(code) == BPF_JMP32)
12169 		return true;
12170 
12171 	if (BPF_CLASS(code) != BPF_JMP)
12172 		return false;
12173 
12174 	op = BPF_OP(code);
12175 	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
12176 }
12177 
12178 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
12179 {
12180 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12181 	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12182 	struct bpf_insn *insn = env->prog->insnsi;
12183 	const int insn_cnt = env->prog->len;
12184 	int i;
12185 
12186 	for (i = 0; i < insn_cnt; i++, insn++) {
12187 		if (!insn_is_cond_jump(insn->code))
12188 			continue;
12189 
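		/* If the fall-through insn was never reached, the branch is
		 * always taken: hard-wire it into an unconditional jump to the
		 * branch target.  If the branch target was never reached, the
		 * branch is never taken: turn it into a nop ('ja +0').
		 */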
12190 		if (!aux_data[i + 1].seen)
12191 			ja.off = insn->off;
12192 		else if (!aux_data[i + 1 + insn->off].seen)
12193 			ja.off = 0;
12194 		else
12195 			continue;
12196 
12197 		if (bpf_prog_is_dev_bound(env->prog->aux))
12198 			bpf_prog_offload_replace_insn(env, i, &ja);
12199 
12200 		memcpy(insn, &ja, sizeof(ja));
12201 	}
12202 }
12203 
12204 static int opt_remove_dead_code(struct bpf_verifier_env *env)
12205 {
12206 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12207 	int insn_cnt = env->prog->len;
12208 	int i, err;
12209 
12210 	for (i = 0; i < insn_cnt; i++) {
12211 		int j;
12212 
12213 		j = 0;
12214 		while (i + j < insn_cnt && !aux_data[i + j].seen)
12215 			j++;
12216 		if (!j)
12217 			continue;
12218 
12219 		err = verifier_remove_insns(env, i, j);
12220 		if (err)
12221 			return err;
12222 		insn_cnt = env->prog->len;
12223 	}
12224 
12225 	return 0;
12226 }
12227 
12228 static int opt_remove_nops(struct bpf_verifier_env *env)
12229 {
12230 	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12231 	struct bpf_insn *insn = env->prog->insnsi;
12232 	int insn_cnt = env->prog->len;
12233 	int i, err;
12234 
12235 	for (i = 0; i < insn_cnt; i++) {
12236 		if (memcmp(&insn[i], &ja, sizeof(ja)))
12237 			continue;
12238 
12239 		err = verifier_remove_insns(env, i, 1);
12240 		if (err)
12241 			return err;
12242 		insn_cnt--;
12243 		i--;
12244 	}
12245 
12246 	return 0;
12247 }
12248 
12249 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
12250 					 const union bpf_attr *attr)
12251 {
12252 	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
12253 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
12254 	int i, patch_len, delta = 0, len = env->prog->len;
12255 	struct bpf_insn *insns = env->prog->insnsi;
12256 	struct bpf_prog *new_prog;
12257 	bool rnd_hi32;
12258 
12259 	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
12260 	zext_patch[1] = BPF_ZEXT_REG(0);
12261 	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
12262 	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
12263 	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
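	/* zext_patch appends an explicit zero-extension after an insn that
	 * defines a 32-bit subregister; rnd_hi32_patch (only used with
	 * BPF_F_TEST_RND_HI32) instead poisons the upper 32 bits of a 32-bit
	 * defined register with a random value, so tests catch any code that
	 * wrongly relies on those bits being zero.
	 */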
12264 	for (i = 0; i < len; i++) {
12265 		int adj_idx = i + delta;
12266 		struct bpf_insn insn;
12267 		int load_reg;
12268 
12269 		insn = insns[adj_idx];
12270 		load_reg = insn_def_regno(&insn);
12271 		if (!aux[adj_idx].zext_dst) {
12272 			u8 code, class;
12273 			u32 imm_rnd;
12274 
12275 			if (!rnd_hi32)
12276 				continue;
12277 
12278 			code = insn.code;
12279 			class = BPF_CLASS(code);
12280 			if (load_reg == -1)
12281 				continue;
12282 
12283 			/* NOTE: arg "reg" (the fourth one) is only used for
12284 			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
12285 			 *       here.
12286 			 */
12287 			if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
12288 				if (class == BPF_LD &&
12289 				    BPF_MODE(code) == BPF_IMM)
12290 					i++;
12291 				continue;
12292 			}
12293 
12294 			/* ctx load could be transformed into wider load. */
12295 			if (class == BPF_LDX &&
12296 			    aux[adj_idx].ptr_type == PTR_TO_CTX)
12297 				continue;
12298 
12299 			imm_rnd = get_random_int();
12300 			rnd_hi32_patch[0] = insn;
12301 			rnd_hi32_patch[1].imm = imm_rnd;
12302 			rnd_hi32_patch[3].dst_reg = load_reg;
12303 			patch = rnd_hi32_patch;
12304 			patch_len = 4;
12305 			goto apply_patch_buffer;
12306 		}
12307 
12308 		/* Add in a zero-extend instruction if a) the JIT has requested
12309 		 * it or b) it's a CMPXCHG.
12310 		 *
12311 		 * The latter is because: BPF_CMPXCHG always loads a value into
12312 		 * R0, therefore always zero-extends. However some archs'
12313 		 * equivalent instruction only does this load when the
12314 		 * comparison is successful. This detail of CMPXCHG is
12315 		 * orthogonal to the general zero-extension behaviour of the
12316 		 * CPU, so it's treated independently of bpf_jit_needs_zext.
12317 		 */
12318 		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
12319 			continue;
12320 
12321 		if (WARN_ON(load_reg == -1)) {
12322 			verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
12323 			return -EFAULT;
12324 		}
12325 
12326 		zext_patch[0] = insn;
12327 		zext_patch[1].dst_reg = load_reg;
12328 		zext_patch[1].src_reg = load_reg;
12329 		patch = zext_patch;
12330 		patch_len = 2;
12331 apply_patch_buffer:
12332 		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
12333 		if (!new_prog)
12334 			return -ENOMEM;
12335 		env->prog = new_prog;
12336 		insns = new_prog->insnsi;
12337 		aux = env->insn_aux_data;
12338 		delta += patch_len - 1;
12339 	}
12340 
12341 	return 0;
12342 }
12343 
12344 /* convert load instructions that access fields of a context type into a
12345  * sequence of instructions that access fields of the underlying structure:
12346  *     struct __sk_buff    -> struct sk_buff
12347  *     struct bpf_sock_ops -> struct sock
12348  */
12349 static int convert_ctx_accesses(struct bpf_verifier_env *env)
12350 {
12351 	const struct bpf_verifier_ops *ops = env->ops;
12352 	int i, cnt, size, ctx_field_size, delta = 0;
12353 	const int insn_cnt = env->prog->len;
12354 	struct bpf_insn insn_buf[16], *insn;
12355 	u32 target_size, size_default, off;
12356 	struct bpf_prog *new_prog;
12357 	enum bpf_access_type type;
12358 	bool is_narrower_load;
12359 
12360 	if (ops->gen_prologue || env->seen_direct_write) {
12361 		if (!ops->gen_prologue) {
12362 			verbose(env, "bpf verifier is misconfigured\n");
12363 			return -EINVAL;
12364 		}
12365 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
12366 					env->prog);
12367 		if (cnt >= ARRAY_SIZE(insn_buf)) {
12368 			verbose(env, "bpf verifier is misconfigured\n");
12369 			return -EINVAL;
12370 		} else if (cnt) {
12371 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
12372 			if (!new_prog)
12373 				return -ENOMEM;
12374 
12375 			env->prog = new_prog;
12376 			delta += cnt - 1;
12377 		}
12378 	}
12379 
12380 	if (bpf_prog_is_dev_bound(env->prog->aux))
12381 		return 0;
12382 
12383 	insn = env->prog->insnsi + delta;
12384 
12385 	for (i = 0; i < insn_cnt; i++, insn++) {
12386 		bpf_convert_ctx_access_t convert_ctx_access;
12387 		bool ctx_access;
12388 
12389 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
12390 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
12391 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
12392 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
12393 			type = BPF_READ;
12394 			ctx_access = true;
12395 		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
12396 			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
12397 			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
12398 			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
12399 			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
12400 			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
12401 			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
12402 			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
12403 			type = BPF_WRITE;
12404 			ctx_access = BPF_CLASS(insn->code) == BPF_STX;
12405 		} else {
12406 			continue;
12407 		}
12408 
12409 		if (type == BPF_WRITE &&
12410 		    env->insn_aux_data[i + delta].sanitize_stack_spill) {
12411 			struct bpf_insn patch[] = {
12412 				*insn,
12413 				BPF_ST_NOSPEC(),
12414 			};
12415 
12416 			cnt = ARRAY_SIZE(patch);
12417 			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
12418 			if (!new_prog)
12419 				return -ENOMEM;
12420 
12421 			delta    += cnt - 1;
12422 			env->prog = new_prog;
12423 			insn      = new_prog->insnsi + i + delta;
12424 			continue;
12425 		}
12426 
12427 		if (!ctx_access)
12428 			continue;
12429 
12430 		switch (env->insn_aux_data[i + delta].ptr_type) {
12431 		case PTR_TO_CTX:
12432 			if (!ops->convert_ctx_access)
12433 				continue;
12434 			convert_ctx_access = ops->convert_ctx_access;
12435 			break;
12436 		case PTR_TO_SOCKET:
12437 		case PTR_TO_SOCK_COMMON:
12438 			convert_ctx_access = bpf_sock_convert_ctx_access;
12439 			break;
12440 		case PTR_TO_TCP_SOCK:
12441 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
12442 			break;
12443 		case PTR_TO_XDP_SOCK:
12444 			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
12445 			break;
12446 		case PTR_TO_BTF_ID:
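			/* Reads through a BTF pointer become BPF_PROBE_MEM
			 * loads, which are backed by an exception table entry
			 * so a bad kernel address faults gracefully instead of
			 * crashing.
			 */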
12447 			if (type == BPF_READ) {
12448 				insn->code = BPF_LDX | BPF_PROBE_MEM |
12449 					BPF_SIZE((insn)->code);
12450 				env->prog->aux->num_exentries++;
12451 			} else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
12452 				verbose(env, "Writes through BTF pointers are not allowed\n");
12453 				return -EINVAL;
12454 			}
12455 			continue;
12456 		default:
12457 			continue;
12458 		}
12459 
12460 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
12461 		size = BPF_LDST_BYTES(insn);
12462 
12463 		/* If the read access is a narrower load of the field,
12464 		 * convert to a 4/8-byte load, to minimize program type specific
12465 		 * convert_ctx_access changes. If conversion is successful,
12466 		 * we will apply proper mask to the result.
12467 		 */
12468 		is_narrower_load = size < ctx_field_size;
12469 		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
12470 		off = insn->off;
12471 		if (is_narrower_load) {
12472 			u8 size_code;
12473 
12474 			if (type == BPF_WRITE) {
12475 				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
12476 				return -EINVAL;
12477 			}
12478 
12479 			size_code = BPF_H;
12480 			if (ctx_field_size == 4)
12481 				size_code = BPF_W;
12482 			else if (ctx_field_size == 8)
12483 				size_code = BPF_DW;
12484 
12485 			insn->off = off & ~(size_default - 1);
12486 			insn->code = BPF_LDX | BPF_MEM | size_code;
12487 		}
12488 
12489 		target_size = 0;
12490 		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
12491 					 &target_size);
12492 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
12493 		    (ctx_field_size && !target_size)) {
12494 			verbose(env, "bpf verifier is misconfigured\n");
12495 			return -EINVAL;
12496 		}
12497 
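		/* For a narrow load the full (aligned) field was loaded above;
		 * shift the wanted bytes down and mask off the rest.  E.g. on a
		 * little-endian machine a 1-byte read at byte 6 of an 8-byte
		 * field becomes an aligned 8-byte load followed by a right
		 * shift by 48 and an AND with 0xff.
		 */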
12498 		if (is_narrower_load && size < target_size) {
12499 			u8 shift = bpf_ctx_narrow_access_offset(
12500 				off, size, size_default) * 8;
12501 			if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
12502 				verbose(env, "bpf verifier narrow ctx load misconfigured\n");
12503 				return -EINVAL;
12504 			}
12505 			if (ctx_field_size <= 4) {
12506 				if (shift)
12507 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
12508 									insn->dst_reg,
12509 									shift);
12510 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
12511 								(1 << size * 8) - 1);
12512 			} else {
12513 				if (shift)
12514 					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
12515 									insn->dst_reg,
12516 									shift);
12517 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
12518 								(1ULL << size * 8) - 1);
12519 			}
12520 		}
12521 
12522 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
12523 		if (!new_prog)
12524 			return -ENOMEM;
12525 
12526 		delta += cnt - 1;
12527 
12528 		/* keep walking new program and skip insns we just inserted */
12529 		env->prog = new_prog;
12530 		insn      = new_prog->insnsi + i + delta;
12531 	}
12532 
12533 	return 0;
12534 }
12535 
12536 static int jit_subprogs(struct bpf_verifier_env *env)
12537 {
12538 	struct bpf_prog *prog = env->prog, **func, *tmp;
12539 	int i, j, subprog_start, subprog_end = 0, len, subprog;
12540 	struct bpf_map *map_ptr;
12541 	struct bpf_insn *insn;
12542 	void *old_bpf_func;
12543 	int err, num_exentries;
12544 
12545 	if (env->subprog_cnt <= 1)
12546 		return 0;
12547 
12548 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12549 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
12550 			continue;
12551 
12552 		/* Upon error here we cannot fall back to interpreter but
12553 		 * need a hard reject of the program. Thus -EFAULT is
12554 		 * propagated in any case.
12555 		 */
12556 		subprog = find_subprog(env, i + insn->imm + 1);
12557 		if (subprog < 0) {
12558 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
12559 				  i + insn->imm + 1);
12560 			return -EFAULT;
12561 		}
12562 		/* temporarily remember subprog id inside insn instead of
12563 		 * aux_data, since next loop will split up all insns into funcs
12564 		 */
12565 		insn->off = subprog;
12566 		/* remember original imm in case JIT fails and fallback
12567 		 * to interpreter will be needed
12568 		 */
12569 		env->insn_aux_data[i].call_imm = insn->imm;
12570 		/* point imm to __bpf_call_base+1 from JITs point of view */
12571 		insn->imm = 1;
12572 		if (bpf_pseudo_func(insn))
12573 			/* jit (e.g. x86_64) may emit fewer instructions
12574 			 * if it learns a u32 imm is the same as a u64 imm.
12575 			 * Force a non zero here.
12576 			 */
12577 			insn[1].imm = 1;
12578 	}
12579 
12580 	err = bpf_prog_alloc_jited_linfo(prog);
12581 	if (err)
12582 		goto out_undo_insn;
12583 
12584 	err = -ENOMEM;
12585 	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
12586 	if (!func)
12587 		goto out_undo_insn;
12588 
12589 	for (i = 0; i < env->subprog_cnt; i++) {
12590 		subprog_start = subprog_end;
12591 		subprog_end = env->subprog_info[i + 1].start;
12592 
12593 		len = subprog_end - subprog_start;
12594 		/* bpf_prog_run() doesn't call subprogs directly,
12595 		 * hence main prog stats include the runtime of subprogs.
12596 		 * subprogs don't have IDs and are not reachable via prog_get_next_id,
12597 		 * so func[i]->stats will never be accessed and stays NULL
12598 		 */
12599 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
12600 		if (!func[i])
12601 			goto out_free;
12602 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
12603 		       len * sizeof(struct bpf_insn));
12604 		func[i]->type = prog->type;
12605 		func[i]->len = len;
12606 		if (bpf_prog_calc_tag(func[i]))
12607 			goto out_free;
12608 		func[i]->is_func = 1;
12609 		func[i]->aux->func_idx = i;
12610 		/* Below members will be freed only at prog->aux */
12611 		func[i]->aux->btf = prog->aux->btf;
12612 		func[i]->aux->func_info = prog->aux->func_info;
12613 		func[i]->aux->poke_tab = prog->aux->poke_tab;
12614 		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
12615 
12616 		for (j = 0; j < prog->aux->size_poke_tab; j++) {
12617 			struct bpf_jit_poke_descriptor *poke;
12618 
12619 			poke = &prog->aux->poke_tab[j];
12620 			if (poke->insn_idx < subprog_end &&
12621 			    poke->insn_idx >= subprog_start)
12622 				poke->aux = func[i]->aux;
12623 		}
12624 
12625 		/* Use bpf_prog_F_tag to indicate functions in stack traces.
12626 		 * Long term we would need debug info to populate names.
12627 		 */
12628 		func[i]->aux->name[0] = 'F';
12629 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
12630 		func[i]->jit_requested = 1;
12631 		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
12632 		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
12633 		func[i]->aux->linfo = prog->aux->linfo;
12634 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
12635 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
12636 		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
12637 		num_exentries = 0;
12638 		insn = func[i]->insnsi;
12639 		for (j = 0; j < func[i]->len; j++, insn++) {
12640 			if (BPF_CLASS(insn->code) == BPF_LDX &&
12641 			    BPF_MODE(insn->code) == BPF_PROBE_MEM)
12642 				num_exentries++;
12643 		}
12644 		func[i]->aux->num_exentries = num_exentries;
12645 		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
12646 		func[i] = bpf_int_jit_compile(func[i]);
12647 		if (!func[i]->jited) {
12648 			err = -ENOTSUPP;
12649 			goto out_free;
12650 		}
12651 		cond_resched();
12652 	}
12653 
12654 	/* at this point all bpf functions were successfully JITed
12655 	 * now populate all bpf_calls with correct addresses and
12656 	 * run last pass of JIT
12657 	 */
12658 	for (i = 0; i < env->subprog_cnt; i++) {
12659 		insn = func[i]->insnsi;
12660 		for (j = 0; j < func[i]->len; j++, insn++) {
12661 			if (bpf_pseudo_func(insn)) {
12662 				subprog = insn->off;
12663 				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
12664 				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
12665 				continue;
12666 			}
12667 			if (!bpf_pseudo_call(insn))
12668 				continue;
12669 			subprog = insn->off;
12670 			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
12671 		}
12672 
12673 		/* we use the aux data to keep a list of the start addresses
12674 		 * of the JITed images for each function in the program
12675 		 *
12676 		 * for some architectures, such as powerpc64, the imm field
12677 		 * might not be large enough to hold the offset of the start
12678 		 * address of the callee's JITed image from __bpf_call_base
12679 		 *
12680 		 * in such cases, we can lookup the start address of a callee
12681 		 * by using its subprog id, available from the off field of
12682 		 * the call instruction, as an index for this list
12683 		 */
12684 		func[i]->aux->func = func;
12685 		func[i]->aux->func_cnt = env->subprog_cnt;
12686 	}
12687 	for (i = 0; i < env->subprog_cnt; i++) {
12688 		old_bpf_func = func[i]->bpf_func;
12689 		tmp = bpf_int_jit_compile(func[i]);
12690 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
12691 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
12692 			err = -ENOTSUPP;
12693 			goto out_free;
12694 		}
12695 		cond_resched();
12696 	}
12697 
12698 	/* finally lock prog and jit images for all functions and
12699 	 * populate kallsyms
12700 	 */
12701 	for (i = 0; i < env->subprog_cnt; i++) {
12702 		bpf_prog_lock_ro(func[i]);
12703 		bpf_prog_kallsyms_add(func[i]);
12704 	}
12705 
12706 	/* Last step: make now unused interpreter insns from main
12707 	 * prog consistent for later dump requests, so they
12708 	 * look the same as if they had only ever been interpreted.
12709 	 */
12710 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12711 		if (bpf_pseudo_func(insn)) {
12712 			insn[0].imm = env->insn_aux_data[i].call_imm;
12713 			insn[1].imm = insn->off;
12714 			insn->off = 0;
12715 			continue;
12716 		}
12717 		if (!bpf_pseudo_call(insn))
12718 			continue;
12719 		insn->off = env->insn_aux_data[i].call_imm;
12720 		subprog = find_subprog(env, i + insn->off + 1);
12721 		insn->imm = subprog;
12722 	}
12723 
12724 	prog->jited = 1;
12725 	prog->bpf_func = func[0]->bpf_func;
12726 	prog->aux->func = func;
12727 	prog->aux->func_cnt = env->subprog_cnt;
12728 	bpf_prog_jit_attempt_done(prog);
12729 	return 0;
12730 out_free:
12731 	/* We failed JIT'ing, so at this point we need to unregister poke
12732 	 * descriptors from subprogs, so that the kernel does not attempt to
12733 	 * patch them anymore while we're freeing the subprog JIT memory.
12734 	 */
12735 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
12736 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
12737 		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
12738 	}
12739 	/* At this point we're guaranteed that poke descriptors are not
12740 	 * live anymore. We can just unlink each subprog's descriptor table
12741 	 * as it's released with the main prog.
12742 	 */
12743 	for (i = 0; i < env->subprog_cnt; i++) {
12744 		if (!func[i])
12745 			continue;
12746 		func[i]->aux->poke_tab = NULL;
12747 		bpf_jit_free(func[i]);
12748 	}
12749 	kfree(func);
12750 out_undo_insn:
12751 	/* cleanup main prog to be interpreted */
12752 	prog->jit_requested = 0;
12753 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12754 		if (!bpf_pseudo_call(insn))
12755 			continue;
12756 		insn->off = 0;
12757 		insn->imm = env->insn_aux_data[i].call_imm;
12758 	}
12759 	bpf_prog_jit_attempt_done(prog);
12760 	return err;
12761 }
12762 
12763 static int fixup_call_args(struct bpf_verifier_env *env)
12764 {
12765 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
12766 	struct bpf_prog *prog = env->prog;
12767 	struct bpf_insn *insn = prog->insnsi;
12768 	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
12769 	int i, depth;
12770 #endif
12771 	int err = 0;
12772 
12773 	if (env->prog->jit_requested &&
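	/* Prefer JITing all subprogs.  A clean JIT failure falls back to the
	 * interpreter below (when the interpreter is built in), while -EFAULT
	 * means a hard reject of the program.
	 */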
12774 	    !bpf_prog_is_dev_bound(env->prog->aux)) {
12775 		err = jit_subprogs(env);
12776 		if (err == 0)
12777 			return 0;
12778 		if (err == -EFAULT)
12779 			return err;
12780 	}
12781 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
12782 	if (has_kfunc_call) {
12783 		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
12784 		return -EINVAL;
12785 	}
12786 	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
12787 		/* When JIT fails the progs with bpf2bpf calls and tail_calls
12788 		 * have to be rejected, since interpreter doesn't support them yet.
12789 		 */
12790 		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
12791 		return -EINVAL;
12792 	}
12793 	for (i = 0; i < prog->len; i++, insn++) {
12794 		if (bpf_pseudo_func(insn)) {
12795 			/* When JIT fails the progs with callback calls
12796 			 * have to be rejected, since interpreter doesn't support them yet.
12797 			 */
12798 			verbose(env, "callbacks are not allowed in non-JITed programs\n");
12799 			return -EINVAL;
12800 		}
12801 
12802 		if (!bpf_pseudo_call(insn))
12803 			continue;
12804 		depth = get_callee_stack_depth(env, insn, i);
12805 		if (depth < 0)
12806 			return depth;
12807 		bpf_patch_call_args(insn, depth);
12808 	}
12809 	err = 0;
12810 #endif
12811 	return err;
12812 }
12813 
12814 static int fixup_kfunc_call(struct bpf_verifier_env *env,
12815 			    struct bpf_insn *insn)
12816 {
12817 	const struct bpf_kfunc_desc *desc;
12818 
12819 	if (!insn->imm) {
12820 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
12821 		return -EINVAL;
12822 	}
12823 
12824 	/* insn->imm has the btf func_id. Replace it with
12825 	 * an address (relative to __bpf_base_call).
12826 	 * an address (relative to __bpf_call_base).
12827 	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
12828 	if (!desc) {
12829 		verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
12830 			insn->imm);
12831 		return -EFAULT;
12832 	}
12833 
12834 	insn->imm = desc->imm;
12835 
12836 	return 0;
12837 }
12838 
12839 /* Do various post-verification rewrites in a single program pass.
12840  * These rewrites simplify JIT and interpreter implementations.
12841  */
12842 static int do_misc_fixups(struct bpf_verifier_env *env)
12843 {
12844 	struct bpf_prog *prog = env->prog;
12845 	bool expect_blinding = bpf_jit_blinding_enabled(prog);
12846 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
12847 	struct bpf_insn *insn = prog->insnsi;
12848 	const struct bpf_func_proto *fn;
12849 	const int insn_cnt = prog->len;
12850 	const struct bpf_map_ops *ops;
12851 	struct bpf_insn_aux_data *aux;
12852 	struct bpf_insn insn_buf[16];
12853 	struct bpf_prog *new_prog;
12854 	struct bpf_map *map_ptr;
12855 	int i, ret, cnt, delta = 0;
12856 
12857 	for (i = 0; i < insn_cnt; i++, insn++) {
12858 		/* Make divide-by-zero exceptions impossible. */
12859 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
12860 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
12861 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
12862 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
12863 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
12864 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
12865 			struct bpf_insn *patchlet;
12866 			struct bpf_insn chk_and_div[] = {
12867 				/* [R,W]x div 0 -> 0 */
12868 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
12869 					     BPF_JNE | BPF_K, insn->src_reg,
12870 					     0, 2, 0),
12871 				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
12872 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
12873 				*insn,
12874 			};
12875 			struct bpf_insn chk_and_mod[] = {
12876 				/* [R,W]x mod 0 -> [R,W]x */
12877 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
12878 					     BPF_JEQ | BPF_K, insn->src_reg,
12879 					     0, 1 + (is64 ? 0 : 1), 0),
12880 				*insn,
12881 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
12882 				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
12883 			};
12884 
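			/* BPF semantics: div by zero yields 0 in the dst
			 * register, mod by zero leaves the dividend unchanged
			 * (truncated to 32 bits for 32-bit mod).  Emitting the
			 * check inline means the JITed code can never trap.
			 */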
12885 			patchlet = isdiv ? chk_and_div : chk_and_mod;
12886 			cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
12887 				      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
12888 
12889 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
12890 			if (!new_prog)
12891 				return -ENOMEM;
12892 
12893 			delta    += cnt - 1;
12894 			env->prog = prog = new_prog;
12895 			insn      = new_prog->insnsi + i + delta;
12896 			continue;
12897 		}
12898 
12899 		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
12900 		if (BPF_CLASS(insn->code) == BPF_LD &&
12901 		    (BPF_MODE(insn->code) == BPF_ABS ||
12902 		     BPF_MODE(insn->code) == BPF_IND)) {
12903 			cnt = env->ops->gen_ld_abs(insn, insn_buf);
12904 			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
12905 				verbose(env, "bpf verifier is misconfigured\n");
12906 				return -EINVAL;
12907 			}
12908 
12909 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
12910 			if (!new_prog)
12911 				return -ENOMEM;
12912 
12913 			delta    += cnt - 1;
12914 			env->prog = prog = new_prog;
12915 			insn      = new_prog->insnsi + i + delta;
12916 			continue;
12917 		}
12918 
12919 		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
12920 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
12921 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
12922 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
12923 			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
12924 			struct bpf_insn *patch = &insn_buf[0];
12925 			bool issrc, isneg, isimm;
12926 			u32 off_reg;
12927 
12928 			aux = &env->insn_aux_data[i + delta];
12929 			if (!aux->alu_state ||
12930 			    aux->alu_state == BPF_ALU_NON_POINTER)
12931 				continue;
12932 
12933 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
12934 			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
12935 				BPF_ALU_SANITIZE_SRC;
12936 			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
12937 
12938 			off_reg = issrc ? insn->src_reg : insn->dst_reg;
12939 			if (isimm) {
12940 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
12941 			} else {
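				/* Branch-free clamp: (alu_limit - off) | off has
				 * its sign bit set iff off is negative or exceeds
				 * alu_limit.  NEG + ARSH 63 turns that into an
				 * all-ones or all-zeroes mask, and the final AND
				 * keeps off when it is in range and forces it to
				 * 0 otherwise, so speculative execution cannot
				 * step past the limit.
				 */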
12942 				if (isneg)
12943 					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
12944 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
12945 				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
12946 				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
12947 				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
12948 				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
12949 				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
12950 			}
12951 			if (!issrc)
12952 				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
12953 			insn->src_reg = BPF_REG_AX;
12954 			if (isneg)
12955 				insn->code = insn->code == code_add ?
12956 					     code_sub : code_add;
12957 			*patch++ = *insn;
12958 			if (issrc && isneg && !isimm)
12959 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
12960 			cnt = patch - insn_buf;
12961 
12962 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
12963 			if (!new_prog)
12964 				return -ENOMEM;
12965 
12966 			delta    += cnt - 1;
12967 			env->prog = prog = new_prog;
12968 			insn      = new_prog->insnsi + i + delta;
12969 			continue;
12970 		}
12971 
12972 		if (insn->code != (BPF_JMP | BPF_CALL))
12973 			continue;
12974 		if (insn->src_reg == BPF_PSEUDO_CALL)
12975 			continue;
12976 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
12977 			ret = fixup_kfunc_call(env, insn);
12978 			if (ret)
12979 				return ret;
12980 			continue;
12981 		}
12982 
12983 		if (insn->imm == BPF_FUNC_get_route_realm)
12984 			prog->dst_needed = 1;
12985 		if (insn->imm == BPF_FUNC_get_prandom_u32)
12986 			bpf_user_rnd_init_once();
12987 		if (insn->imm == BPF_FUNC_override_return)
12988 			prog->kprobe_override = 1;
12989 		if (insn->imm == BPF_FUNC_tail_call) {
12990 			/* If we tail call into other programs, we
12991 			 * cannot make any assumptions since they can
12992 			 * be replaced dynamically during runtime in
12993 			 * the program array.
12994 			 */
12995 			prog->cb_access = 1;
12996 			if (!allow_tail_call_in_subprogs(env))
12997 				prog->aux->stack_depth = MAX_BPF_STACK;
12998 			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
12999 
13000 			/* mark bpf_tail_call as different opcode to avoid
13001 			 * conditional branch in the interpreter for every normal
13002 			 * call and to prevent accidental JITing by JIT compiler
13003 			 * that doesn't support bpf_tail_call yet
13004 			 */
13005 			insn->imm = 0;
13006 			insn->code = BPF_JMP | BPF_TAIL_CALL;
13007 
13008 			aux = &env->insn_aux_data[i + delta];
13009 			if (env->bpf_capable && !expect_blinding &&
13010 			    prog->jit_requested &&
13011 			    !bpf_map_key_poisoned(aux) &&
13012 			    !bpf_map_ptr_poisoned(aux) &&
13013 			    !bpf_map_ptr_unpriv(aux)) {
13014 				struct bpf_jit_poke_descriptor desc = {
13015 					.reason = BPF_POKE_REASON_TAIL_CALL,
13016 					.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
13017 					.tail_call.key = bpf_map_key_immediate(aux),
13018 					.insn_idx = i + delta,
13019 				};
13020 
13021 				ret = bpf_jit_add_poke_descriptor(prog, &desc);
13022 				if (ret < 0) {
13023 					verbose(env, "adding tail call poke descriptor failed\n");
13024 					return ret;
13025 				}
13026 
13027 				insn->imm = ret + 1;
13028 				continue;
13029 			}
13030 
13031 			if (!bpf_map_ptr_unpriv(aux))
13032 				continue;
13033 
13034 			/* instead of changing every JIT dealing with tail_call
13035 			 * emit two extra insns:
13036 			 * if (index >= max_entries) goto out;
13037 			 * index &= array->index_mask;
13038 			 * to avoid out-of-bounds cpu speculation
13039 			 */
13040 			if (bpf_map_ptr_poisoned(aux)) {
13041 				verbose(env, "tail_call abusing map_ptr\n");
13042 				return -EINVAL;
13043 			}
13044 
13045 			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
13046 			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
13047 						  map_ptr->max_entries, 2);
13048 			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
13049 						    container_of(map_ptr,
13050 								 struct bpf_array,
13051 								 map)->index_mask);
13052 			insn_buf[2] = *insn;
13053 			cnt = 3;
13054 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13055 			if (!new_prog)
13056 				return -ENOMEM;
13057 
13058 			delta    += cnt - 1;
13059 			env->prog = prog = new_prog;
13060 			insn      = new_prog->insnsi + i + delta;
13061 			continue;
13062 		}
13063 
13064 		if (insn->imm == BPF_FUNC_timer_set_callback) {
13065 			/* The verifier will process callback_fn as many times as necessary
13066 			 * with different maps and the register states prepared by
13067 			 * set_timer_callback_state will be accurate.
13068 			 *
13069 			 * The following use case is valid:
13070 			 *   map1 is shared by prog1, prog2, prog3.
13071 			 *   prog1 calls bpf_timer_init for some map1 elements
13072 			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
13073 			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
13074 			 *   prog3 calls bpf_timer_start for some map1 elements.
13075 			 *     Those that were not both bpf_timer_init-ed and
13076 			 *     bpf_timer_set_callback-ed will return -EINVAL.
13077 			 */
13078 			struct bpf_insn ld_addrs[2] = {
13079 				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
13080 			};
13081 
13082 			insn_buf[0] = ld_addrs[0];
13083 			insn_buf[1] = ld_addrs[1];
13084 			insn_buf[2] = *insn;
13085 			cnt = 3;
13086 
13087 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13088 			if (!new_prog)
13089 				return -ENOMEM;
13090 
13091 			delta    += cnt - 1;
13092 			env->prog = prog = new_prog;
13093 			insn      = new_prog->insnsi + i + delta;
13094 			goto patch_call_imm;
13095 		}
13096 
13097 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
13098 		 * and other inlining handlers are currently limited to 64 bit
13099 		 * only.
13100 		 */
13101 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
13102 		    (insn->imm == BPF_FUNC_map_lookup_elem ||
13103 		     insn->imm == BPF_FUNC_map_update_elem ||
13104 		     insn->imm == BPF_FUNC_map_delete_elem ||
13105 		     insn->imm == BPF_FUNC_map_push_elem   ||
13106 		     insn->imm == BPF_FUNC_map_pop_elem    ||
13107 		     insn->imm == BPF_FUNC_map_peek_elem   ||
13108 		     insn->imm == BPF_FUNC_redirect_map    ||
13109 		     insn->imm == BPF_FUNC_for_each_map_elem)) {
13110 			aux = &env->insn_aux_data[i + delta];
13111 			if (bpf_map_ptr_poisoned(aux))
13112 				goto patch_call_imm;
13113 
13114 			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
13115 			ops = map_ptr->ops;
13116 			if (insn->imm == BPF_FUNC_map_lookup_elem &&
13117 			    ops->map_gen_lookup) {
13118 				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
13119 				if (cnt == -EOPNOTSUPP)
13120 					goto patch_map_ops_generic;
13121 				if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
13122 					verbose(env, "bpf verifier is misconfigured\n");
13123 					return -EINVAL;
13124 				}
13125 
13126 				new_prog = bpf_patch_insn_data(env, i + delta,
13127 							       insn_buf, cnt);
13128 				if (!new_prog)
13129 					return -ENOMEM;
13130 
13131 				delta    += cnt - 1;
13132 				env->prog = prog = new_prog;
13133 				insn      = new_prog->insnsi + i + delta;
13134 				continue;
13135 			}
13136 
13137 			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
13138 				     (void *(*)(struct bpf_map *map, void *key))NULL));
13139 			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
13140 				     (int (*)(struct bpf_map *map, void *key))NULL));
13141 			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
13142 				     (int (*)(struct bpf_map *map, void *key, void *value,
13143 					      u64 flags))NULL));
13144 			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
13145 				     (int (*)(struct bpf_map *map, void *value,
13146 					      u64 flags))NULL));
13147 			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
13148 				     (int (*)(struct bpf_map *map, void *value))NULL));
13149 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
13150 				     (int (*)(struct bpf_map *map, void *value))NULL));
13151 			BUILD_BUG_ON(!__same_type(ops->map_redirect,
13152 				     (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
13153 			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
13154 				     (int (*)(struct bpf_map *map,
13155 					      bpf_callback_t callback_fn,
13156 					      void *callback_ctx,
13157 					      u64 flags))NULL));
13158 
13159 patch_map_ops_generic:
13160 			switch (insn->imm) {
13161 			case BPF_FUNC_map_lookup_elem:
13162 				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
13163 				continue;
13164 			case BPF_FUNC_map_update_elem:
13165 				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
13166 				continue;
13167 			case BPF_FUNC_map_delete_elem:
13168 				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
13169 				continue;
13170 			case BPF_FUNC_map_push_elem:
13171 				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
13172 				continue;
13173 			case BPF_FUNC_map_pop_elem:
13174 				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
13175 				continue;
13176 			case BPF_FUNC_map_peek_elem:
13177 				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
13178 				continue;
13179 			case BPF_FUNC_redirect_map:
13180 				insn->imm = BPF_CALL_IMM(ops->map_redirect);
13181 				continue;
13182 			case BPF_FUNC_for_each_map_elem:
13183 				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
13184 				continue;
13185 			}
13186 
13187 			goto patch_call_imm;
13188 		}
13189 
13190 		/* Implement bpf_jiffies64 inline. */
13191 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
13192 		    insn->imm == BPF_FUNC_jiffies64) {
13193 			struct bpf_insn ld_jiffies_addr[2] = {
13194 				BPF_LD_IMM64(BPF_REG_0,
13195 					     (unsigned long)&jiffies),
13196 			};
13197 
13198 			insn_buf[0] = ld_jiffies_addr[0];
13199 			insn_buf[1] = ld_jiffies_addr[1];
13200 			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
13201 						  BPF_REG_0, 0);
13202 			cnt = 3;
13203 
13204 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
13205 						       cnt);
13206 			if (!new_prog)
13207 				return -ENOMEM;
13208 
13209 			delta    += cnt - 1;
13210 			env->prog = prog = new_prog;
13211 			insn      = new_prog->insnsi + i + delta;
13212 			continue;
13213 		}
13214 
13215 		/* Implement bpf_get_func_ip inline. */
13216 		if (prog_type == BPF_PROG_TYPE_TRACING &&
13217 		    insn->imm == BPF_FUNC_get_func_ip) {
13218 			/* Load IP address from ctx - 8 */
13219 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13220 
13221 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
13222 			if (!new_prog)
13223 				return -ENOMEM;
13224 
13225 			env->prog = prog = new_prog;
13226 			insn      = new_prog->insnsi + i + delta;
13227 			continue;
13228 		}
13229 
13230 patch_call_imm:
13231 		fn = env->ops->get_func_proto(insn->imm, env->prog);
13232 		/* all functions that have a prototype and that the verifier
13233 		 * allowed programs to call must be real in-kernel functions
13234 		 */
13235 		if (!fn->func) {
13236 			verbose(env,
13237 				"kernel subsystem misconfigured func %s#%d\n",
13238 				func_id_name(insn->imm), insn->imm);
13239 			return -EFAULT;
13240 		}
13241 		insn->imm = fn->func - __bpf_call_base;
13242 	}
13243 
13244 	/* Since poke tab is now finalized, publish aux to tracker. */
13245 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
13246 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
13247 		if (!map_ptr->ops->map_poke_track ||
13248 		    !map_ptr->ops->map_poke_untrack ||
13249 		    !map_ptr->ops->map_poke_run) {
13250 			verbose(env, "bpf verifier is misconfigured\n");
13251 			return -EINVAL;
13252 		}
13253 
13254 		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
13255 		if (ret < 0) {
13256 			verbose(env, "tracking tail call prog failed\n");
13257 			return ret;
13258 		}
13259 	}
13260 
13261 	sort_kfunc_descs_by_imm(env->prog);
13262 
13263 	return 0;
13264 }
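
/* A minimal illustration (not part of the kernel source) of what the map-ops
 * patching above achieves, assuming BPF_CALL_IMM() is the usual
 * "address relative to __bpf_call_base" encoding.  For a map whose ops
 * provide map_lookup_elem, a call to the generic bpf_map_lookup_elem()
 * helper ends up encoded as a call to the map-specific implementation:
 *
 *	insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
 *	// roughly equivalent to:
 *	insn->imm = (void *)ops->map_lookup_elem - (void *)__bpf_call_base;
 *
 * which lets the JIT call the map's own implementation directly instead of
 * going through the generic helper's indirect call.
 */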
13265 
13266 static void free_states(struct bpf_verifier_env *env)
13267 {
13268 	struct bpf_verifier_state_list *sl, *sln;
13269 	int i;
13270 
13271 	sl = env->free_list;
13272 	while (sl) {
13273 		sln = sl->next;
13274 		free_verifier_state(&sl->state, false);
13275 		kfree(sl);
13276 		sl = sln;
13277 	}
13278 	env->free_list = NULL;
13279 
13280 	if (!env->explored_states)
13281 		return;
13282 
13283 	for (i = 0; i < state_htab_size(env); i++) {
13284 		sl = env->explored_states[i];
13285 
13286 		while (sl) {
13287 			sln = sl->next;
13288 			free_verifier_state(&sl->state, false);
13289 			kfree(sl);
13290 			sl = sln;
13291 		}
13292 		env->explored_states[i] = NULL;
13293 	}
13294 }
13295 
13296 static int do_check_common(struct bpf_verifier_env *env, int subprog)
13297 {
13298 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
13299 	struct bpf_verifier_state *state;
13300 	struct bpf_reg_state *regs;
13301 	int ret, i;
13302 
13303 	env->prev_linfo = NULL;
13304 	env->pass_cnt++;
13305 
13306 	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
13307 	if (!state)
13308 		return -ENOMEM;
13309 	state->curframe = 0;
13310 	state->speculative = false;
13311 	state->branches = 1;
13312 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
13313 	if (!state->frame[0]) {
13314 		kfree(state);
13315 		return -ENOMEM;
13316 	}
13317 	env->cur_state = state;
13318 	init_func_state(env, state->frame[0],
13319 			BPF_MAIN_FUNC /* callsite */,
13320 			0 /* frameno */,
13321 			subprog);
13322 
13323 	regs = state->frame[state->curframe]->regs;
13324 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
13325 		ret = btf_prepare_func_args(env, subprog, regs);
13326 		if (ret)
13327 			goto out;
13328 		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
13329 			if (regs[i].type == PTR_TO_CTX)
13330 				mark_reg_known_zero(env, regs, i);
13331 			else if (regs[i].type == SCALAR_VALUE)
13332 				mark_reg_unknown(env, regs, i);
13333 			else if (regs[i].type == PTR_TO_MEM_OR_NULL) {
13334 				const u32 mem_size = regs[i].mem_size;
13335 
13336 				mark_reg_known_zero(env, regs, i);
13337 				regs[i].mem_size = mem_size;
13338 				regs[i].id = ++env->id_gen;
13339 			}
13340 		}
13341 	} else {
13342 		/* 1st arg to a function */
13343 		regs[BPF_REG_1].type = PTR_TO_CTX;
13344 		mark_reg_known_zero(env, regs, BPF_REG_1);
13345 		ret = btf_check_subprog_arg_match(env, subprog, regs);
13346 		if (ret == -EFAULT)
13347 			/* unlikely verifier bug. abort.
13348 			 * ret == 0 and ret < 0 are sadly acceptable for the
13349 			 * main() function due to backward compatibility.
13350 			 * For example, a socket filter program may be written as:
13351 			 * int bpf_prog(struct pt_regs *ctx)
13352 			 * and never dereference that ctx in the program.
13353 			 * 'struct pt_regs' is a type mismatch for a socket
13354 			 * filter, which should be using 'struct __sk_buff'.
13355 			 */
13356 			goto out;
13357 	}
13358 
13359 	ret = do_check(env);
13360 out:
13361 	/* the NULL check is necessary, since cur_state can be freed inside
13362 	 * do_check() under memory pressure.
13363 	 */
13364 	if (env->cur_state) {
13365 		free_verifier_state(env->cur_state, true);
13366 		env->cur_state = NULL;
13367 	}
13368 	while (!pop_stack(env, NULL, NULL, false));
13369 	if (!ret && pop_log)
13370 		bpf_vlog_reset(&env->log, 0);
13371 	free_states(env);
13372 	return ret;
13373 }
13374 
13375 /* Verify all global functions in a BPF program one by one based on their BTF.
13376  * All global functions must pass verification. Otherwise the whole program is rejected.
13377  * Consider:
13378  * int bar(int);
13379  * int foo(int f)
13380  * {
13381  *    return bar(f);
13382  * }
13383  * int bar(int b)
13384  * {
13385  *    ...
13386  * }
13387  * foo() will be verified first for R1=any_scalar_value. During verification it
13388  * will be assumed that bar() has already been verified successfully, and the
13389  * call to bar() from foo() will be checked for a type match only. Later bar()
13390  * will be verified independently to check that it's safe for R1=any_scalar_value.
13391  */
13392 static int do_check_subprogs(struct bpf_verifier_env *env)
13393 {
13394 	struct bpf_prog_aux *aux = env->prog->aux;
13395 	int i, ret;
13396 
13397 	if (!aux->func_info)
13398 		return 0;
13399 
13400 	for (i = 1; i < env->subprog_cnt; i++) {
13401 		if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
13402 			continue;
13403 		env->insn_idx = env->subprog_info[i].start;
13404 		WARN_ON_ONCE(env->insn_idx == 0);
13405 		ret = do_check_common(env, i);
13406 		if (ret) {
13407 			return ret;
13408 		} else if (env->log.level & BPF_LOG_LEVEL) {
13409 			verbose(env,
13410 				"Func#%d is safe for any args that match its prototype\n",
13411 				i);
13412 		}
13413 	}
13414 	return 0;
13415 }
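
/* A minimal BPF-C sketch (illustrative only, not kernel code) of the
 * foo()/bar() case described in the comment above do_check_subprogs(),
 * assuming libbpf's SEC() and __noinline conventions; all names are
 * hypothetical.  Because bar() is non-static, it is emitted with
 * BTF_FUNC_GLOBAL linkage, so it is verified on its own for any scalar
 * argument, and the call from foo() is checked against bar()'s BTF
 * prototype only:
 *
 *	__noinline int bar(int b)
 *	{
 *		return b > 0 ? b : 0;
 *	}
 *
 *	SEC("xdp")
 *	int foo(struct xdp_md *ctx)
 *	{
 *		return bar(1) ? XDP_PASS : XDP_DROP;
 *	}
 */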
13416 
13417 static int do_check_main(struct bpf_verifier_env *env)
13418 {
13419 	int ret;
13420 
13421 	env->insn_idx = 0;
13422 	ret = do_check_common(env, 0);
13423 	if (!ret)
13424 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
13425 	return ret;
13426 }
13427 
13428 
13429 static void print_verification_stats(struct bpf_verifier_env *env)
13430 {
13431 	int i;
13432 
13433 	if (env->log.level & BPF_LOG_STATS) {
13434 		verbose(env, "verification time %lld usec\n",
13435 			div_u64(env->verification_time, 1000));
13436 		verbose(env, "stack depth ");
13437 		for (i = 0; i < env->subprog_cnt; i++) {
13438 			u32 depth = env->subprog_info[i].stack_depth;
13439 
13440 			verbose(env, "%d", depth);
13441 			if (i + 1 < env->subprog_cnt)
13442 				verbose(env, "+");
13443 		}
13444 		verbose(env, "\n");
13445 	}
13446 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
13447 		"total_states %d peak_states %d mark_read %d\n",
13448 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
13449 		env->max_states_per_insn, env->total_states,
13450 		env->peak_states, env->longest_mark_read_walk);
13451 }
13452 
13453 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
13454 {
13455 	const struct btf_type *t, *func_proto;
13456 	const struct bpf_struct_ops *st_ops;
13457 	const struct btf_member *member;
13458 	struct bpf_prog *prog = env->prog;
13459 	u32 btf_id, member_idx;
13460 	const char *mname;
13461 
13462 	if (!prog->gpl_compatible) {
13463 		verbose(env, "struct ops programs must have a GPL compatible license\n");
13464 		return -EINVAL;
13465 	}
13466 
13467 	btf_id = prog->aux->attach_btf_id;
13468 	st_ops = bpf_struct_ops_find(btf_id);
13469 	if (!st_ops) {
13470 		verbose(env, "attach_btf_id %u is not a supported struct\n",
13471 			btf_id);
13472 		return -ENOTSUPP;
13473 	}
13474 
13475 	t = st_ops->type;
13476 	member_idx = prog->expected_attach_type;
13477 	if (member_idx >= btf_type_vlen(t)) {
13478 		verbose(env, "attach to invalid member idx %u of struct %s\n",
13479 			member_idx, st_ops->name);
13480 		return -EINVAL;
13481 	}
13482 
13483 	member = &btf_type_member(t)[member_idx];
13484 	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
13485 	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
13486 					       NULL);
13487 	if (!func_proto) {
13488 		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
13489 			mname, member_idx, st_ops->name);
13490 		return -EINVAL;
13491 	}
13492 
13493 	if (st_ops->check_member) {
13494 		int err = st_ops->check_member(t, member);
13495 
13496 		if (err) {
13497 			verbose(env, "attach to unsupported member %s of struct %s\n",
13498 				mname, st_ops->name);
13499 			return err;
13500 		}
13501 	}
13502 
13503 	prog->aux->attach_func_proto = func_proto;
13504 	prog->aux->attach_func_name = mname;
13505 	env->ops = st_ops->verifier_ops;
13506 
13507 	return 0;
13508 }
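
/* A minimal BPF-C sketch (illustrative only, not kernel code) of what
 * check_struct_ops_btf_id() operates on, assuming libbpf's struct_ops
 * conventions and using tcp_congestion_ops as the example struct; names are
 * hypothetical, and a real congestion control would also have to fill in the
 * mandatory ops.  attach_btf_id identifies the struct type and
 * expected_attach_type is the index of the member (here .init) that the
 * program implements:
 *
 *	SEC("struct_ops/sample_init")
 *	void BPF_PROG(sample_init, struct sock *sk)
 *	{
 *	}
 *
 *	SEC(".struct_ops")
 *	struct tcp_congestion_ops sample_ops = {
 *		.init = (void *)sample_init,
 *		.name = "bpf_sample",
 *	};
 */
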
13509 #define SECURITY_PREFIX "security_"
13510 
13511 static int check_attach_modify_return(unsigned long addr, const char *func_name)
13512 {
13513 	if (within_error_injection_list(addr) ||
13514 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
13515 		return 0;
13516 
13517 	return -EINVAL;
13518 }
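
/* A hypothetical BPF-C example (illustrative only, not kernel code) of a
 * program that passes check_attach_modify_return(), assuming libbpf's
 * "fmod_ret/" section naming and the BPF_PROG() macro.  The target
 * security_socket_connect() matches the SECURITY_PREFIX check above, so its
 * return value may be overridden:
 *
 *	SEC("fmod_ret/security_socket_connect")
 *	int BPF_PROG(deny_connect, struct socket *sock,
 *		     struct sockaddr *address, int addrlen)
 *	{
 *		return -EPERM;
 *	}
 */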
13519 
13520 /* list of non-sleepable functions that are otherwise on the
13521  * ALLOW_ERROR_INJECTION list
13522  */
13523 BTF_SET_START(btf_non_sleepable_error_inject)
13524 /* The three functions below can be called from both sleepable and non-sleepable contexts.
13525  * Assume non-sleepable from the BPF safety point of view.
13526  */
13527 BTF_ID(func, __filemap_add_folio)
13528 BTF_ID(func, should_fail_alloc_page)
13529 BTF_ID(func, should_failslab)
13530 BTF_SET_END(btf_non_sleepable_error_inject)
13531 
13532 static int check_non_sleepable_error_inject(u32 btf_id)
13533 {
13534 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
13535 }
13536 
13537 int bpf_check_attach_target(struct bpf_verifier_log *log,
13538 			    const struct bpf_prog *prog,
13539 			    const struct bpf_prog *tgt_prog,
13540 			    u32 btf_id,
13541 			    struct bpf_attach_target_info *tgt_info)
13542 {
13543 	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
13544 	const char prefix[] = "btf_trace_";
13545 	int ret = 0, subprog = -1, i;
13546 	const struct btf_type *t;
13547 	bool conservative = true;
13548 	const char *tname;
13549 	struct btf *btf;
13550 	long addr = 0;
13551 
13552 	if (!btf_id) {
13553 		bpf_log(log, "Tracing programs must provide btf_id\n");
13554 		return -EINVAL;
13555 	}
13556 	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
13557 	if (!btf) {
13558 		bpf_log(log,
13559 			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
13560 		return -EINVAL;
13561 	}
13562 	t = btf_type_by_id(btf, btf_id);
13563 	if (!t) {
13564 		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
13565 		return -EINVAL;
13566 	}
13567 	tname = btf_name_by_offset(btf, t->name_off);
13568 	if (!tname) {
13569 		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
13570 		return -EINVAL;
13571 	}
13572 	if (tgt_prog) {
13573 		struct bpf_prog_aux *aux = tgt_prog->aux;
13574 
13575 		for (i = 0; i < aux->func_info_cnt; i++)
13576 			if (aux->func_info[i].type_id == btf_id) {
13577 				subprog = i;
13578 				break;
13579 			}
13580 		if (subprog == -1) {
13581 			bpf_log(log, "Subprog %s doesn't exist\n", tname);
13582 			return -EINVAL;
13583 		}
13584 		conservative = aux->func_info_aux[subprog].unreliable;
13585 		if (prog_extension) {
13586 			if (conservative) {
13587 				bpf_log(log,
13588 					"Cannot replace static functions\n");
13589 				return -EINVAL;
13590 			}
13591 			if (!prog->jit_requested) {
13592 				bpf_log(log,
13593 					"Extension programs should be JITed\n");
13594 				return -EINVAL;
13595 			}
13596 		}
13597 		if (!tgt_prog->jited) {
13598 			bpf_log(log, "Can attach to only JITed progs\n");
13599 			return -EINVAL;
13600 		}
13601 		if (tgt_prog->type == prog->type) {
13602 			/* Cannot fentry/fexit another fentry/fexit program.
13603 			 * Cannot attach a program extension to another extension.
13604 			 * It's ok to attach fentry/fexit to an extension program.
13605 			 */
13606 			bpf_log(log, "Cannot recursively attach\n");
13607 			return -EINVAL;
13608 		}
13609 		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
13610 		    prog_extension &&
13611 		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
13612 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
13613 			/* Program extensions can extend all program types
13614 			 * except fentry/fexit, for the following reason.
13615 			 * fentry/fexit programs are used for performance
13616 			 * analysis and stats, and can be attached to any
13617 			 * program type except themselves. When an extension
13618 			 * program replaces an XDP function, it is necessary to
13619 			 * allow performance analysis of all functions: both the
13620 			 * original XDP program and its program extension. Hence
13621 			 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
13622 			 * allowed. If extending fentry/fexit were allowed, it
13623 			 * would be possible to create a long call chain
13624 			 * fentry->extension->fentry->extension beyond any
13625 			 * reasonable stack size. Hence extending fentry is not
13626 			 * allowed.
13627 			 */
13628 			bpf_log(log, "Cannot extend fentry/fexit\n");
13629 			return -EINVAL;
13630 		}
13631 	} else {
13632 		if (prog_extension) {
13633 			bpf_log(log, "Cannot replace kernel functions\n");
13634 			return -EINVAL;
13635 		}
13636 	}
13637 
13638 	switch (prog->expected_attach_type) {
13639 	case BPF_TRACE_RAW_TP:
13640 		if (tgt_prog) {
13641 			bpf_log(log,
13642 				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
13643 			return -EINVAL;
13644 		}
13645 		if (!btf_type_is_typedef(t)) {
13646 			bpf_log(log, "attach_btf_id %u is not a typedef\n",
13647 				btf_id);
13648 			return -EINVAL;
13649 		}
13650 		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
13651 			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
13652 				btf_id, tname);
13653 			return -EINVAL;
13654 		}
13655 		tname += sizeof(prefix) - 1;
13656 		t = btf_type_by_id(btf, t->type);
13657 		if (!btf_type_is_ptr(t))
13658 			/* should never happen in a valid vmlinux build */
13659 			return -EINVAL;
13660 		t = btf_type_by_id(btf, t->type);
13661 		if (!btf_type_is_func_proto(t))
13662 			/* should never happen in a valid vmlinux build */
13663 			return -EINVAL;
13664 
13665 		break;
13666 	case BPF_TRACE_ITER:
13667 		if (!btf_type_is_func(t)) {
13668 			bpf_log(log, "attach_btf_id %u is not a function\n",
13669 				btf_id);
13670 			return -EINVAL;
13671 		}
13672 		t = btf_type_by_id(btf, t->type);
13673 		if (!btf_type_is_func_proto(t))
13674 			return -EINVAL;
13675 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
13676 		if (ret)
13677 			return ret;
13678 		break;
13679 	default:
13680 		if (!prog_extension)
13681 			return -EINVAL;
13682 		fallthrough;
13683 	case BPF_MODIFY_RETURN:
13684 	case BPF_LSM_MAC:
13685 	case BPF_TRACE_FENTRY:
13686 	case BPF_TRACE_FEXIT:
13687 		if (!btf_type_is_func(t)) {
13688 			bpf_log(log, "attach_btf_id %u is not a function\n",
13689 				btf_id);
13690 			return -EINVAL;
13691 		}
13692 		if (prog_extension &&
13693 		    btf_check_type_match(log, prog, btf, t))
13694 			return -EINVAL;
13695 		t = btf_type_by_id(btf, t->type);
13696 		if (!btf_type_is_func_proto(t))
13697 			return -EINVAL;
13698 
13699 		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
13700 		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
13701 		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
13702 			return -EINVAL;
13703 
13704 		if (tgt_prog && conservative)
13705 			t = NULL;
13706 
13707 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
13708 		if (ret < 0)
13709 			return ret;
13710 
13711 		if (tgt_prog) {
13712 			if (subprog == 0)
13713 				addr = (long) tgt_prog->bpf_func;
13714 			else
13715 				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
13716 		} else {
13717 			addr = kallsyms_lookup_name(tname);
13718 			if (!addr) {
13719 				bpf_log(log,
13720 					"The address of function %s cannot be found\n",
13721 					tname);
13722 				return -ENOENT;
13723 			}
13724 		}
13725 
13726 		if (prog->aux->sleepable) {
13727 			ret = -EINVAL;
13728 			switch (prog->type) {
13729 			case BPF_PROG_TYPE_TRACING:
13730 				/* fentry/fexit/fmod_ret progs can be sleepable only if they are
13731 				 * attached to an ALLOW_ERROR_INJECTION function and are not in the denylist.
13732 				 */
13733 				if (!check_non_sleepable_error_inject(btf_id) &&
13734 				    within_error_injection_list(addr))
13735 					ret = 0;
13736 				break;
13737 			case BPF_PROG_TYPE_LSM:
13738 				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
13739 				 * Only some of them are sleepable.
13740 				 */
13741 				if (bpf_lsm_is_sleepable_hook(btf_id))
13742 					ret = 0;
13743 				break;
13744 			default:
13745 				break;
13746 			}
13747 			if (ret) {
13748 				bpf_log(log, "%s is not sleepable\n", tname);
13749 				return ret;
13750 			}
13751 		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
13752 			if (tgt_prog) {
13753 				bpf_log(log, "can't modify return codes of BPF programs\n");
13754 				return -EINVAL;
13755 			}
13756 			ret = check_attach_modify_return(addr, tname);
13757 			if (ret) {
13758 				bpf_log(log, "%s() is not modifiable\n", tname);
13759 				return ret;
13760 			}
13761 		}
13762 
13763 		break;
13764 	}
13765 	tgt_info->tgt_addr = addr;
13766 	tgt_info->tgt_name = tname;
13767 	tgt_info->tgt_type = t;
13768 	return 0;
13769 }
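
/* A hypothetical example (illustrative only, not kernel code) of the
 * sleepable rules enforced above, assuming libbpf's ".s" (sleepable) section
 * suffix.  A sleepable LSM program is accepted only when
 * bpf_lsm_is_sleepable_hook() allows the target hook; file_open is typically
 * on that allowlist, so a program like the following loads, while the same
 * program attached to a non-sleepable hook fails with "... is not sleepable":
 *
 *	SEC("lsm.s/file_open")
 *	int BPF_PROG(check_open, struct file *file)
 *	{
 *		return 0;
 *	}
 */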
13770 
13771 BTF_SET_START(btf_id_deny)
13772 BTF_ID_UNUSED
13773 #ifdef CONFIG_SMP
13774 BTF_ID(func, migrate_disable)
13775 BTF_ID(func, migrate_enable)
13776 #endif
13777 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
13778 BTF_ID(func, rcu_read_unlock_strict)
13779 #endif
13780 BTF_SET_END(btf_id_deny)
13781 
13782 static int check_attach_btf_id(struct bpf_verifier_env *env)
13783 {
13784 	struct bpf_prog *prog = env->prog;
13785 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
13786 	struct bpf_attach_target_info tgt_info = {};
13787 	u32 btf_id = prog->aux->attach_btf_id;
13788 	struct bpf_trampoline *tr;
13789 	int ret;
13790 	u64 key;
13791 
13792 	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
13793 		if (prog->aux->sleepable)
13794 			/* attach_btf_id was already checked to be zero */
13795 			return 0;
13796 		verbose(env, "Syscall programs can only be sleepable\n");
13797 		return -EINVAL;
13798 	}
13799 
13800 	if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
13801 	    prog->type != BPF_PROG_TYPE_LSM) {
13802 		verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
13803 		return -EINVAL;
13804 	}
13805 
13806 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
13807 		return check_struct_ops_btf_id(env);
13808 
13809 	if (prog->type != BPF_PROG_TYPE_TRACING &&
13810 	    prog->type != BPF_PROG_TYPE_LSM &&
13811 	    prog->type != BPF_PROG_TYPE_EXT)
13812 		return 0;
13813 
13814 	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
13815 	if (ret)
13816 		return ret;
13817 
13818 	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
13819 		/* to make freplace programs equivalent to their targets, they need
13820 		 * to inherit env->ops and expected_attach_type for the rest of the
13821 		 * verification
13822 		 */
13823 		env->ops = bpf_verifier_ops[tgt_prog->type];
13824 		prog->expected_attach_type = tgt_prog->expected_attach_type;
13825 	}
13826 
13827 	/* store info about the attachment target that will be used later */
13828 	prog->aux->attach_func_proto = tgt_info.tgt_type;
13829 	prog->aux->attach_func_name = tgt_info.tgt_name;
13830 
13831 	if (tgt_prog) {
13832 		prog->aux->saved_dst_prog_type = tgt_prog->type;
13833 		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
13834 	}
13835 
13836 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
13837 		prog->aux->attach_btf_trace = true;
13838 		return 0;
13839 	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
13840 		if (!bpf_iter_prog_supported(prog))
13841 			return -EINVAL;
13842 		return 0;
13843 	}
13844 
13845 	if (prog->type == BPF_PROG_TYPE_LSM) {
13846 		ret = bpf_lsm_verify_prog(&env->log, prog);
13847 		if (ret < 0)
13848 			return ret;
13849 	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
13850 		   btf_id_set_contains(&btf_id_deny, btf_id)) {
13851 		return -EINVAL;
13852 	}
13853 
13854 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
13855 	tr = bpf_trampoline_get(key, &tgt_info);
13856 	if (!tr)
13857 		return -ENOMEM;
13858 
13859 	prog->aux->dst_trampoline = tr;
13860 	return 0;
13861 }
13862 
13863 struct btf *bpf_get_btf_vmlinux(void)
13864 {
13865 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
13866 		mutex_lock(&bpf_verifier_lock);
13867 		if (!btf_vmlinux)
13868 			btf_vmlinux = btf_parse_vmlinux();
13869 		mutex_unlock(&bpf_verifier_lock);
13870 	}
13871 	return btf_vmlinux;
13872 }
13873 
13874 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
13875 {
13876 	u64 start_time = ktime_get_ns();
13877 	struct bpf_verifier_env *env;
13878 	struct bpf_verifier_log *log;
13879 	int i, len, ret = -EINVAL;
13880 	bool is_priv;
13881 
13882 	/* if the bpf_verifier_ops table is empty, no program is valid */
13883 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
13884 		return -EINVAL;
13885 
13886 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
13887 	 * allocate/free it every time bpf_check() is called
13888 	 */
13889 	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
13890 	if (!env)
13891 		return -ENOMEM;
13892 	log = &env->log;
13893 
13894 	len = (*prog)->len;
13895 	env->insn_aux_data =
13896 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
13897 	ret = -ENOMEM;
13898 	if (!env->insn_aux_data)
13899 		goto err_free_env;
13900 	for (i = 0; i < len; i++)
13901 		env->insn_aux_data[i].orig_idx = i;
13902 	env->prog = *prog;
13903 	env->ops = bpf_verifier_ops[env->prog->type];
13904 	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
13905 	is_priv = bpf_capable();
13906 
13907 	bpf_get_btf_vmlinux();
13908 
13909 	/* grab the mutex to protect a few globals used by the verifier */
13910 	if (!is_priv)
13911 		mutex_lock(&bpf_verifier_lock);
13912 
13913 	if (attr->log_level || attr->log_buf || attr->log_size) {
13914 		/* user requested verbose verifier output
13915 		 * and supplied a buffer to store the verification trace
13916 		 */
13917 		log->level = attr->log_level;
13918 		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
13919 		log->len_total = attr->log_size;
13920 
13921 		ret = -EINVAL;
13922 		/* log attributes have to be sane */
13923 		if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
13924 		    !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
13925 			goto err_unlock;
13926 	}
13927 
13928 	if (IS_ERR(btf_vmlinux)) {
13929 		/* Either gcc, pahole, or the kernel is broken. */
13930 		verbose(env, "in-kernel BTF is malformed\n");
13931 		ret = PTR_ERR(btf_vmlinux);
13932 		goto skip_full_check;
13933 	}
13934 
13935 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
13936 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
13937 		env->strict_alignment = true;
13938 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
13939 		env->strict_alignment = false;
13940 
13941 	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
13942 	env->allow_uninit_stack = bpf_allow_uninit_stack();
13943 	env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
13944 	env->bypass_spec_v1 = bpf_bypass_spec_v1();
13945 	env->bypass_spec_v4 = bpf_bypass_spec_v4();
13946 	env->bpf_capable = bpf_capable();
13947 
13948 	if (is_priv)
13949 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
13950 
13951 	env->explored_states = kvcalloc(state_htab_size(env),
13952 				       sizeof(struct bpf_verifier_state_list *),
13953 				       GFP_USER);
13954 	ret = -ENOMEM;
13955 	if (!env->explored_states)
13956 		goto skip_full_check;
13957 
13958 	ret = add_subprog_and_kfunc(env);
13959 	if (ret < 0)
13960 		goto skip_full_check;
13961 
13962 	ret = check_subprogs(env);
13963 	if (ret < 0)
13964 		goto skip_full_check;
13965 
13966 	ret = check_btf_info(env, attr, uattr);
13967 	if (ret < 0)
13968 		goto skip_full_check;
13969 
13970 	ret = check_attach_btf_id(env);
13971 	if (ret)
13972 		goto skip_full_check;
13973 
13974 	ret = resolve_pseudo_ldimm64(env);
13975 	if (ret < 0)
13976 		goto skip_full_check;
13977 
13978 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
13979 		ret = bpf_prog_offload_verifier_prep(env->prog);
13980 		if (ret)
13981 			goto skip_full_check;
13982 	}
13983 
13984 	ret = check_cfg(env);
13985 	if (ret < 0)
13986 		goto skip_full_check;
13987 
13988 	ret = do_check_subprogs(env);
13989 	ret = ret ?: do_check_main(env);
13990 
13991 	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
13992 		ret = bpf_prog_offload_finalize(env);
13993 
13994 skip_full_check:
13995 	kvfree(env->explored_states);
13996 
13997 	if (ret == 0)
13998 		ret = check_max_stack_depth(env);
13999 
14000 	/* instruction rewrites happen after this point */
14001 	if (is_priv) {
14002 		if (ret == 0)
14003 			opt_hard_wire_dead_code_branches(env);
14004 		if (ret == 0)
14005 			ret = opt_remove_dead_code(env);
14006 		if (ret == 0)
14007 			ret = opt_remove_nops(env);
14008 	} else {
14009 		if (ret == 0)
14010 			sanitize_dead_code(env);
14011 	}
14012 
14013 	if (ret == 0)
14014 		/* program is valid, convert *(u32*)(ctx + off) accesses */
14015 		ret = convert_ctx_accesses(env);
14016 
14017 	if (ret == 0)
14018 		ret = do_misc_fixups(env);
14019 
14020 	/* do the 32-bit optimization after insn patching is done so that the
14021 	 * patched insns can be handled correctly.
14022 	 */
14023 	if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
14024 		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
14025 		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
14026 								     : false;
14027 	}
14028 
14029 	if (ret == 0)
14030 		ret = fixup_call_args(env);
14031 
14032 	env->verification_time = ktime_get_ns() - start_time;
14033 	print_verification_stats(env);
14034 	env->prog->aux->verified_insns = env->insn_processed;
14035 
14036 	if (log->level && bpf_verifier_log_full(log))
14037 		ret = -ENOSPC;
14038 	if (log->level && !log->ubuf) {
14039 		ret = -EFAULT;
14040 		goto err_release_maps;
14041 	}
14042 
14043 	if (ret)
14044 		goto err_release_maps;
14045 
14046 	if (env->used_map_cnt) {
14047 		/* if program passed verifier, update used_maps in bpf_prog_info */
14048 		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
14049 							  sizeof(env->used_maps[0]),
14050 							  GFP_KERNEL);
14051 
14052 		if (!env->prog->aux->used_maps) {
14053 			ret = -ENOMEM;
14054 			goto err_release_maps;
14055 		}
14056 
14057 		memcpy(env->prog->aux->used_maps, env->used_maps,
14058 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
14059 		env->prog->aux->used_map_cnt = env->used_map_cnt;
14060 	}
14061 	if (env->used_btf_cnt) {
14062 		/* if program passed verifier, update used_btfs in bpf_prog_aux */
14063 		env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
14064 							  sizeof(env->used_btfs[0]),
14065 							  GFP_KERNEL);
14066 		if (!env->prog->aux->used_btfs) {
14067 			ret = -ENOMEM;
14068 			goto err_release_maps;
14069 		}
14070 
14071 		memcpy(env->prog->aux->used_btfs, env->used_btfs,
14072 		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
14073 		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
14074 	}
14075 	if (env->used_map_cnt || env->used_btf_cnt) {
14076 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
14077 		 * bpf_ld_imm64 instructions
14078 		 */
14079 		convert_pseudo_ld_imm64(env);
14080 	}
14081 
14082 	adjust_btf_func(env);
14083 
14084 err_release_maps:
14085 	if (!env->prog->aux->used_maps)
14086 		/* if we didn't copy map pointers into bpf_prog_info, release
14087 		 * them now. Otherwise free_used_maps() will release them.
14088 		 */
14089 		release_maps(env);
14090 	if (!env->prog->aux->used_btfs)
14091 		release_btfs(env);
14092 
14093 	/* extension progs temporarily inherit the attach_type of their targets
14094 	 * for verification purposes, so set it back to zero before returning
14095 	 */
14096 	if (env->prog->type == BPF_PROG_TYPE_EXT)
14097 		env->prog->expected_attach_type = 0;
14098 
14099 	*prog = env->prog;
14100 err_unlock:
14101 	if (!is_priv)
14102 		mutex_unlock(&bpf_verifier_lock);
14103 	vfree(env->insn_aux_data);
14104 err_free_env:
14105 	kfree(env);
14106 	return ret;
14107 }
14108
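
/* A hypothetical user-space sketch (illustrative only, not kernel code) of
 * how the verifier log attributes validated near the top of bpf_check() are
 * supplied, using only documented bpf(BPF_PROG_LOAD) attributes; insns and
 * insn_cnt refer to an instruction array prepared elsewhere.  If any log
 * attribute is set, log_level must be non-zero, log_buf non-NULL and
 * log_size at least 128 bytes, otherwise the load fails with -EINVAL before
 * verification starts:
 *
 *	static char vlog[64 * 1024];
 *	union bpf_attr attr = {};
 *
 *	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 *	attr.insns     = (unsigned long)insns;
 *	attr.insn_cnt  = insn_cnt;
 *	attr.license   = (unsigned long)"GPL";
 *	attr.log_level = 1;
 *	attr.log_buf   = (unsigned long)vlog;
 *	attr.log_size  = sizeof(vlog);
 *	prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */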