xref: /openbmc/linux/kernel/bpf/verifier.c (revision bf8981a2aa082d9d64771b47c8a1c9c388d8cd40)
1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2  * Copyright (c) 2016 Facebook
3  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 2 of the GNU General Public
7  * License as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * General Public License for more details.
13  */
14 #include <uapi/linux/btf.h>
15 #include <linux/kernel.h>
16 #include <linux/types.h>
17 #include <linux/slab.h>
18 #include <linux/bpf.h>
19 #include <linux/btf.h>
20 #include <linux/bpf_verifier.h>
21 #include <linux/filter.h>
22 #include <net/netlink.h>
23 #include <linux/file.h>
24 #include <linux/vmalloc.h>
25 #include <linux/stringify.h>
26 #include <linux/bsearch.h>
27 #include <linux/sort.h>
28 #include <linux/perf_event.h>
29 #include <linux/ctype.h>
30 
31 #include "disasm.h"
32 
33 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
34 #define BPF_PROG_TYPE(_id, _name) \
35 	[_id] = & _name ## _verifier_ops,
36 #define BPF_MAP_TYPE(_id, _ops)
37 #include <linux/bpf_types.h>
38 #undef BPF_PROG_TYPE
39 #undef BPF_MAP_TYPE
40 };
41 
42 /* bpf_check() is a static code analyzer that walks eBPF program
43  * instruction by instruction and updates register/stack state.
44  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
45  *
46  * The first pass is depth-first-search to check that the program is a DAG.
47  * It rejects the following programs:
48  * - larger than BPF_MAXINSNS insns
49  * - a loop is present (detected via back-edge)
50  * - unreachable insns exist (shouldn't be a forest. program = one function)
51  * - out of bounds or malformed jumps
52  * The second pass is all possible path descent from the 1st insn.
53  * Since it's analyzing all paths through the program, the length of the
54  * analysis is limited to 64k insn, which may be hit even if the total number of
55  * insns is less than 4K but there are too many branches that change stack/regs.
56  * Number of 'branches to be analyzed' is limited to 1k
57  *
58  * On entry to each instruction, each register has a type, and the instruction
59  * changes the types of the registers depending on instruction semantics.
60  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
61  * copied to R1.
62  *
63  * All registers are 64-bit.
64  * R0 - return register
65  * R1-R5 argument passing registers
66  * R6-R9 callee saved registers
67  * R10 - frame pointer read-only
68  *
69  * At the start of BPF program the register R1 contains a pointer to bpf_context
70  * and has type PTR_TO_CTX.
71  *
72  * Verifier tracks arithmetic operations on pointers in case:
73  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
74  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
75  * 1st insn copies R10 (which has FRAME_PTR type) into R1
76  * and 2nd arithmetic instruction is pattern matched to recognize
77  * that it wants to construct a pointer to some element within stack.
78  * So after 2nd insn, the register R1 has type PTR_TO_STACK
79  * (and -20 constant is saved for further stack bounds checking).
80  * Meaning that this reg is a pointer to stack plus known immediate constant.
81  *
82  * Most of the time the registers have SCALAR_VALUE type, which
83  * means the register has some value, but it's not a valid pointer.
84  * (like pointer plus pointer becomes SCALAR_VALUE type)
85  *
86  * When verifier sees load or store instructions the type of base register
87  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
88  * four pointer types recognized by the check_mem_access() function.
89  *
90  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
91  * and the range of [ptr, ptr + map's value_size) is accessible.
92  *
93  * registers used to pass values to function calls are checked against
94  * function argument constraints.
95  *
96  * ARG_PTR_TO_MAP_KEY is one such argument constraint.
97  * It means that the register type passed to this function must be
98  * PTR_TO_STACK and it will be used inside the function as
99  * 'pointer to map element key'
100  *
101  * For example the argument constraints for bpf_map_lookup_elem():
102  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
103  *   .arg1_type = ARG_CONST_MAP_PTR,
104  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
105  *
106  * ret_type says that this function returns 'pointer to map elem value or null'.
107  * The function expects the 1st argument to be a const pointer to 'struct bpf_map' and
108  * the 2nd argument to be a pointer to stack, which will be used inside
109  * the helper function as a pointer to map element key.
110  *
111  * On the kernel side the helper function looks like:
112  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
113  * {
114  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
115  *    void *key = (void *) (unsigned long) r2;
116  *    void *value;
117  *
118  *    here kernel can access 'key' and 'map' pointers safely, knowing that
119  *    [key, key + map->key_size) bytes are valid and were initialized on
120  *    the stack of eBPF program.
121  * }
122  *
123  * Corresponding eBPF program may look like:
124  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
125  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
126  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
127  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
128  * here verifier looks at prototype of map_lookup_elem() and sees:
129  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
130  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
131  *
132  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
133  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
134  * and were initialized prior to this call.
135  * If it's ok, then verifier allows this BPF_CALL insn and looks at
136  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
137  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
138  * returns either a pointer to map value or NULL.
139  *
140  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
141  * insn, the register holding that pointer in the true branch changes state to
142  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
143  * branch. See check_cond_jmp_op().
144  *
145  * After the call R0 is set to return type of the function and registers R1-R5
146  * are set to NOT_INIT to indicate that they are no longer readable.
147  *
148  * The following reference types represent a potential reference to a kernel
149  * resource which, after first being allocated, must be checked and freed by
150  * the BPF program:
151  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
152  *
153  * When the verifier sees a helper call return a reference type, it allocates a
154  * pointer id for the reference and stores it in the current function state.
155  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
156  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
157  * passes through a NULL-check conditional. For the branch wherein the state is
158  * changed to CONST_IMM, the verifier releases the reference.
159  *
160  * For each helper function that allocates a reference, such as
161  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
162  * bpf_sk_release(). When a reference type passes into the release function,
163  * the verifier also releases the reference. If any unchecked or unreleased
164  * reference remains at the end of the program, the verifier rejects it.
165  */
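/* A sketch for illustration (following the BPF_* macro conventions used in the
 * comment above, not taken from it): a program that handles a reference-typed
 * return value correctly might look like:
 *
 *    ... set up R1-R5 as arguments for bpf_sk_lookup_tcp() ...
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),  // if R0 == NULL skip the release;
 *                                            // otherwise R0 becomes PTR_TO_SOCKET
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *    BPF_EXIT_INSN(),
 *
 * The verifier assigns a reference id when bpf_sk_lookup_tcp() returns and
 * clears it when the pointer is passed to bpf_sk_release(); reaching BPF_EXIT
 * with the reference still held would cause the program to be rejected.
 */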
166 
167 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
168 struct bpf_verifier_stack_elem {
169 	/* verifier state is 'st'
170 	 * before processing instruction 'insn_idx'
171 	 * and after processing instruction 'prev_insn_idx'
172 	 */
173 	struct bpf_verifier_state st;
174 	int insn_idx;
175 	int prev_insn_idx;
176 	struct bpf_verifier_stack_elem *next;
177 };
178 
179 #define BPF_COMPLEXITY_LIMIT_INSNS	131072
180 #define BPF_COMPLEXITY_LIMIT_STACK	1024
181 #define BPF_COMPLEXITY_LIMIT_STATES	64
182 
183 #define BPF_MAP_PTR_UNPRIV	1UL
184 #define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
185 					  POISON_POINTER_DELTA))
186 #define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
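/* Illustrative sketch (uses only the macros above, not part of the verifier
 * logic itself): aux->map_state packs a struct bpf_map pointer together with
 * the "unpriv" flag in bit 0, which is free because the pointer is at least
 * word aligned:
 *
 *	unsigned long state = (unsigned long)map | BPF_MAP_PTR_UNPRIV;
 *	struct bpf_map *m = BPF_MAP_PTR(state);		// recovers 'map'
 *	bool unpriv = state & BPF_MAP_PTR_UNPRIV;	// recovers the flag
 */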
187 
188 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
189 {
190 	return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
191 }
192 
193 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
194 {
195 	return aux->map_state & BPF_MAP_PTR_UNPRIV;
196 }
197 
198 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
199 			      const struct bpf_map *map, bool unpriv)
200 {
201 	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
202 	unpriv |= bpf_map_ptr_unpriv(aux);
203 	aux->map_state = (unsigned long)map |
204 			 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
205 }
206 
207 struct bpf_call_arg_meta {
208 	struct bpf_map *map_ptr;
209 	bool raw_mode;
210 	bool pkt_access;
211 	int regno;
212 	int access_size;
213 	s64 msize_smax_value;
214 	u64 msize_umax_value;
215 	int ref_obj_id;
216 	int func_id;
217 };
218 
219 static DEFINE_MUTEX(bpf_verifier_lock);
220 
221 static const struct bpf_line_info *
222 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
223 {
224 	const struct bpf_line_info *linfo;
225 	const struct bpf_prog *prog;
226 	u32 i, nr_linfo;
227 
228 	prog = env->prog;
229 	nr_linfo = prog->aux->nr_linfo;
230 
231 	if (!nr_linfo || insn_off >= prog->len)
232 		return NULL;
233 
234 	linfo = prog->aux->linfo;
235 	for (i = 1; i < nr_linfo; i++)
236 		if (insn_off < linfo[i].insn_off)
237 			break;
238 
239 	return &linfo[i - 1];
240 }
241 
242 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
243 		       va_list args)
244 {
245 	unsigned int n;
246 
247 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
248 
249 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
250 		  "verifier log line truncated - local buffer too short\n");
251 
252 	n = min(log->len_total - log->len_used - 1, n);
253 	log->kbuf[n] = '\0';
254 
255 	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
256 		log->len_used += n;
257 	else
258 		log->ubuf = NULL;
259 }
260 
261 /* log_level controls verbosity level of eBPF verifier.
262  * bpf_verifier_log_write() is used to dump the verification trace to the log,
263  * so the user can figure out what's wrong with the program
264  */
265 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
266 					   const char *fmt, ...)
267 {
268 	va_list args;
269 
270 	if (!bpf_verifier_log_needed(&env->log))
271 		return;
272 
273 	va_start(args, fmt);
274 	bpf_verifier_vlog(&env->log, fmt, args);
275 	va_end(args);
276 }
277 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
278 
279 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
280 {
281 	struct bpf_verifier_env *env = private_data;
282 	va_list args;
283 
284 	if (!bpf_verifier_log_needed(&env->log))
285 		return;
286 
287 	va_start(args, fmt);
288 	bpf_verifier_vlog(&env->log, fmt, args);
289 	va_end(args);
290 }
291 
292 static const char *ltrim(const char *s)
293 {
294 	while (isspace(*s))
295 		s++;
296 
297 	return s;
298 }
299 
300 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
301 					 u32 insn_off,
302 					 const char *prefix_fmt, ...)
303 {
304 	const struct bpf_line_info *linfo;
305 
306 	if (!bpf_verifier_log_needed(&env->log))
307 		return;
308 
309 	linfo = find_linfo(env, insn_off);
310 	if (!linfo || linfo == env->prev_linfo)
311 		return;
312 
313 	if (prefix_fmt) {
314 		va_list args;
315 
316 		va_start(args, prefix_fmt);
317 		bpf_verifier_vlog(&env->log, prefix_fmt, args);
318 		va_end(args);
319 	}
320 
321 	verbose(env, "%s\n",
322 		ltrim(btf_name_by_offset(env->prog->aux->btf,
323 					 linfo->line_off)));
324 
325 	env->prev_linfo = linfo;
326 }
327 
328 static bool type_is_pkt_pointer(enum bpf_reg_type type)
329 {
330 	return type == PTR_TO_PACKET ||
331 	       type == PTR_TO_PACKET_META;
332 }
333 
334 static bool type_is_sk_pointer(enum bpf_reg_type type)
335 {
336 	return type == PTR_TO_SOCKET ||
337 		type == PTR_TO_SOCK_COMMON ||
338 		type == PTR_TO_TCP_SOCK;
339 }
340 
341 static bool reg_type_may_be_null(enum bpf_reg_type type)
342 {
343 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
344 	       type == PTR_TO_SOCKET_OR_NULL ||
345 	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
346 	       type == PTR_TO_TCP_SOCK_OR_NULL;
347 }
348 
349 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
350 {
351 	return reg->type == PTR_TO_MAP_VALUE &&
352 		map_value_has_spin_lock(reg->map_ptr);
353 }
354 
355 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
356 {
357 	return type == PTR_TO_SOCKET ||
358 		type == PTR_TO_SOCKET_OR_NULL ||
359 		type == PTR_TO_TCP_SOCK ||
360 		type == PTR_TO_TCP_SOCK_OR_NULL;
361 }
362 
363 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
364 {
365 	return type == ARG_PTR_TO_SOCK_COMMON;
366 }
367 
368 /* Determine whether the function releases some resources allocated by another
369  * function call. The first reference type argument will be assumed to be
370  * released by release_reference().
371  */
372 static bool is_release_function(enum bpf_func_id func_id)
373 {
374 	return func_id == BPF_FUNC_sk_release;
375 }
376 
377 static bool is_acquire_function(enum bpf_func_id func_id)
378 {
379 	return func_id == BPF_FUNC_sk_lookup_tcp ||
380 		func_id == BPF_FUNC_sk_lookup_udp ||
381 		func_id == BPF_FUNC_skc_lookup_tcp;
382 }
383 
384 static bool is_ptr_cast_function(enum bpf_func_id func_id)
385 {
386 	return func_id == BPF_FUNC_tcp_sock ||
387 		func_id == BPF_FUNC_sk_fullsock;
388 }
389 
390 /* string representation of 'enum bpf_reg_type' */
391 static const char * const reg_type_str[] = {
392 	[NOT_INIT]		= "?",
393 	[SCALAR_VALUE]		= "inv",
394 	[PTR_TO_CTX]		= "ctx",
395 	[CONST_PTR_TO_MAP]	= "map_ptr",
396 	[PTR_TO_MAP_VALUE]	= "map_value",
397 	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
398 	[PTR_TO_STACK]		= "fp",
399 	[PTR_TO_PACKET]		= "pkt",
400 	[PTR_TO_PACKET_META]	= "pkt_meta",
401 	[PTR_TO_PACKET_END]	= "pkt_end",
402 	[PTR_TO_FLOW_KEYS]	= "flow_keys",
403 	[PTR_TO_SOCKET]		= "sock",
404 	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
405 	[PTR_TO_SOCK_COMMON]	= "sock_common",
406 	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
407 	[PTR_TO_TCP_SOCK]	= "tcp_sock",
408 	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
409 };
410 
411 static char slot_type_char[] = {
412 	[STACK_INVALID]	= '?',
413 	[STACK_SPILL]	= 'r',
414 	[STACK_MISC]	= 'm',
415 	[STACK_ZERO]	= '0',
416 };
417 
418 static void print_liveness(struct bpf_verifier_env *env,
419 			   enum bpf_reg_liveness live)
420 {
421 	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
422 		verbose(env, "_");
423 	if (live & REG_LIVE_READ)
424 		verbose(env, "r");
425 	if (live & REG_LIVE_WRITTEN)
426 		verbose(env, "w");
427 	if (live & REG_LIVE_DONE)
428 		verbose(env, "D");
429 }
430 
431 static struct bpf_func_state *func(struct bpf_verifier_env *env,
432 				   const struct bpf_reg_state *reg)
433 {
434 	struct bpf_verifier_state *cur = env->cur_state;
435 
436 	return cur->frame[reg->frameno];
437 }
438 
439 static void print_verifier_state(struct bpf_verifier_env *env,
440 				 const struct bpf_func_state *state)
441 {
442 	const struct bpf_reg_state *reg;
443 	enum bpf_reg_type t;
444 	int i;
445 
446 	if (state->frameno)
447 		verbose(env, " frame%d:", state->frameno);
448 	for (i = 0; i < MAX_BPF_REG; i++) {
449 		reg = &state->regs[i];
450 		t = reg->type;
451 		if (t == NOT_INIT)
452 			continue;
453 		verbose(env, " R%d", i);
454 		print_liveness(env, reg->live);
455 		verbose(env, "=%s", reg_type_str[t]);
456 		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
457 		    tnum_is_const(reg->var_off)) {
458 			/* reg->off should be 0 for SCALAR_VALUE */
459 			verbose(env, "%lld", reg->var_off.value + reg->off);
460 			if (t == PTR_TO_STACK)
461 				verbose(env, ",call_%d", func(env, reg)->callsite);
462 		} else {
463 			verbose(env, "(id=%d", reg->id);
464 			if (reg_type_may_be_refcounted_or_null(t))
465 				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
466 			if (t != SCALAR_VALUE)
467 				verbose(env, ",off=%d", reg->off);
468 			if (type_is_pkt_pointer(t))
469 				verbose(env, ",r=%d", reg->range);
470 			else if (t == CONST_PTR_TO_MAP ||
471 				 t == PTR_TO_MAP_VALUE ||
472 				 t == PTR_TO_MAP_VALUE_OR_NULL)
473 				verbose(env, ",ks=%d,vs=%d",
474 					reg->map_ptr->key_size,
475 					reg->map_ptr->value_size);
476 			if (tnum_is_const(reg->var_off)) {
477 				/* Typically an immediate SCALAR_VALUE, but
478 				 * could be a pointer whose offset is too big
479 				 * for reg->off
480 				 */
481 				verbose(env, ",imm=%llx", reg->var_off.value);
482 			} else {
483 				if (reg->smin_value != reg->umin_value &&
484 				    reg->smin_value != S64_MIN)
485 					verbose(env, ",smin_value=%lld",
486 						(long long)reg->smin_value);
487 				if (reg->smax_value != reg->umax_value &&
488 				    reg->smax_value != S64_MAX)
489 					verbose(env, ",smax_value=%lld",
490 						(long long)reg->smax_value);
491 				if (reg->umin_value != 0)
492 					verbose(env, ",umin_value=%llu",
493 						(unsigned long long)reg->umin_value);
494 				if (reg->umax_value != U64_MAX)
495 					verbose(env, ",umax_value=%llu",
496 						(unsigned long long)reg->umax_value);
497 				if (!tnum_is_unknown(reg->var_off)) {
498 					char tn_buf[48];
499 
500 					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
501 					verbose(env, ",var_off=%s", tn_buf);
502 				}
503 			}
504 			verbose(env, ")");
505 		}
506 	}
507 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
508 		char types_buf[BPF_REG_SIZE + 1];
509 		bool valid = false;
510 		int j;
511 
512 		for (j = 0; j < BPF_REG_SIZE; j++) {
513 			if (state->stack[i].slot_type[j] != STACK_INVALID)
514 				valid = true;
515 			types_buf[j] = slot_type_char[
516 					state->stack[i].slot_type[j]];
517 		}
518 		types_buf[BPF_REG_SIZE] = 0;
519 		if (!valid)
520 			continue;
521 		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
522 		print_liveness(env, state->stack[i].spilled_ptr.live);
523 		if (state->stack[i].slot_type[0] == STACK_SPILL)
524 			verbose(env, "=%s",
525 				reg_type_str[state->stack[i].spilled_ptr.type]);
526 		else
527 			verbose(env, "=%s", types_buf);
528 	}
529 	if (state->acquired_refs && state->refs[0].id) {
530 		verbose(env, " refs=%d", state->refs[0].id);
531 		for (i = 1; i < state->acquired_refs; i++)
532 			if (state->refs[i].id)
533 				verbose(env, ",%d", state->refs[i].id);
534 	}
535 	verbose(env, "\n");
536 }
537 
538 #define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
539 static int copy_##NAME##_state(struct bpf_func_state *dst,		\
540 			       const struct bpf_func_state *src)	\
541 {									\
542 	if (!src->FIELD)						\
543 		return 0;						\
544 	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
545 		/* internal bug, make state invalid to reject the program */ \
546 		memset(dst, 0, sizeof(*dst));				\
547 		return -EFAULT;						\
548 	}								\
549 	memcpy(dst->FIELD, src->FIELD,					\
550 	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
551 	return 0;							\
552 }
553 /* copy_reference_state() */
554 COPY_STATE_FN(reference, acquired_refs, refs, 1)
555 /* copy_stack_state() */
556 COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
557 #undef COPY_STATE_FN
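/* For illustration (an approximate expansion, shown only to clarify the macro):
 * COPY_STATE_FN(reference, acquired_refs, refs, 1) above generates roughly:
 *
 *	static int copy_reference_state(struct bpf_func_state *dst,
 *					const struct bpf_func_state *src)
 *	{
 *		if (!src->refs)
 *			return 0;
 *		if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
 *			memset(dst, 0, sizeof(*dst));
 *			return -EFAULT;
 *		}
 *		memcpy(dst->refs, src->refs,
 *		       sizeof(*src->refs) * (src->acquired_refs / 1));
 *		return 0;
 *	}
 */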
558 
559 #define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
560 static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
561 				  bool copy_old)			\
562 {									\
563 	u32 old_size = state->COUNT;					\
564 	struct bpf_##NAME##_state *new_##FIELD;				\
565 	int slot = size / SIZE;						\
566 									\
567 	if (size <= old_size || !size) {				\
568 		if (copy_old)						\
569 			return 0;					\
570 		state->COUNT = slot * SIZE;				\
571 		if (!size && old_size) {				\
572 			kfree(state->FIELD);				\
573 			state->FIELD = NULL;				\
574 		}							\
575 		return 0;						\
576 	}								\
577 	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
578 				    GFP_KERNEL);			\
579 	if (!new_##FIELD)						\
580 		return -ENOMEM;						\
581 	if (copy_old) {							\
582 		if (state->FIELD)					\
583 			memcpy(new_##FIELD, state->FIELD,		\
584 			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
585 		memset(new_##FIELD + old_size / SIZE, 0,		\
586 		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
587 	}								\
588 	state->COUNT = slot * SIZE;					\
589 	kfree(state->FIELD);						\
590 	state->FIELD = new_##FIELD;					\
591 	return 0;							\
592 }
593 /* realloc_reference_state() */
594 REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
595 /* realloc_stack_state() */
596 REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
597 #undef REALLOC_STATE_FN
598 
599 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
600  * make it consume a minimal amount of memory. check_stack_write() accesses from
601  * the program call into realloc_func_state() to grow the stack size.
602  * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
603  * which realloc_stack_state() copies over. It points to previous
604  * bpf_verifier_state which is never reallocated.
605  */
606 static int realloc_func_state(struct bpf_func_state *state, int stack_size,
607 			      int refs_size, bool copy_old)
608 {
609 	int err = realloc_reference_state(state, refs_size, copy_old);
610 	if (err)
611 		return err;
612 	return realloc_stack_state(state, stack_size, copy_old);
613 }
614 
615 /* Acquire a pointer id from the env and update the state->refs to include
616  * this new pointer reference.
617  * On success, returns a valid pointer id to associate with the register.
618  * On failure, returns a negative errno.
619  */
620 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
621 {
622 	struct bpf_func_state *state = cur_func(env);
623 	int new_ofs = state->acquired_refs;
624 	int id, err;
625 
626 	err = realloc_reference_state(state, state->acquired_refs + 1, true);
627 	if (err)
628 		return err;
629 	id = ++env->id_gen;
630 	state->refs[new_ofs].id = id;
631 	state->refs[new_ofs].insn_idx = insn_idx;
632 
633 	return id;
634 }
635 
636 /* release function corresponding to acquire_reference_state(). Idempotent. */
637 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
638 {
639 	int i, last_idx;
640 
641 	last_idx = state->acquired_refs - 1;
642 	for (i = 0; i < state->acquired_refs; i++) {
643 		if (state->refs[i].id == ptr_id) {
644 			if (last_idx && i != last_idx)
645 				memcpy(&state->refs[i], &state->refs[last_idx],
646 				       sizeof(*state->refs));
647 			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
648 			state->acquired_refs--;
649 			return 0;
650 		}
651 	}
652 	return -EINVAL;
653 }
654 
655 static int transfer_reference_state(struct bpf_func_state *dst,
656 				    struct bpf_func_state *src)
657 {
658 	int err = realloc_reference_state(dst, src->acquired_refs, false);
659 	if (err)
660 		return err;
661 	err = copy_reference_state(dst, src);
662 	if (err)
663 		return err;
664 	return 0;
665 }
666 
667 static void free_func_state(struct bpf_func_state *state)
668 {
669 	if (!state)
670 		return;
671 	kfree(state->refs);
672 	kfree(state->stack);
673 	kfree(state);
674 }
675 
676 static void free_verifier_state(struct bpf_verifier_state *state,
677 				bool free_self)
678 {
679 	int i;
680 
681 	for (i = 0; i <= state->curframe; i++) {
682 		free_func_state(state->frame[i]);
683 		state->frame[i] = NULL;
684 	}
685 	if (free_self)
686 		kfree(state);
687 }
688 
689 /* copy verifier state from src to dst growing dst stack space
690  * when necessary to accommodate larger src stack
691  */
692 static int copy_func_state(struct bpf_func_state *dst,
693 			   const struct bpf_func_state *src)
694 {
695 	int err;
696 
697 	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
698 				 false);
699 	if (err)
700 		return err;
701 	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
702 	err = copy_reference_state(dst, src);
703 	if (err)
704 		return err;
705 	return copy_stack_state(dst, src);
706 }
707 
708 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
709 			       const struct bpf_verifier_state *src)
710 {
711 	struct bpf_func_state *dst;
712 	int i, err;
713 
714 	/* if dst has more stack frames than src, free them */
715 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
716 		free_func_state(dst_state->frame[i]);
717 		dst_state->frame[i] = NULL;
718 	}
719 	dst_state->speculative = src->speculative;
720 	dst_state->curframe = src->curframe;
721 	dst_state->active_spin_lock = src->active_spin_lock;
722 	for (i = 0; i <= src->curframe; i++) {
723 		dst = dst_state->frame[i];
724 		if (!dst) {
725 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
726 			if (!dst)
727 				return -ENOMEM;
728 			dst_state->frame[i] = dst;
729 		}
730 		err = copy_func_state(dst, src->frame[i]);
731 		if (err)
732 			return err;
733 	}
734 	return 0;
735 }
736 
737 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
738 		     int *insn_idx)
739 {
740 	struct bpf_verifier_state *cur = env->cur_state;
741 	struct bpf_verifier_stack_elem *elem, *head = env->head;
742 	int err;
743 
744 	if (env->head == NULL)
745 		return -ENOENT;
746 
747 	if (cur) {
748 		err = copy_verifier_state(cur, &head->st);
749 		if (err)
750 			return err;
751 	}
752 	if (insn_idx)
753 		*insn_idx = head->insn_idx;
754 	if (prev_insn_idx)
755 		*prev_insn_idx = head->prev_insn_idx;
756 	elem = head->next;
757 	free_verifier_state(&head->st, false);
758 	kfree(head);
759 	env->head = elem;
760 	env->stack_size--;
761 	return 0;
762 }
763 
764 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
765 					     int insn_idx, int prev_insn_idx,
766 					     bool speculative)
767 {
768 	struct bpf_verifier_state *cur = env->cur_state;
769 	struct bpf_verifier_stack_elem *elem;
770 	int err;
771 
772 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
773 	if (!elem)
774 		goto err;
775 
776 	elem->insn_idx = insn_idx;
777 	elem->prev_insn_idx = prev_insn_idx;
778 	elem->next = env->head;
779 	env->head = elem;
780 	env->stack_size++;
781 	err = copy_verifier_state(&elem->st, cur);
782 	if (err)
783 		goto err;
784 	elem->st.speculative |= speculative;
785 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
786 		verbose(env, "BPF program is too complex\n");
787 		goto err;
788 	}
789 	return &elem->st;
790 err:
791 	free_verifier_state(env->cur_state, true);
792 	env->cur_state = NULL;
793 	/* pop all elements and return */
794 	while (!pop_stack(env, NULL, NULL));
795 	return NULL;
796 }
797 
798 #define CALLER_SAVED_REGS 6
799 static const int caller_saved[CALLER_SAVED_REGS] = {
800 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
801 };
802 
803 static void __mark_reg_not_init(struct bpf_reg_state *reg);
804 
805 /* Mark the unknown part of a register (variable offset or scalar value) as
806  * known to have the value @imm.
807  */
808 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
809 {
810 	/* Clear id, off, and union(map_ptr, range) */
811 	memset(((u8 *)reg) + sizeof(reg->type), 0,
812 	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
813 	reg->var_off = tnum_const(imm);
814 	reg->smin_value = (s64)imm;
815 	reg->smax_value = (s64)imm;
816 	reg->umin_value = imm;
817 	reg->umax_value = imm;
818 }
819 
820 /* Mark the 'variable offset' part of a register as zero.  This should be
821  * used only on registers holding a pointer type.
822  */
823 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
824 {
825 	__mark_reg_known(reg, 0);
826 }
827 
828 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
829 {
830 	__mark_reg_known(reg, 0);
831 	reg->type = SCALAR_VALUE;
832 }
833 
834 static void mark_reg_known_zero(struct bpf_verifier_env *env,
835 				struct bpf_reg_state *regs, u32 regno)
836 {
837 	if (WARN_ON(regno >= MAX_BPF_REG)) {
838 		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
839 		/* Something bad happened, let's kill all regs */
840 		for (regno = 0; regno < MAX_BPF_REG; regno++)
841 			__mark_reg_not_init(regs + regno);
842 		return;
843 	}
844 	__mark_reg_known_zero(regs + regno);
845 }
846 
847 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
848 {
849 	return type_is_pkt_pointer(reg->type);
850 }
851 
852 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
853 {
854 	return reg_is_pkt_pointer(reg) ||
855 	       reg->type == PTR_TO_PACKET_END;
856 }
857 
858 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
859 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
860 				    enum bpf_reg_type which)
861 {
862 	/* The register can already have a range from prior markings.
863 	 * This is fine as long as it hasn't been advanced from its
864 	 * origin.
865 	 */
866 	return reg->type == which &&
867 	       reg->id == 0 &&
868 	       reg->off == 0 &&
869 	       tnum_equals_const(reg->var_off, 0);
870 }
871 
872 /* Attempts to improve min/max values based on var_off information */
873 static void __update_reg_bounds(struct bpf_reg_state *reg)
874 {
875 	/* min signed is max(sign bit) | min(other bits) */
876 	reg->smin_value = max_t(s64, reg->smin_value,
877 				reg->var_off.value | (reg->var_off.mask & S64_MIN));
878 	/* max signed is min(sign bit) | max(other bits) */
879 	reg->smax_value = min_t(s64, reg->smax_value,
880 				reg->var_off.value | (reg->var_off.mask & S64_MAX));
881 	reg->umin_value = max(reg->umin_value, reg->var_off.value);
882 	reg->umax_value = min(reg->umax_value,
883 			      reg->var_off.value | reg->var_off.mask);
884 }
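/* Worked example (illustrative numbers only): if reg->var_off has value = 0x2
 * and mask = 0x1 (bit 1 known set, bit 0 unknown, all other bits known clear),
 * the only possible concrete values are 2 and 3, so the code above tightens
 * the bounds to umin_value >= 2, umax_value <= 3, smin_value >= 2 and
 * smax_value <= 3.
 */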
885 
886 /* Uses signed min/max values to inform unsigned, and vice-versa */
887 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
888 {
889 	/* Learn sign from signed bounds.
890 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
891 	 * are the same, so combine.  This works even in the negative case, e.g.
892 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
893 	 */
894 	if (reg->smin_value >= 0 || reg->smax_value < 0) {
895 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
896 							  reg->umin_value);
897 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
898 							  reg->umax_value);
899 		return;
900 	}
901 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
902 	 * boundary, so we must be careful.
903 	 */
904 	if ((s64)reg->umax_value >= 0) {
905 		/* Positive.  We can't learn anything from the smin, but smax
906 		 * is positive, hence safe.
907 		 */
908 		reg->smin_value = reg->umin_value;
909 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
910 							  reg->umax_value);
911 	} else if ((s64)reg->umin_value < 0) {
912 		/* Negative.  We can't learn anything from the smax, but smin
913 		 * is negative, hence safe.
914 		 */
915 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
916 							  reg->umin_value);
917 		reg->smax_value = reg->umax_value;
918 	}
919 }
920 
921 /* Attempts to improve var_off based on unsigned min/max information */
922 static void __reg_bound_offset(struct bpf_reg_state *reg)
923 {
924 	reg->var_off = tnum_intersect(reg->var_off,
925 				      tnum_range(reg->umin_value,
926 						 reg->umax_value));
927 }
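/* Illustrative example (hypothetical values): with umin_value = 4 and
 * umax_value = 7, tnum_range() yields value = 0x4, mask = 0x3 (bit 2 known
 * set, bits 0-1 unknown), and tnum_intersect() keeps only the bit knowledge
 * consistent with both that range and the existing var_off.
 */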
928 
929 /* Reset the min/max bounds of a register */
930 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
931 {
932 	reg->smin_value = S64_MIN;
933 	reg->smax_value = S64_MAX;
934 	reg->umin_value = 0;
935 	reg->umax_value = U64_MAX;
936 }
937 
938 /* Mark a register as having a completely unknown (scalar) value. */
939 static void __mark_reg_unknown(struct bpf_reg_state *reg)
940 {
941 	/*
942 	 * Clear type, id, off, and union(map_ptr, range) and
943 	 * padding between 'type' and union
944 	 */
945 	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
946 	reg->type = SCALAR_VALUE;
947 	reg->var_off = tnum_unknown;
948 	reg->frameno = 0;
949 	__mark_reg_unbounded(reg);
950 }
951 
952 static void mark_reg_unknown(struct bpf_verifier_env *env,
953 			     struct bpf_reg_state *regs, u32 regno)
954 {
955 	if (WARN_ON(regno >= MAX_BPF_REG)) {
956 		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
957 		/* Something bad happened, let's kill all regs except FP */
958 		for (regno = 0; regno < BPF_REG_FP; regno++)
959 			__mark_reg_not_init(regs + regno);
960 		return;
961 	}
962 	__mark_reg_unknown(regs + regno);
963 }
964 
965 static void __mark_reg_not_init(struct bpf_reg_state *reg)
966 {
967 	__mark_reg_unknown(reg);
968 	reg->type = NOT_INIT;
969 }
970 
971 static void mark_reg_not_init(struct bpf_verifier_env *env,
972 			      struct bpf_reg_state *regs, u32 regno)
973 {
974 	if (WARN_ON(regno >= MAX_BPF_REG)) {
975 		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
976 		/* Something bad happened, let's kill all regs except FP */
977 		for (regno = 0; regno < BPF_REG_FP; regno++)
978 			__mark_reg_not_init(regs + regno);
979 		return;
980 	}
981 	__mark_reg_not_init(regs + regno);
982 }
983 
984 static void init_reg_state(struct bpf_verifier_env *env,
985 			   struct bpf_func_state *state)
986 {
987 	struct bpf_reg_state *regs = state->regs;
988 	int i;
989 
990 	for (i = 0; i < MAX_BPF_REG; i++) {
991 		mark_reg_not_init(env, regs, i);
992 		regs[i].live = REG_LIVE_NONE;
993 		regs[i].parent = NULL;
994 	}
995 
996 	/* frame pointer */
997 	regs[BPF_REG_FP].type = PTR_TO_STACK;
998 	mark_reg_known_zero(env, regs, BPF_REG_FP);
999 	regs[BPF_REG_FP].frameno = state->frameno;
1000 
1001 	/* 1st arg to a function */
1002 	regs[BPF_REG_1].type = PTR_TO_CTX;
1003 	mark_reg_known_zero(env, regs, BPF_REG_1);
1004 }
1005 
1006 #define BPF_MAIN_FUNC (-1)
1007 static void init_func_state(struct bpf_verifier_env *env,
1008 			    struct bpf_func_state *state,
1009 			    int callsite, int frameno, int subprogno)
1010 {
1011 	state->callsite = callsite;
1012 	state->frameno = frameno;
1013 	state->subprogno = subprogno;
1014 	init_reg_state(env, state);
1015 }
1016 
1017 enum reg_arg_type {
1018 	SRC_OP,		/* register is used as source operand */
1019 	DST_OP,		/* register is used as destination operand */
1020 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
1021 };
1022 
1023 static int cmp_subprogs(const void *a, const void *b)
1024 {
1025 	return ((struct bpf_subprog_info *)a)->start -
1026 	       ((struct bpf_subprog_info *)b)->start;
1027 }
1028 
1029 static int find_subprog(struct bpf_verifier_env *env, int off)
1030 {
1031 	struct bpf_subprog_info *p;
1032 
1033 	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1034 		    sizeof(env->subprog_info[0]), cmp_subprogs);
1035 	if (!p)
1036 		return -ENOENT;
1037 	return p - env->subprog_info;
1038 
1039 }
1040 
1041 static int add_subprog(struct bpf_verifier_env *env, int off)
1042 {
1043 	int insn_cnt = env->prog->len;
1044 	int ret;
1045 
1046 	if (off >= insn_cnt || off < 0) {
1047 		verbose(env, "call to invalid destination\n");
1048 		return -EINVAL;
1049 	}
1050 	ret = find_subprog(env, off);
1051 	if (ret >= 0)
1052 		return 0;
1053 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1054 		verbose(env, "too many subprograms\n");
1055 		return -E2BIG;
1056 	}
1057 	env->subprog_info[env->subprog_cnt++].start = off;
1058 	sort(env->subprog_info, env->subprog_cnt,
1059 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1060 	return 0;
1061 }
1062 
1063 static int check_subprogs(struct bpf_verifier_env *env)
1064 {
1065 	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
1066 	struct bpf_subprog_info *subprog = env->subprog_info;
1067 	struct bpf_insn *insn = env->prog->insnsi;
1068 	int insn_cnt = env->prog->len;
1069 
1070 	/* Add entry function. */
1071 	ret = add_subprog(env, 0);
1072 	if (ret < 0)
1073 		return ret;
1074 
1075 	/* determine subprog starts. The end is one before the next starts */
1076 	for (i = 0; i < insn_cnt; i++) {
1077 		if (insn[i].code != (BPF_JMP | BPF_CALL))
1078 			continue;
1079 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
1080 			continue;
1081 		if (!env->allow_ptr_leaks) {
1082 			verbose(env, "function calls to other bpf functions are allowed for root only\n");
1083 			return -EPERM;
1084 		}
1085 		ret = add_subprog(env, i + insn[i].imm + 1);
1086 		if (ret < 0)
1087 			return ret;
1088 	}
1089 
1090 	/* Add a fake 'exit' subprog which could simplify subprog iteration
1091 	 * logic. 'subprog_cnt' should not be increased.
1092 	 */
1093 	subprog[env->subprog_cnt].start = insn_cnt;
1094 
1095 	if (env->log.level > 1)
1096 		for (i = 0; i < env->subprog_cnt; i++)
1097 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
1098 
1099 	/* now check that all jumps are within the same subprog */
1100 	subprog_start = subprog[cur_subprog].start;
1101 	subprog_end = subprog[cur_subprog + 1].start;
1102 	for (i = 0; i < insn_cnt; i++) {
1103 		u8 code = insn[i].code;
1104 
1105 		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
1106 			goto next;
1107 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1108 			goto next;
1109 		off = i + insn[i].off + 1;
1110 		if (off < subprog_start || off >= subprog_end) {
1111 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
1112 			return -EINVAL;
1113 		}
1114 next:
1115 		if (i == subprog_end - 1) {
1116 			/* to avoid fall-through from one subprog into another,
1117 			 * the last insn of the subprog should be either exit
1118 			 * or unconditional jump back
1119 			 */
1120 			if (code != (BPF_JMP | BPF_EXIT) &&
1121 			    code != (BPF_JMP | BPF_JA)) {
1122 				verbose(env, "last insn is not an exit or jmp\n");
1123 				return -EINVAL;
1124 			}
1125 			subprog_start = subprog_end;
1126 			cur_subprog++;
1127 			if (cur_subprog < env->subprog_cnt)
1128 				subprog_end = subprog[cur_subprog + 1].start;
1129 		}
1130 	}
1131 	return 0;
1132 }
1133 
1134 /* Parentage chain of this register (or stack slot) should take care of all
1135  * issues like callee-saved registers, stack slot allocation time, etc.
1136  */
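/* For example: when a read of a register or stack slot is seen in the current
 * state, mark_reg_read() walks the ->parent chain and sets REG_LIVE_READ on
 * the corresponding slot in each ancestor state, stopping once it reaches a
 * state whose copy of the slot carries REG_LIVE_WRITTEN, i.e. the read was
 * screened by an earlier write.
 */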
1137 static int mark_reg_read(struct bpf_verifier_env *env,
1138 			 const struct bpf_reg_state *state,
1139 			 struct bpf_reg_state *parent)
1140 {
1141 	bool writes = parent == state->parent; /* Observe write marks */
1142 
1143 	while (parent) {
1144 		/* if read wasn't screened by an earlier write ... */
1145 		if (writes && state->live & REG_LIVE_WRITTEN)
1146 			break;
1147 		if (parent->live & REG_LIVE_DONE) {
1148 			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1149 				reg_type_str[parent->type],
1150 				parent->var_off.value, parent->off);
1151 			return -EFAULT;
1152 		}
1153 		/* ... then we depend on parent's value */
1154 		parent->live |= REG_LIVE_READ;
1155 		state = parent;
1156 		parent = state->parent;
1157 		writes = true;
1158 	}
1159 	return 0;
1160 }
1161 
1162 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1163 			 enum reg_arg_type t)
1164 {
1165 	struct bpf_verifier_state *vstate = env->cur_state;
1166 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
1167 	struct bpf_reg_state *regs = state->regs;
1168 
1169 	if (regno >= MAX_BPF_REG) {
1170 		verbose(env, "R%d is invalid\n", regno);
1171 		return -EINVAL;
1172 	}
1173 
1174 	if (t == SRC_OP) {
1175 		/* check whether register used as source operand can be read */
1176 		if (regs[regno].type == NOT_INIT) {
1177 			verbose(env, "R%d !read_ok\n", regno);
1178 			return -EACCES;
1179 		}
1180 		/* We don't need to worry about FP liveness because it's read-only */
1181 		if (regno != BPF_REG_FP)
1182 			return mark_reg_read(env, &regs[regno],
1183 					     regs[regno].parent);
1184 	} else {
1185 		/* check whether register used as dest operand can be written to */
1186 		if (regno == BPF_REG_FP) {
1187 			verbose(env, "frame pointer is read only\n");
1188 			return -EACCES;
1189 		}
1190 		regs[regno].live |= REG_LIVE_WRITTEN;
1191 		if (t == DST_OP)
1192 			mark_reg_unknown(env, regs, regno);
1193 	}
1194 	return 0;
1195 }
1196 
1197 static bool is_spillable_regtype(enum bpf_reg_type type)
1198 {
1199 	switch (type) {
1200 	case PTR_TO_MAP_VALUE:
1201 	case PTR_TO_MAP_VALUE_OR_NULL:
1202 	case PTR_TO_STACK:
1203 	case PTR_TO_CTX:
1204 	case PTR_TO_PACKET:
1205 	case PTR_TO_PACKET_META:
1206 	case PTR_TO_PACKET_END:
1207 	case PTR_TO_FLOW_KEYS:
1208 	case CONST_PTR_TO_MAP:
1209 	case PTR_TO_SOCKET:
1210 	case PTR_TO_SOCKET_OR_NULL:
1211 	case PTR_TO_SOCK_COMMON:
1212 	case PTR_TO_SOCK_COMMON_OR_NULL:
1213 	case PTR_TO_TCP_SOCK:
1214 	case PTR_TO_TCP_SOCK_OR_NULL:
1215 		return true;
1216 	default:
1217 		return false;
1218 	}
1219 }
1220 
1221 /* Does this register contain a constant zero? */
1222 static bool register_is_null(struct bpf_reg_state *reg)
1223 {
1224 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1225 }
1226 
1227 /* check_stack_read/write functions track spill/fill of registers;
1228  * stack boundary and alignment are checked in check_mem_access()
1229  */
1230 static int check_stack_write(struct bpf_verifier_env *env,
1231 			     struct bpf_func_state *state, /* func where register points to */
1232 			     int off, int size, int value_regno, int insn_idx)
1233 {
1234 	struct bpf_func_state *cur; /* state of the current function */
1235 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
1236 	enum bpf_reg_type type;
1237 
1238 	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
1239 				 state->acquired_refs, true);
1240 	if (err)
1241 		return err;
1242 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
1243 	 * so it's aligned access and [off, off + size) are within stack limits
1244 	 */
1245 	if (!env->allow_ptr_leaks &&
1246 	    state->stack[spi].slot_type[0] == STACK_SPILL &&
1247 	    size != BPF_REG_SIZE) {
1248 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
1249 		return -EACCES;
1250 	}
1251 
1252 	cur = env->cur_state->frame[env->cur_state->curframe];
1253 	if (value_regno >= 0 &&
1254 	    is_spillable_regtype((type = cur->regs[value_regno].type))) {
1255 
1256 		/* register containing pointer is being spilled into stack */
1257 		if (size != BPF_REG_SIZE) {
1258 			verbose(env, "invalid size of register spill\n");
1259 			return -EACCES;
1260 		}
1261 
1262 		if (state != cur && type == PTR_TO_STACK) {
1263 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1264 			return -EINVAL;
1265 		}
1266 
1267 		/* save register state */
1268 		state->stack[spi].spilled_ptr = cur->regs[value_regno];
1269 		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1270 
1271 		for (i = 0; i < BPF_REG_SIZE; i++) {
1272 			if (state->stack[spi].slot_type[i] == STACK_MISC &&
1273 			    !env->allow_ptr_leaks) {
1274 				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
1275 				int soff = (-spi - 1) * BPF_REG_SIZE;
1276 
1277 				/* detected reuse of integer stack slot with a pointer
1278 				 * which means either llvm is reusing stack slot or
1279 				 * an attacker is trying to exploit CVE-2018-3639
1280 				 * (speculative store bypass).
1281 				 * Have to sanitize that slot with preemptive
1282 				 * store of zero.
1283 				 */
1284 				if (*poff && *poff != soff) {
1285 					/* disallow programs where single insn stores
1286 					 * into two different stack slots, since verifier
1287 					 * cannot sanitize them
1288 					 */
1289 					verbose(env,
1290 						"insn %d cannot access two stack slots fp%d and fp%d",
1291 						insn_idx, *poff, soff);
1292 					return -EINVAL;
1293 				}
1294 				*poff = soff;
1295 			}
1296 			state->stack[spi].slot_type[i] = STACK_SPILL;
1297 		}
1298 	} else {
1299 		u8 type = STACK_MISC;
1300 
1301 		/* regular write of data into stack destroys any spilled ptr */
1302 		state->stack[spi].spilled_ptr.type = NOT_INIT;
1303 		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
1304 		if (state->stack[spi].slot_type[0] == STACK_SPILL)
1305 			for (i = 0; i < BPF_REG_SIZE; i++)
1306 				state->stack[spi].slot_type[i] = STACK_MISC;
1307 
1308 		/* only mark the slot as written if all 8 bytes were written
1309 		 * otherwise read propagation may incorrectly stop too soon
1310 		 * when stack slots are partially written.
1311 		 * This heuristic means that read propagation will be
1312 		 * conservative, since it will add reg_live_read marks
1313 		 * to stack slots all the way to the first state when a program
1314 		 * writes+reads less than 8 bytes.
1315 		 */
1316 		if (size == BPF_REG_SIZE)
1317 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1318 
1319 		/* when we zero initialize stack slots mark them as such */
1320 		if (value_regno >= 0 &&
1321 		    register_is_null(&cur->regs[value_regno]))
1322 			type = STACK_ZERO;
1323 
1324 		/* Mark slots affected by this stack write. */
1325 		for (i = 0; i < size; i++)
1326 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
1327 				type;
1328 	}
1329 	return 0;
1330 }
1331 
1332 static int check_stack_read(struct bpf_verifier_env *env,
1333 			    struct bpf_func_state *reg_state /* func where register points to */,
1334 			    int off, int size, int value_regno)
1335 {
1336 	struct bpf_verifier_state *vstate = env->cur_state;
1337 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
1338 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1339 	u8 *stype;
1340 
1341 	if (reg_state->allocated_stack <= slot) {
1342 		verbose(env, "invalid read from stack off %d+0 size %d\n",
1343 			off, size);
1344 		return -EACCES;
1345 	}
1346 	stype = reg_state->stack[spi].slot_type;
1347 
1348 	if (stype[0] == STACK_SPILL) {
1349 		if (size != BPF_REG_SIZE) {
1350 			verbose(env, "invalid size of register spill\n");
1351 			return -EACCES;
1352 		}
1353 		for (i = 1; i < BPF_REG_SIZE; i++) {
1354 			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
1355 				verbose(env, "corrupted spill memory\n");
1356 				return -EACCES;
1357 			}
1358 		}
1359 
1360 		if (value_regno >= 0) {
1361 			/* restore register state from stack */
1362 			state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
1363 			/* mark reg as written since spilled pointer state likely
1364 			 * has its liveness marks cleared by is_state_visited()
1365 			 * which resets stack/reg liveness for state transitions
1366 			 */
1367 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1368 		}
1369 		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1370 			      reg_state->stack[spi].spilled_ptr.parent);
1371 		return 0;
1372 	} else {
1373 		int zeros = 0;
1374 
1375 		for (i = 0; i < size; i++) {
1376 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
1377 				continue;
1378 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
1379 				zeros++;
1380 				continue;
1381 			}
1382 			verbose(env, "invalid read from stack off %d+%d size %d\n",
1383 				off, i, size);
1384 			return -EACCES;
1385 		}
1386 		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1387 			      reg_state->stack[spi].spilled_ptr.parent);
1388 		if (value_regno >= 0) {
1389 			if (zeros == size) {
1390 				/* any size read into register is zero extended,
1391 				 * so the whole register == const_zero
1392 				 */
1393 				__mark_reg_const_zero(&state->regs[value_regno]);
1394 			} else {
1395 				/* have read misc data from the stack */
1396 				mark_reg_unknown(env, state->regs, value_regno);
1397 			}
1398 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1399 		}
1400 		return 0;
1401 	}
1402 }
1403 
1404 static int check_stack_access(struct bpf_verifier_env *env,
1405 			      const struct bpf_reg_state *reg,
1406 			      int off, int size)
1407 {
1408 	/* Stack accesses must be at a fixed offset, so that we
1409 	 * can determine what type of data was returned. See
1410 	 * check_stack_read().
1411 	 */
1412 	if (!tnum_is_const(reg->var_off)) {
1413 		char tn_buf[48];
1414 
1415 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1416 		verbose(env, "variable stack access var_off=%s off=%d size=%d",
1417 			tn_buf, off, size);
1418 		return -EACCES;
1419 	}
1420 
1421 	if (off >= 0 || off < -MAX_BPF_STACK) {
1422 		verbose(env, "invalid stack off=%d size=%d\n", off, size);
1423 		return -EACCES;
1424 	}
1425 
1426 	return 0;
1427 }
1428 
1429 /* check read/write into map element returned by bpf_map_lookup_elem() */
1430 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
1431 			      int size, bool zero_size_allowed)
1432 {
1433 	struct bpf_reg_state *regs = cur_regs(env);
1434 	struct bpf_map *map = regs[regno].map_ptr;
1435 
1436 	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1437 	    off + size > map->value_size) {
1438 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
1439 			map->value_size, off, size);
1440 		return -EACCES;
1441 	}
1442 	return 0;
1443 }
1444 
1445 /* check read/write into a map element with possible variable offset */
1446 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
1447 			    int off, int size, bool zero_size_allowed)
1448 {
1449 	struct bpf_verifier_state *vstate = env->cur_state;
1450 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
1451 	struct bpf_reg_state *reg = &state->regs[regno];
1452 	int err;
1453 
1454 	/* We may have adjusted the register to this map value, so we
1455 	 * need to try adding each of min_value and max_value to off
1456 	 * to make sure our theoretical access will be safe.
1457 	 */
1458 	if (env->log.level)
1459 		print_verifier_state(env, state);
1460 
1461 	/* The minimum value is only important with signed
1462 	 * comparisons where we can't assume the floor of a
1463 	 * value is 0.  If we are using signed variables for our
1464 	 * indexes we need to make sure that whatever we use
1465 	 * will have a set floor within our range.
1466 	 */
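	/* A numeric sketch (illustrative values only): with off = 16 and a
	 * register known to lie in [smin_value = -8, umax_value = 40], the
	 * checks below verify offsets 16 + (-8) = 8 and 16 + 40 = 56 against
	 * the map's value_size, so the access is allowed only if the whole
	 * window [8, 56 + size) stays within the map value.
	 */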
1467 	if (reg->smin_value < 0 &&
1468 	    (reg->smin_value == S64_MIN ||
1469 	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
1470 	      reg->smin_value + off < 0)) {
1471 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1472 			regno);
1473 		return -EACCES;
1474 	}
1475 	err = __check_map_access(env, regno, reg->smin_value + off, size,
1476 				 zero_size_allowed);
1477 	if (err) {
1478 		verbose(env, "R%d min value is outside of the array range\n",
1479 			regno);
1480 		return err;
1481 	}
1482 
1483 	/* If we haven't set a max value then we need to bail since we can't be
1484 	 * sure we won't do bad things.
1485 	 * If reg->umax_value + off could overflow, treat that as unbounded too.
1486 	 */
1487 	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
1488 		verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
1489 			regno);
1490 		return -EACCES;
1491 	}
1492 	err = __check_map_access(env, regno, reg->umax_value + off, size,
1493 				 zero_size_allowed);
1494 	if (err)
1495 		verbose(env, "R%d max value is outside of the array range\n",
1496 			regno);
1497 
1498 	if (map_value_has_spin_lock(reg->map_ptr)) {
1499 		u32 lock = reg->map_ptr->spin_lock_off;
1500 
1501 		/* if any part of struct bpf_spin_lock can be touched by
1502 		 * load/store reject this program.
1503 		 * load/store, reject this program.
1504 		 * it is sufficient to check x1 < y2 && y1 < x2.
1505 		 */
1506 		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
1507 		     lock < reg->umax_value + off + size) {
1508 			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
1509 			return -EACCES;
1510 		}
1511 	}
1512 	return err;
1513 }
1514 
1515 #define MAX_PACKET_OFF 0xffff
1516 
1517 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
1518 				       const struct bpf_call_arg_meta *meta,
1519 				       enum bpf_access_type t)
1520 {
1521 	switch (env->prog->type) {
1522 	/* Program types with direct read access only go here! */
1523 	case BPF_PROG_TYPE_LWT_IN:
1524 	case BPF_PROG_TYPE_LWT_OUT:
1525 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
1526 	case BPF_PROG_TYPE_SK_REUSEPORT:
1527 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
1528 	case BPF_PROG_TYPE_CGROUP_SKB:
1529 		if (t == BPF_WRITE)
1530 			return false;
1531 		/* fallthrough */
1532 
1533 	/* Program types with direct read + write access go here! */
1534 	case BPF_PROG_TYPE_SCHED_CLS:
1535 	case BPF_PROG_TYPE_SCHED_ACT:
1536 	case BPF_PROG_TYPE_XDP:
1537 	case BPF_PROG_TYPE_LWT_XMIT:
1538 	case BPF_PROG_TYPE_SK_SKB:
1539 	case BPF_PROG_TYPE_SK_MSG:
1540 		if (meta)
1541 			return meta->pkt_access;
1542 
1543 		env->seen_direct_write = true;
1544 		return true;
1545 	default:
1546 		return false;
1547 	}
1548 }
1549 
1550 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
1551 				 int off, int size, bool zero_size_allowed)
1552 {
1553 	struct bpf_reg_state *regs = cur_regs(env);
1554 	struct bpf_reg_state *reg = &regs[regno];
1555 
1556 	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1557 	    (u64)off + size > reg->range) {
1558 		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
1559 			off, size, regno, reg->id, reg->off, reg->range);
1560 		return -EACCES;
1561 	}
1562 	return 0;
1563 }
1564 
1565 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
1566 			       int size, bool zero_size_allowed)
1567 {
1568 	struct bpf_reg_state *regs = cur_regs(env);
1569 	struct bpf_reg_state *reg = &regs[regno];
1570 	int err;
1571 
1572 	/* We may have added a variable offset to the packet pointer; but any
1573 	 * reg->range we have comes after that.  We are only checking the fixed
1574 	 * offset.
1575 	 */
1576 
1577 	/* We don't allow negative numbers, because we aren't tracking enough
1578 	 * detail to prove they're safe.
1579 	 */
1580 	if (reg->smin_value < 0) {
1581 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1582 			regno);
1583 		return -EACCES;
1584 	}
1585 	err = __check_packet_access(env, regno, off, size, zero_size_allowed);
1586 	if (err) {
1587 		verbose(env, "R%d offset is outside of the packet\n", regno);
1588 		return err;
1589 	}
1590 
1591 	/* __check_packet_access has made sure "off + size - 1" is within u16.
1592 	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
1593 	 * otherwise find_good_pkt_pointers would have refused to set range info
1594 	 * and __check_packet_access would have rejected this pkt access.
1595 	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
1596 	 */
1597 	env->prog->aux->max_pkt_offset =
1598 		max_t(u32, env->prog->aux->max_pkt_offset,
1599 		      off + reg->umax_value + size - 1);
1600 
1601 	return err;
1602 }
1603 
1604 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
1605 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
1606 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
1607 {
1608 	struct bpf_insn_access_aux info = {
1609 		.reg_type = *reg_type,
1610 	};
1611 
1612 	if (env->ops->is_valid_access &&
1613 	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
1614 		/* A non-zero info.ctx_field_size indicates that this field is a
1615 		 * candidate for a later verifier transformation: load the whole
1616 		 * field and then apply a mask when the access is narrower than
1617 		 * the actual ctx field size. A zero info.ctx_field_size allows
1618 		 * only whole-field access and rejects any other type of
1619 		 * narrower access.
1620 		 */
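		/* For instance, a 2-byte read of a 4-byte __u32 ctx field for
		 * which the callback reported ctx_field_size == 4 is accepted
		 * here and later rewritten (in convert_ctx_accesses()) into a
		 * full 4-byte load followed by a shift/mask down to the
		 * requested 2 bytes.
		 */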
1621 		*reg_type = info.reg_type;
1622 
1623 		env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
1624 		/* remember the offset of last byte accessed in ctx */
1625 		if (env->prog->aux->max_ctx_offset < off + size)
1626 			env->prog->aux->max_ctx_offset = off + size;
1627 		return 0;
1628 	}
1629 
1630 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
1631 	return -EACCES;
1632 }
1633 
1634 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
1635 				  int size)
1636 {
1637 	if (size < 0 || off < 0 ||
1638 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
1639 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
1640 			off, size);
1641 		return -EACCES;
1642 	}
1643 	return 0;
1644 }
1645 
1646 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
1647 			     u32 regno, int off, int size,
1648 			     enum bpf_access_type t)
1649 {
1650 	struct bpf_reg_state *regs = cur_regs(env);
1651 	struct bpf_reg_state *reg = &regs[regno];
1652 	struct bpf_insn_access_aux info = {};
1653 	bool valid;
1654 
1655 	if (reg->smin_value < 0) {
1656 		verbose(env, "R%d min value is negative, either use unsigned index or do an if (index >=0) check.\n",
1657 			regno);
1658 		return -EACCES;
1659 	}
1660 
1661 	switch (reg->type) {
1662 	case PTR_TO_SOCK_COMMON:
1663 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
1664 		break;
1665 	case PTR_TO_SOCKET:
1666 		valid = bpf_sock_is_valid_access(off, size, t, &info);
1667 		break;
1668 	case PTR_TO_TCP_SOCK:
1669 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
1670 		break;
1671 	default:
1672 		valid = false;
1673 	}
1674 
1675 
1676 	if (valid) {
1677 		env->insn_aux_data[insn_idx].ctx_field_size =
1678 			info.ctx_field_size;
1679 		return 0;
1680 	}
1681 
1682 	verbose(env, "R%d invalid %s access off=%d size=%d\n",
1683 		regno, reg_type_str[reg->type], off, size);
1684 
1685 	return -EACCES;
1686 }
1687 
1688 static bool __is_pointer_value(bool allow_ptr_leaks,
1689 			       const struct bpf_reg_state *reg)
1690 {
1691 	if (allow_ptr_leaks)
1692 		return false;
1693 
1694 	return reg->type != SCALAR_VALUE;
1695 }
1696 
1697 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
1698 {
1699 	return cur_regs(env) + regno;
1700 }
1701 
1702 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1703 {
1704 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
1705 }
1706 
1707 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1708 {
1709 	const struct bpf_reg_state *reg = reg_state(env, regno);
1710 
1711 	return reg->type == PTR_TO_CTX;
1712 }
1713 
1714 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
1715 {
1716 	const struct bpf_reg_state *reg = reg_state(env, regno);
1717 
1718 	return type_is_sk_pointer(reg->type);
1719 }
1720 
1721 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
1722 {
1723 	const struct bpf_reg_state *reg = reg_state(env, regno);
1724 
1725 	return type_is_pkt_pointer(reg->type);
1726 }
1727 
1728 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
1729 {
1730 	const struct bpf_reg_state *reg = reg_state(env, regno);
1731 
1732 	/* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
1733 	return reg->type == PTR_TO_FLOW_KEYS;
1734 }
1735 
1736 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1737 				   const struct bpf_reg_state *reg,
1738 				   int off, int size, bool strict)
1739 {
1740 	struct tnum reg_off;
1741 	int ip_align;
1742 
1743 	/* Byte size accesses are always allowed. */
1744 	if (!strict || size == 1)
1745 		return 0;
1746 
1747 	/* For platforms that do not have a Kconfig enabling
1748 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1749 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
1750 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1751 	 * to this code only in strict mode where we want to emulate
1752 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
1753 	 * unconditional IP align value of '2'.
1754 	 */
1755 	ip_align = 2;
1756 
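	/* For example, with NET_IP_ALIGN accounted for, a 4-byte load at a
	 * fixed packet offset of 14 (just past the Ethernet header) checks
	 * 2 + 14 = 16, which is 4-byte aligned, while the same load at
	 * offset 12 would be rejected here in strict mode.
	 */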
1757 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1758 	if (!tnum_is_aligned(reg_off, size)) {
1759 		char tn_buf[48];
1760 
1761 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1762 		verbose(env,
1763 			"misaligned packet access off %d+%s+%d+%d size %d\n",
1764 			ip_align, tn_buf, reg->off, off, size);
1765 		return -EACCES;
1766 	}
1767 
1768 	return 0;
1769 }
1770 
1771 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1772 				       const struct bpf_reg_state *reg,
1773 				       const char *pointer_desc,
1774 				       int off, int size, bool strict)
1775 {
1776 	struct tnum reg_off;
1777 
1778 	/* Byte size accesses are always allowed. */
1779 	if (!strict || size == 1)
1780 		return 0;
1781 
1782 	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1783 	if (!tnum_is_aligned(reg_off, size)) {
1784 		char tn_buf[48];
1785 
1786 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1787 		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
1788 			pointer_desc, tn_buf, reg->off, off, size);
1789 		return -EACCES;
1790 	}
1791 
1792 	return 0;
1793 }
1794 
1795 static int check_ptr_alignment(struct bpf_verifier_env *env,
1796 			       const struct bpf_reg_state *reg, int off,
1797 			       int size, bool strict_alignment_once)
1798 {
1799 	bool strict = env->strict_alignment || strict_alignment_once;
1800 	const char *pointer_desc = "";
1801 
1802 	switch (reg->type) {
1803 	case PTR_TO_PACKET:
1804 	case PTR_TO_PACKET_META:
1805 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
1806 		 * right in front, treat it the very same way.
1807 		 */
1808 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
1809 	case PTR_TO_FLOW_KEYS:
1810 		pointer_desc = "flow keys ";
1811 		break;
1812 	case PTR_TO_MAP_VALUE:
1813 		pointer_desc = "value ";
1814 		break;
1815 	case PTR_TO_CTX:
1816 		pointer_desc = "context ";
1817 		break;
1818 	case PTR_TO_STACK:
1819 		pointer_desc = "stack ";
1820 		/* The stack spill tracking logic in check_stack_write()
1821 		 * and check_stack_read() relies on stack accesses being
1822 		 * aligned.
1823 		 */
1824 		strict = true;
1825 		break;
1826 	case PTR_TO_SOCKET:
1827 		pointer_desc = "sock ";
1828 		break;
1829 	case PTR_TO_SOCK_COMMON:
1830 		pointer_desc = "sock_common ";
1831 		break;
1832 	case PTR_TO_TCP_SOCK:
1833 		pointer_desc = "tcp_sock ";
1834 		break;
1835 	default:
1836 		break;
1837 	}
1838 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1839 					   strict);
1840 }
1841 
1842 static int update_stack_depth(struct bpf_verifier_env *env,
1843 			      const struct bpf_func_state *func,
1844 			      int off)
1845 {
1846 	u16 stack = env->subprog_info[func->subprogno].stack_depth;
1847 
1848 	if (stack >= -off)
1849 		return 0;
1850 
1851 	/* update known max for given subprogram */
1852 	env->subprog_info[func->subprogno].stack_depth = -off;
1853 	return 0;
1854 }
1855 
1856 /* Starting from the main bpf function, walk all instructions of the function
1857  * and recursively walk all callees that the given function can call.
1858  * Ignore jump and exit insns.
1859  * Since recursion is prevented by check_cfg(), this algorithm
1860  * only needs a local stack of MAX_CALL_FRAMES entries to remember callsites.
1861  */
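/* For example, with two frames whose declared stack_depth is 40 and 24
 * bytes, each is rounded up to the interpreter's 32-byte granularity,
 * so the combined depth checked below is 64 + 32 = 96 bytes, well under
 * MAX_BPF_STACK.
 */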
1862 static int check_max_stack_depth(struct bpf_verifier_env *env)
1863 {
1864 	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
1865 	struct bpf_subprog_info *subprog = env->subprog_info;
1866 	struct bpf_insn *insn = env->prog->insnsi;
1867 	int ret_insn[MAX_CALL_FRAMES];
1868 	int ret_prog[MAX_CALL_FRAMES];
1869 
1870 process_func:
1871 	/* round up to 32 bytes, since this is the granularity
1872 	 * of the interpreter stack size
1873 	 */
1874 	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
1875 	if (depth > MAX_BPF_STACK) {
1876 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
1877 			frame + 1, depth);
1878 		return -EACCES;
1879 	}
1880 continue_func:
1881 	subprog_end = subprog[idx + 1].start;
1882 	for (; i < subprog_end; i++) {
1883 		if (insn[i].code != (BPF_JMP | BPF_CALL))
1884 			continue;
1885 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
1886 			continue;
1887 		/* remember insn and function to return to */
1888 		ret_insn[frame] = i + 1;
1889 		ret_prog[frame] = idx;
1890 
1891 		/* find the callee */
1892 		i = i + insn[i].imm + 1;
1893 		idx = find_subprog(env, i);
1894 		if (idx < 0) {
1895 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1896 				  i);
1897 			return -EFAULT;
1898 		}
1899 		frame++;
1900 		if (frame >= MAX_CALL_FRAMES) {
1901 			verbose(env, "the call stack of %d frames is too deep!\n",
1902 				frame);
1903 			return -E2BIG;
1904 		}
1905 		goto process_func;
1906 	}
1907 	/* Reaching the end of the for() loop means the last insn of the 'subprog'
1908 	 * was reached. It doesn't matter whether it was a JA or an EXIT.
1909 	 */
1910 	if (frame == 0)
1911 		return 0;
1912 	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
1913 	frame--;
1914 	i = ret_insn[frame];
1915 	idx = ret_prog[frame];
1916 	goto continue_func;
1917 }
1918 
1919 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1920 static int get_callee_stack_depth(struct bpf_verifier_env *env,
1921 				  const struct bpf_insn *insn, int idx)
1922 {
1923 	int start = idx + insn->imm + 1, subprog;
1924 
1925 	subprog = find_subprog(env, start);
1926 	if (subprog < 0) {
1927 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1928 			  start);
1929 		return -EFAULT;
1930 	}
1931 	return env->subprog_info[subprog].stack_depth;
1932 }
1933 #endif
1934 
1935 static int check_ctx_reg(struct bpf_verifier_env *env,
1936 			 const struct bpf_reg_state *reg, int regno)
1937 {
1938 	/* Access to ctx or passing it to a helper is only allowed in
1939 	 * its original, unmodified form.
1940 	 */
1941 
1942 	if (reg->off) {
1943 		verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
1944 			regno, reg->off);
1945 		return -EACCES;
1946 	}
1947 
1948 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1949 		char tn_buf[48];
1950 
1951 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1952 		verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
1953 		return -EACCES;
1954 	}
1955 
1956 	return 0;
1957 }
1958 
1959 /* truncate register to smaller size (in bytes)
1960  * must be called with size < BPF_REG_SIZE
1961  */
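/* For example, coercing a register whose unsigned bounds are [0, 300]
 * down to size == 1 cannot keep those bounds (300 does not fit in one
 * byte), so the bounds below collapse to the full [0, 0xff] range.
 */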
1962 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1963 {
1964 	u64 mask;
1965 
1966 	/* clear high bits in bit representation */
1967 	reg->var_off = tnum_cast(reg->var_off, size);
1968 
1969 	/* fix arithmetic bounds */
1970 	mask = ((u64)1 << (size * 8)) - 1;
1971 	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1972 		reg->umin_value &= mask;
1973 		reg->umax_value &= mask;
1974 	} else {
1975 		reg->umin_value = 0;
1976 		reg->umax_value = mask;
1977 	}
1978 	reg->smin_value = reg->umin_value;
1979 	reg->smax_value = reg->umax_value;
1980 }
1981 
1982 /* check whether memory at (regno + off) is accessible for t = (read | write)
1983  * if t==write, value_regno is the register whose value is stored into memory
1984  * if t==read, value_regno is the register which will receive the value from memory
1985  * if t==write && value_regno==-1, some unknown value is stored into memory
1986  * if t==read && value_regno==-1, don't care what we read from memory
1987  */
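/* For instance, a register-to-memory store such as
 *    BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2, 8)
 * reaches this check with regno == 1, off == 8, t == BPF_WRITE and
 * value_regno == 2, whereas an immediate store (BPF_ST_MEM) passes
 * value_regno == -1 since no register supplies the stored value.
 */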
1988 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
1989 			    int off, int bpf_size, enum bpf_access_type t,
1990 			    int value_regno, bool strict_alignment_once)
1991 {
1992 	struct bpf_reg_state *regs = cur_regs(env);
1993 	struct bpf_reg_state *reg = regs + regno;
1994 	struct bpf_func_state *state;
1995 	int size, err = 0;
1996 
1997 	size = bpf_size_to_bytes(bpf_size);
1998 	if (size < 0)
1999 		return size;
2000 
2001 	/* alignment checks will add in reg->off themselves */
2002 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
2003 	if (err)
2004 		return err;
2005 
2006 	/* for access checks, reg->off is just part of off */
2007 	off += reg->off;
2008 
2009 	if (reg->type == PTR_TO_MAP_VALUE) {
2010 		if (t == BPF_WRITE && value_regno >= 0 &&
2011 		    is_pointer_value(env, value_regno)) {
2012 			verbose(env, "R%d leaks addr into map\n", value_regno);
2013 			return -EACCES;
2014 		}
2015 
2016 		err = check_map_access(env, regno, off, size, false);
2017 		if (!err && t == BPF_READ && value_regno >= 0)
2018 			mark_reg_unknown(env, regs, value_regno);
2019 
2020 	} else if (reg->type == PTR_TO_CTX) {
2021 		enum bpf_reg_type reg_type = SCALAR_VALUE;
2022 
2023 		if (t == BPF_WRITE && value_regno >= 0 &&
2024 		    is_pointer_value(env, value_regno)) {
2025 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
2026 			return -EACCES;
2027 		}
2028 
2029 		err = check_ctx_reg(env, reg, regno);
2030 		if (err < 0)
2031 			return err;
2032 
2033 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
2034 		if (!err && t == BPF_READ && value_regno >= 0) {
2035 			/* ctx access returns either a scalar, or a
2036 			 * PTR_TO_PACKET[_META,_END]. In the latter
2037 			 * case, we know the offset is zero.
2038 			 */
2039 			if (reg_type == SCALAR_VALUE) {
2040 				mark_reg_unknown(env, regs, value_regno);
2041 			} else {
2042 				mark_reg_known_zero(env, regs,
2043 						    value_regno);
2044 				if (reg_type_may_be_null(reg_type))
2045 					regs[value_regno].id = ++env->id_gen;
2046 			}
2047 			regs[value_regno].type = reg_type;
2048 		}
2049 
2050 	} else if (reg->type == PTR_TO_STACK) {
2051 		off += reg->var_off.value;
2052 		err = check_stack_access(env, reg, off, size);
2053 		if (err)
2054 			return err;
2055 
2056 		state = func(env, reg);
2057 		err = update_stack_depth(env, state, off);
2058 		if (err)
2059 			return err;
2060 
2061 		if (t == BPF_WRITE)
2062 			err = check_stack_write(env, state, off, size,
2063 						value_regno, insn_idx);
2064 		else
2065 			err = check_stack_read(env, state, off, size,
2066 					       value_regno);
2067 	} else if (reg_is_pkt_pointer(reg)) {
2068 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
2069 			verbose(env, "cannot write into packet\n");
2070 			return -EACCES;
2071 		}
2072 		if (t == BPF_WRITE && value_regno >= 0 &&
2073 		    is_pointer_value(env, value_regno)) {
2074 			verbose(env, "R%d leaks addr into packet\n",
2075 				value_regno);
2076 			return -EACCES;
2077 		}
2078 		err = check_packet_access(env, regno, off, size, false);
2079 		if (!err && t == BPF_READ && value_regno >= 0)
2080 			mark_reg_unknown(env, regs, value_regno);
2081 	} else if (reg->type == PTR_TO_FLOW_KEYS) {
2082 		if (t == BPF_WRITE && value_regno >= 0 &&
2083 		    is_pointer_value(env, value_regno)) {
2084 			verbose(env, "R%d leaks addr into flow keys\n",
2085 				value_regno);
2086 			return -EACCES;
2087 		}
2088 
2089 		err = check_flow_keys_access(env, off, size);
2090 		if (!err && t == BPF_READ && value_regno >= 0)
2091 			mark_reg_unknown(env, regs, value_regno);
2092 	} else if (type_is_sk_pointer(reg->type)) {
2093 		if (t == BPF_WRITE) {
2094 			verbose(env, "R%d cannot write into %s\n",
2095 				regno, reg_type_str[reg->type]);
2096 			return -EACCES;
2097 		}
2098 		err = check_sock_access(env, insn_idx, regno, off, size, t);
2099 		if (!err && value_regno >= 0)
2100 			mark_reg_unknown(env, regs, value_regno);
2101 	} else {
2102 		verbose(env, "R%d invalid mem access '%s'\n", regno,
2103 			reg_type_str[reg->type]);
2104 		return -EACCES;
2105 	}
2106 
2107 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
2108 	    regs[value_regno].type == SCALAR_VALUE) {
2109 		/* b/h/w load zero-extends, mark upper bits as known 0 */
2110 		coerce_reg_to_size(&regs[value_regno], size);
2111 	}
2112 	return err;
2113 }
2114 
2115 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
2116 {
2117 	int err;
2118 
2119 	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
2120 	    insn->imm != 0) {
2121 		verbose(env, "BPF_XADD uses reserved fields\n");
2122 		return -EINVAL;
2123 	}
2124 
2125 	/* check src1 operand */
2126 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
2127 	if (err)
2128 		return err;
2129 
2130 	/* check src2 operand */
2131 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
2132 	if (err)
2133 		return err;
2134 
2135 	if (is_pointer_value(env, insn->src_reg)) {
2136 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
2137 		return -EACCES;
2138 	}
2139 
2140 	if (is_ctx_reg(env, insn->dst_reg) ||
2141 	    is_pkt_reg(env, insn->dst_reg) ||
2142 	    is_flow_key_reg(env, insn->dst_reg) ||
2143 	    is_sk_reg(env, insn->dst_reg)) {
2144 		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2145 			insn->dst_reg,
2146 			reg_type_str[reg_state(env, insn->dst_reg)->type]);
2147 		return -EACCES;
2148 	}
2149 
2150 	/* check whether atomic_add can read the memory */
2151 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
2152 			       BPF_SIZE(insn->code), BPF_READ, -1, true);
2153 	if (err)
2154 		return err;
2155 
2156 	/* check whether atomic_add can write into the same memory */
2157 	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
2158 				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
2159 }
2160 
2161 /* when register 'regno' is passed into function that will read 'access_size'
2162 /* when register 'regno' is passed into a function that will read 'access_size'
2163  * bytes from that pointer, make sure that it's within the stack boundary
2164  * and that all elements of the stack are initialized.
2165  * 'off' argument, so it has to add in reg->off itself.
2166  */
2167 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
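/* For example (hypothetical helper proto): with an ARG_PTR_TO_MEM
 * argument in R2 == fp-16 and an ARG_CONST_SIZE argument of 16 in R3,
 * this is reached with access_size == 16, and every byte in [fp-16, fp)
 * must already be STACK_MISC or STACK_ZERO, unless the argument was
 * declared ARG_PTR_TO_UNINIT_MEM (raw mode).
 */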
2168 				int access_size, bool zero_size_allowed,
2169 				struct bpf_call_arg_meta *meta)
2170 {
2171 	struct bpf_reg_state *reg = reg_state(env, regno);
2172 	struct bpf_func_state *state = func(env, reg);
2173 	int off, i, slot, spi;
2174 
2175 	if (reg->type != PTR_TO_STACK) {
2176 		/* Allow zero-byte read from NULL, regardless of pointer type */
2177 		if (zero_size_allowed && access_size == 0 &&
2178 		    register_is_null(reg))
2179 			return 0;
2180 
2181 		verbose(env, "R%d type=%s expected=%s\n", regno,
2182 			reg_type_str[reg->type],
2183 			reg_type_str[PTR_TO_STACK]);
2184 		return -EACCES;
2185 	}
2186 
2187 	/* Only allow fixed-offset stack reads */
2188 	if (!tnum_is_const(reg->var_off)) {
2189 		char tn_buf[48];
2190 
2191 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2192 		verbose(env, "invalid variable stack read R%d var_off=%s\n",
2193 			regno, tn_buf);
2194 		return -EACCES;
2195 	}
2196 	off = reg->off + reg->var_off.value;
2197 	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
2198 	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
2199 		verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
2200 			regno, off, access_size);
2201 		return -EACCES;
2202 	}
2203 
2204 	if (meta && meta->raw_mode) {
2205 		meta->access_size = access_size;
2206 		meta->regno = regno;
2207 		return 0;
2208 	}
2209 
2210 	for (i = 0; i < access_size; i++) {
2211 		u8 *stype;
2212 
2213 		slot = -(off + i) - 1;
2214 		spi = slot / BPF_REG_SIZE;
2215 		if (state->allocated_stack <= slot)
2216 			goto err;
2217 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2218 		if (*stype == STACK_MISC)
2219 			goto mark;
2220 		if (*stype == STACK_ZERO) {
2221 			/* helper can write anything into the stack */
2222 			*stype = STACK_MISC;
2223 			goto mark;
2224 		}
2225 err:
2226 		verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
2227 			off, i, access_size);
2228 		return -EACCES;
2229 mark:
2230 		/* reading any byte out of 8-byte 'spill_slot' will cause
2231 		 * the whole slot to be marked as 'read'
2232 		 */
2233 		mark_reg_read(env, &state->stack[spi].spilled_ptr,
2234 			      state->stack[spi].spilled_ptr.parent);
2235 	}
2236 	return update_stack_depth(env, state, off);
2237 }
2238 
2239 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
2240 				   int access_size, bool zero_size_allowed,
2241 				   struct bpf_call_arg_meta *meta)
2242 {
2243 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
2244 
2245 	switch (reg->type) {
2246 	case PTR_TO_PACKET:
2247 	case PTR_TO_PACKET_META:
2248 		return check_packet_access(env, regno, reg->off, access_size,
2249 					   zero_size_allowed);
2250 	case PTR_TO_MAP_VALUE:
2251 		return check_map_access(env, regno, reg->off, access_size,
2252 					zero_size_allowed);
2253 	default: /* scalar_value|ptr_to_stack or invalid ptr */
2254 		return check_stack_boundary(env, regno, access_size,
2255 					    zero_size_allowed, meta);
2256 	}
2257 }
2258 
2259 /* Implementation details:
2260  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
2261  * Two bpf_map_lookups (even with the same key) will have different reg->id.
2262  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
2263  * value_or_null->value transition, since the verifier only cares about
2264  * the range of access to valid map value pointer and doesn't care about actual
2265  * address of the map element.
2266  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
2267  * reg->id > 0 after value_or_null->value transition. By doing so
2268  * two bpf_map_lookups will be considered two different pointers that
2269  * point to different bpf_spin_locks.
2270  * The verifier allows taking only one bpf_spin_lock at a time to avoid
2271  * deadlocks.
2272  * Since only one bpf_spin_lock is allowed, the checks are simpler than the
2273  * reg_is_refcounted() logic. The verifier needs to remember only
2274  * one spin_lock instead of array of acquired_refs.
2275  * cur_state->active_spin_lock remembers which map value element got locked
2276  * and clears it after bpf_spin_unlock.
2277  */
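/* For example, for a hypothetical map value laid out as
 *    struct elem { int counter; struct bpf_spin_lock lock; };
 * map->spin_lock_off is 4, so only a pointer whose constant offset lands
 * exactly on 'lock' passes the check against map->spin_lock_off below.
 */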
2278 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
2279 			     bool is_lock)
2280 {
2281 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
2282 	struct bpf_verifier_state *cur = env->cur_state;
2283 	bool is_const = tnum_is_const(reg->var_off);
2284 	struct bpf_map *map = reg->map_ptr;
2285 	u64 val = reg->var_off.value;
2286 
2287 	if (reg->type != PTR_TO_MAP_VALUE) {
2288 		verbose(env, "R%d is not a pointer to map_value\n", regno);
2289 		return -EINVAL;
2290 	}
2291 	if (!is_const) {
2292 		verbose(env,
2293 			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
2294 			regno);
2295 		return -EINVAL;
2296 	}
2297 	if (!map->btf) {
2298 		verbose(env,
2299 			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
2300 			map->name);
2301 		return -EINVAL;
2302 	}
2303 	if (!map_value_has_spin_lock(map)) {
2304 		if (map->spin_lock_off == -E2BIG)
2305 			verbose(env,
2306 				"map '%s' has more than one 'struct bpf_spin_lock'\n",
2307 				map->name);
2308 		else if (map->spin_lock_off == -ENOENT)
2309 			verbose(env,
2310 				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
2311 				map->name);
2312 		else
2313 			verbose(env,
2314 				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
2315 				map->name);
2316 		return -EINVAL;
2317 	}
2318 	if (map->spin_lock_off != val + reg->off) {
2319 		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
2320 			val + reg->off);
2321 		return -EINVAL;
2322 	}
2323 	if (is_lock) {
2324 		if (cur->active_spin_lock) {
2325 			verbose(env,
2326 				"Locking two bpf_spin_locks is not allowed\n");
2327 			return -EINVAL;
2328 		}
2329 		cur->active_spin_lock = reg->id;
2330 	} else {
2331 		if (!cur->active_spin_lock) {
2332 			verbose(env, "bpf_spin_unlock without taking a lock\n");
2333 			return -EINVAL;
2334 		}
2335 		if (cur->active_spin_lock != reg->id) {
2336 			verbose(env, "bpf_spin_unlock of different lock\n");
2337 			return -EINVAL;
2338 		}
2339 		cur->active_spin_lock = 0;
2340 	}
2341 	return 0;
2342 }
2343 
2344 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
2345 {
2346 	return type == ARG_PTR_TO_MEM ||
2347 	       type == ARG_PTR_TO_MEM_OR_NULL ||
2348 	       type == ARG_PTR_TO_UNINIT_MEM;
2349 }
2350 
2351 static bool arg_type_is_mem_size(enum bpf_arg_type type)
2352 {
2353 	return type == ARG_CONST_SIZE ||
2354 	       type == ARG_CONST_SIZE_OR_ZERO;
2355 }
2356 
2357 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
2358 			  enum bpf_arg_type arg_type,
2359 			  struct bpf_call_arg_meta *meta)
2360 {
2361 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
2362 	enum bpf_reg_type expected_type, type = reg->type;
2363 	int err = 0;
2364 
2365 	if (arg_type == ARG_DONTCARE)
2366 		return 0;
2367 
2368 	err = check_reg_arg(env, regno, SRC_OP);
2369 	if (err)
2370 		return err;
2371 
2372 	if (arg_type == ARG_ANYTHING) {
2373 		if (is_pointer_value(env, regno)) {
2374 			verbose(env, "R%d leaks addr into helper function\n",
2375 				regno);
2376 			return -EACCES;
2377 		}
2378 		return 0;
2379 	}
2380 
2381 	if (type_is_pkt_pointer(type) &&
2382 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
2383 		verbose(env, "helper access to the packet is not allowed\n");
2384 		return -EACCES;
2385 	}
2386 
2387 	if (arg_type == ARG_PTR_TO_MAP_KEY ||
2388 	    arg_type == ARG_PTR_TO_MAP_VALUE ||
2389 	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
2390 		expected_type = PTR_TO_STACK;
2391 		if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
2392 		    type != expected_type)
2393 			goto err_type;
2394 	} else if (arg_type == ARG_CONST_SIZE ||
2395 		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
2396 		expected_type = SCALAR_VALUE;
2397 		if (type != expected_type)
2398 			goto err_type;
2399 	} else if (arg_type == ARG_CONST_MAP_PTR) {
2400 		expected_type = CONST_PTR_TO_MAP;
2401 		if (type != expected_type)
2402 			goto err_type;
2403 	} else if (arg_type == ARG_PTR_TO_CTX) {
2404 		expected_type = PTR_TO_CTX;
2405 		if (type != expected_type)
2406 			goto err_type;
2407 		err = check_ctx_reg(env, reg, regno);
2408 		if (err < 0)
2409 			return err;
2410 	} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
2411 		expected_type = PTR_TO_SOCK_COMMON;
2412 		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
2413 		if (!type_is_sk_pointer(type))
2414 			goto err_type;
2415 		if (reg->ref_obj_id) {
2416 			if (meta->ref_obj_id) {
2417 				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
2418 					regno, reg->ref_obj_id,
2419 					meta->ref_obj_id);
2420 				return -EFAULT;
2421 			}
2422 			meta->ref_obj_id = reg->ref_obj_id;
2423 		}
2424 	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
2425 		if (meta->func_id == BPF_FUNC_spin_lock) {
2426 			if (process_spin_lock(env, regno, true))
2427 				return -EACCES;
2428 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
2429 			if (process_spin_lock(env, regno, false))
2430 				return -EACCES;
2431 		} else {
2432 			verbose(env, "verifier internal error\n");
2433 			return -EFAULT;
2434 		}
2435 	} else if (arg_type_is_mem_ptr(arg_type)) {
2436 		expected_type = PTR_TO_STACK;
2437 		/* One exception here: in case the function allows NULL to be
2438 		 * passed in as the argument, it's a SCALAR_VALUE type. The final
2439 		 * test happens during stack boundary checking.
2440 		 */
2441 		if (register_is_null(reg) &&
2442 		    arg_type == ARG_PTR_TO_MEM_OR_NULL)
2443 			/* final test in check_stack_boundary() */;
2444 		else if (!type_is_pkt_pointer(type) &&
2445 			 type != PTR_TO_MAP_VALUE &&
2446 			 type != expected_type)
2447 			goto err_type;
2448 		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
2449 	} else {
2450 		verbose(env, "unsupported arg_type %d\n", arg_type);
2451 		return -EFAULT;
2452 	}
2453 
2454 	if (arg_type == ARG_CONST_MAP_PTR) {
2455 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
2456 		meta->map_ptr = reg->map_ptr;
2457 	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
2458 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
2459 		 * check that [key, key + map->key_size) are within
2460 		 * stack limits and initialized
2461 		 */
2462 		if (!meta->map_ptr) {
2463 			/* in the function declaration map_ptr must come before
2464 			 * map_key, so that it's verified and known before
2465 			 * we have to check map_key here. Otherwise it means
2466 			 * that the kernel subsystem misconfigured the verifier.
2467 			 */
2468 			verbose(env, "invalid map_ptr to access map->key\n");
2469 			return -EACCES;
2470 		}
2471 		err = check_helper_mem_access(env, regno,
2472 					      meta->map_ptr->key_size, false,
2473 					      NULL);
2474 	} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
2475 		   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
2476 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
2477 		 * check [value, value + map->value_size) validity
2478 		 */
2479 		if (!meta->map_ptr) {
2480 			/* kernel subsystem misconfigured verifier */
2481 			verbose(env, "invalid map_ptr to access map->value\n");
2482 			return -EACCES;
2483 		}
2484 		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
2485 		err = check_helper_mem_access(env, regno,
2486 					      meta->map_ptr->value_size, false,
2487 					      meta);
2488 	} else if (arg_type_is_mem_size(arg_type)) {
2489 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
2490 
2491 		/* remember the mem_size which may be used later
2492 		 * to refine return values.
2493 		 */
2494 		meta->msize_smax_value = reg->smax_value;
2495 		meta->msize_umax_value = reg->umax_value;
2496 
2497 		/* The register is SCALAR_VALUE; the access check
2498 		 * happens using its boundaries.
2499 		 */
2500 		if (!tnum_is_const(reg->var_off))
2501 			/* For unprivileged variable accesses, disable raw
2502 			 * mode so that the program is required to
2503 			 * initialize all the memory that the helper could
2504 			 * just partially fill up.
2505 			 */
2506 			meta = NULL;
2507 
2508 		if (reg->smin_value < 0) {
2509 			verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
2510 				regno);
2511 			return -EACCES;
2512 		}
2513 
2514 		if (reg->umin_value == 0) {
2515 			err = check_helper_mem_access(env, regno - 1, 0,
2516 						      zero_size_allowed,
2517 						      meta);
2518 			if (err)
2519 				return err;
2520 		}
2521 
2522 		if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
2523 			verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
2524 				regno);
2525 			return -EACCES;
2526 		}
2527 		err = check_helper_mem_access(env, regno - 1,
2528 					      reg->umax_value,
2529 					      zero_size_allowed, meta);
2530 	}
2531 
2532 	return err;
2533 err_type:
2534 	verbose(env, "R%d type=%s expected=%s\n", regno,
2535 		reg_type_str[type], reg_type_str[expected_type]);
2536 	return -EACCES;
2537 }
2538 
2539 static int check_map_func_compatibility(struct bpf_verifier_env *env,
2540 					struct bpf_map *map, int func_id)
2541 {
2542 	if (!map)
2543 		return 0;
2544 
2545 	/* We need a two way check, first is from map perspective ... */
2546 	switch (map->map_type) {
2547 	case BPF_MAP_TYPE_PROG_ARRAY:
2548 		if (func_id != BPF_FUNC_tail_call)
2549 			goto error;
2550 		break;
2551 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
2552 		if (func_id != BPF_FUNC_perf_event_read &&
2553 		    func_id != BPF_FUNC_perf_event_output &&
2554 		    func_id != BPF_FUNC_perf_event_read_value)
2555 			goto error;
2556 		break;
2557 	case BPF_MAP_TYPE_STACK_TRACE:
2558 		if (func_id != BPF_FUNC_get_stackid)
2559 			goto error;
2560 		break;
2561 	case BPF_MAP_TYPE_CGROUP_ARRAY:
2562 		if (func_id != BPF_FUNC_skb_under_cgroup &&
2563 		    func_id != BPF_FUNC_current_task_under_cgroup)
2564 			goto error;
2565 		break;
2566 	case BPF_MAP_TYPE_CGROUP_STORAGE:
2567 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
2568 		if (func_id != BPF_FUNC_get_local_storage)
2569 			goto error;
2570 		break;
2571 	/* devmap returns a pointer to a live net_device ifindex that we cannot
2572 	 * allow to be modified from the bpf side. So do not allow looking up
2573 	 * elements for now.
2574 	 */
2575 	case BPF_MAP_TYPE_DEVMAP:
2576 		if (func_id != BPF_FUNC_redirect_map)
2577 			goto error;
2578 		break;
2579 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
2580 	 * appear.
2581 	 */
2582 	case BPF_MAP_TYPE_CPUMAP:
2583 	case BPF_MAP_TYPE_XSKMAP:
2584 		if (func_id != BPF_FUNC_redirect_map)
2585 			goto error;
2586 		break;
2587 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
2588 	case BPF_MAP_TYPE_HASH_OF_MAPS:
2589 		if (func_id != BPF_FUNC_map_lookup_elem)
2590 			goto error;
2591 		break;
2592 	case BPF_MAP_TYPE_SOCKMAP:
2593 		if (func_id != BPF_FUNC_sk_redirect_map &&
2594 		    func_id != BPF_FUNC_sock_map_update &&
2595 		    func_id != BPF_FUNC_map_delete_elem &&
2596 		    func_id != BPF_FUNC_msg_redirect_map)
2597 			goto error;
2598 		break;
2599 	case BPF_MAP_TYPE_SOCKHASH:
2600 		if (func_id != BPF_FUNC_sk_redirect_hash &&
2601 		    func_id != BPF_FUNC_sock_hash_update &&
2602 		    func_id != BPF_FUNC_map_delete_elem &&
2603 		    func_id != BPF_FUNC_msg_redirect_hash)
2604 			goto error;
2605 		break;
2606 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
2607 		if (func_id != BPF_FUNC_sk_select_reuseport)
2608 			goto error;
2609 		break;
2610 	case BPF_MAP_TYPE_QUEUE:
2611 	case BPF_MAP_TYPE_STACK:
2612 		if (func_id != BPF_FUNC_map_peek_elem &&
2613 		    func_id != BPF_FUNC_map_pop_elem &&
2614 		    func_id != BPF_FUNC_map_push_elem)
2615 			goto error;
2616 		break;
2617 	default:
2618 		break;
2619 	}
2620 
2621 	/* ... and second from the function itself. */
2622 	switch (func_id) {
2623 	case BPF_FUNC_tail_call:
2624 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
2625 			goto error;
2626 		if (env->subprog_cnt > 1) {
2627 			verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
2628 			return -EINVAL;
2629 		}
2630 		break;
2631 	case BPF_FUNC_perf_event_read:
2632 	case BPF_FUNC_perf_event_output:
2633 	case BPF_FUNC_perf_event_read_value:
2634 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
2635 			goto error;
2636 		break;
2637 	case BPF_FUNC_get_stackid:
2638 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
2639 			goto error;
2640 		break;
2641 	case BPF_FUNC_current_task_under_cgroup:
2642 	case BPF_FUNC_skb_under_cgroup:
2643 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
2644 			goto error;
2645 		break;
2646 	case BPF_FUNC_redirect_map:
2647 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
2648 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
2649 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
2650 			goto error;
2651 		break;
2652 	case BPF_FUNC_sk_redirect_map:
2653 	case BPF_FUNC_msg_redirect_map:
2654 	case BPF_FUNC_sock_map_update:
2655 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2656 			goto error;
2657 		break;
2658 	case BPF_FUNC_sk_redirect_hash:
2659 	case BPF_FUNC_msg_redirect_hash:
2660 	case BPF_FUNC_sock_hash_update:
2661 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
2662 			goto error;
2663 		break;
2664 	case BPF_FUNC_get_local_storage:
2665 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
2666 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
2667 			goto error;
2668 		break;
2669 	case BPF_FUNC_sk_select_reuseport:
2670 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
2671 			goto error;
2672 		break;
2673 	case BPF_FUNC_map_peek_elem:
2674 	case BPF_FUNC_map_pop_elem:
2675 	case BPF_FUNC_map_push_elem:
2676 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
2677 		    map->map_type != BPF_MAP_TYPE_STACK)
2678 			goto error;
2679 		break;
2680 	default:
2681 		break;
2682 	}
2683 
2684 	return 0;
2685 error:
2686 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
2687 		map->map_type, func_id_name(func_id), func_id);
2688 	return -EINVAL;
2689 }
2690 
2691 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
2692 {
2693 	int count = 0;
2694 
2695 	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
2696 		count++;
2697 	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
2698 		count++;
2699 	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
2700 		count++;
2701 	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
2702 		count++;
2703 	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
2704 		count++;
2705 
2706 	/* We only support one arg being in raw mode at the moment,
2707 	 * which is sufficient for the helper functions we have
2708 	 * right now.
2709 	 */
2710 	return count <= 1;
2711 }
2712 
2713 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
2714 				    enum bpf_arg_type arg_next)
2715 {
2716 	return (arg_type_is_mem_ptr(arg_curr) &&
2717 	        !arg_type_is_mem_size(arg_next)) ||
2718 	       (!arg_type_is_mem_ptr(arg_curr) &&
2719 		arg_type_is_mem_size(arg_next));
2720 }
2721 
2722 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
2723 {
2724 	/* bpf_xxx(..., buf, len) call will access 'len'
2725 	 * bytes from memory 'buf'. Both arg types need
2726 	 * to be paired, so make sure there's no buggy
2727 	 * helper function specification.
2728 	 */
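	/* For instance, a hypothetical bpf_xxx(buf, len) proto would declare
	 * ARG_PTR_TO_MEM (or ARG_PTR_TO_UNINIT_MEM) for 'buf' immediately
	 * followed by ARG_CONST_SIZE or ARG_CONST_SIZE_OR_ZERO for 'len';
	 * a size argument without a preceding memory pointer is rejected.
	 */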
2729 	if (arg_type_is_mem_size(fn->arg1_type) ||
2730 	    arg_type_is_mem_ptr(fn->arg5_type)  ||
2731 	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
2732 	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
2733 	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
2734 	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
2735 		return false;
2736 
2737 	return true;
2738 }
2739 
2740 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
2741 {
2742 	int count = 0;
2743 
2744 	if (arg_type_may_be_refcounted(fn->arg1_type))
2745 		count++;
2746 	if (arg_type_may_be_refcounted(fn->arg2_type))
2747 		count++;
2748 	if (arg_type_may_be_refcounted(fn->arg3_type))
2749 		count++;
2750 	if (arg_type_may_be_refcounted(fn->arg4_type))
2751 		count++;
2752 	if (arg_type_may_be_refcounted(fn->arg5_type))
2753 		count++;
2754 
2755 	/* A reference acquiring function cannot acquire
2756 	 * another refcounted ptr.
2757 	 */
2758 	if (is_acquire_function(func_id) && count)
2759 		return false;
2760 
2761 	/* We only support one arg being unreferenced at the moment,
2762 	 * which is sufficient for the helper functions we have right now.
2763 	 */
2764 	return count <= 1;
2765 }
2766 
2767 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
2768 {
2769 	return check_raw_mode_ok(fn) &&
2770 	       check_arg_pair_ok(fn) &&
2771 	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
2772 }
2773 
2774 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2775  * are now invalid, so turn them into unknown SCALAR_VALUE.
2776  */
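/* For example, after a helper such as bpf_skb_pull_data() (for which
 * bpf_helper_changes_pkt_data() returns true) every previously derived
 * packet pointer, including spilled copies, must be re-derived from the
 * ctx, since the underlying buffer may have been reallocated.
 */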
2777 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2778 				     struct bpf_func_state *state)
2779 {
2780 	struct bpf_reg_state *regs = state->regs, *reg;
2781 	int i;
2782 
2783 	for (i = 0; i < MAX_BPF_REG; i++)
2784 		if (reg_is_pkt_pointer_any(&regs[i]))
2785 			mark_reg_unknown(env, regs, i);
2786 
2787 	bpf_for_each_spilled_reg(i, state, reg) {
2788 		if (!reg)
2789 			continue;
2790 		if (reg_is_pkt_pointer_any(reg))
2791 			__mark_reg_unknown(reg);
2792 	}
2793 }
2794 
2795 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2796 {
2797 	struct bpf_verifier_state *vstate = env->cur_state;
2798 	int i;
2799 
2800 	for (i = 0; i <= vstate->curframe; i++)
2801 		__clear_all_pkt_pointers(env, vstate->frame[i]);
2802 }
2803 
2804 static void release_reg_references(struct bpf_verifier_env *env,
2805 				   struct bpf_func_state *state,
2806 				   int ref_obj_id)
2807 {
2808 	struct bpf_reg_state *regs = state->regs, *reg;
2809 	int i;
2810 
2811 	for (i = 0; i < MAX_BPF_REG; i++)
2812 		if (regs[i].ref_obj_id == ref_obj_id)
2813 			mark_reg_unknown(env, regs, i);
2814 
2815 	bpf_for_each_spilled_reg(i, state, reg) {
2816 		if (!reg)
2817 			continue;
2818 		if (reg->ref_obj_id == ref_obj_id)
2819 			__mark_reg_unknown(reg);
2820 	}
2821 }
2822 
2823 /* The pointer with the specified id has released its reference to kernel
2824  * resources. Identify all copies of the same pointer and clear the reference.
2825  */
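/* For example, once bpf_sk_release() has been called on a socket obtained
 * from bpf_sk_lookup_tcp(), every register or spilled slot that still
 * carries the same ref_obj_id is marked unknown, so a later dereference
 * of a stale copy is rejected.
 */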
2826 static int release_reference(struct bpf_verifier_env *env,
2827 			     int ref_obj_id)
2828 {
2829 	struct bpf_verifier_state *vstate = env->cur_state;
2830 	int err;
2831 	int i;
2832 
2833 	err = release_reference_state(cur_func(env), ref_obj_id);
2834 	if (err)
2835 		return err;
2836 
2837 	for (i = 0; i <= vstate->curframe; i++)
2838 		release_reg_references(env, vstate->frame[i], ref_obj_id);
2839 
2840 	return 0;
2841 }
2842 
2843 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2844 			   int *insn_idx)
2845 {
2846 	struct bpf_verifier_state *state = env->cur_state;
2847 	struct bpf_func_state *caller, *callee;
2848 	int i, err, subprog, target_insn;
2849 
2850 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
2851 		verbose(env, "the call stack of %d frames is too deep\n",
2852 			state->curframe + 2);
2853 		return -E2BIG;
2854 	}
2855 
2856 	target_insn = *insn_idx + insn->imm;
2857 	subprog = find_subprog(env, target_insn + 1);
2858 	if (subprog < 0) {
2859 		verbose(env, "verifier bug. No program starts at insn %d\n",
2860 			target_insn + 1);
2861 		return -EFAULT;
2862 	}
2863 
2864 	caller = state->frame[state->curframe];
2865 	if (state->frame[state->curframe + 1]) {
2866 		verbose(env, "verifier bug. Frame %d already allocated\n",
2867 			state->curframe + 1);
2868 		return -EFAULT;
2869 	}
2870 
2871 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2872 	if (!callee)
2873 		return -ENOMEM;
2874 	state->frame[state->curframe + 1] = callee;
2875 
2876 	/* callee cannot access r0, r6 - r9 for reading and has to write
2877 	 * into its own stack before reading from it.
2878 	 * callee can read/write into caller's stack
2879 	 */
2880 	init_func_state(env, callee,
2881 			/* remember the callsite, it will be used by bpf_exit */
2882 			*insn_idx /* callsite */,
2883 			state->curframe + 1 /* frameno within this callchain */,
2884 			subprog /* subprog number within this prog */);
2885 
2886 	/* Transfer references to the callee */
2887 	err = transfer_reference_state(callee, caller);
2888 	if (err)
2889 		return err;
2890 
2891 	/* copy r1 - r5 args that callee can access.  The copy includes parent
2892 	 * pointers, which connects us up to the liveness chain
2893 	 */
2894 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2895 		callee->regs[i] = caller->regs[i];
2896 
2897 	/* after the call registers r0 - r5 were scratched */
2898 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
2899 		mark_reg_not_init(env, caller->regs, caller_saved[i]);
2900 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2901 	}
2902 
2903 	/* only increment it after check_reg_arg() finished */
2904 	state->curframe++;
2905 
2906 	/* and go analyze first insn of the callee */
2907 	*insn_idx = target_insn;
2908 
2909 	if (env->log.level) {
2910 		verbose(env, "caller:\n");
2911 		print_verifier_state(env, caller);
2912 		verbose(env, "callee:\n");
2913 		print_verifier_state(env, callee);
2914 	}
2915 	return 0;
2916 }
2917 
2918 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2919 {
2920 	struct bpf_verifier_state *state = env->cur_state;
2921 	struct bpf_func_state *caller, *callee;
2922 	struct bpf_reg_state *r0;
2923 	int err;
2924 
2925 	callee = state->frame[state->curframe];
2926 	r0 = &callee->regs[BPF_REG_0];
2927 	if (r0->type == PTR_TO_STACK) {
2928 		/* technically it's ok to return the caller's stack pointer
2929 		 * (or the caller's caller's pointer) back to the caller,
2930 		 * since these pointers are valid. Only the current stack
2931 		 * pointer will become invalid as soon as the function exits,
2932 		 * but let's be conservative.
2933 		 */
2934 		verbose(env, "cannot return stack pointer to the caller\n");
2935 		return -EINVAL;
2936 	}
2937 
2938 	state->curframe--;
2939 	caller = state->frame[state->curframe];
2940 	/* return to the caller whatever r0 had in the callee */
2941 	caller->regs[BPF_REG_0] = *r0;
2942 
2943 	/* Transfer references to the caller */
2944 	err = transfer_reference_state(caller, callee);
2945 	if (err)
2946 		return err;
2947 
2948 	*insn_idx = callee->callsite + 1;
2949 	if (env->log.level) {
2950 		verbose(env, "returning from callee:\n");
2951 		print_verifier_state(env, callee);
2952 		verbose(env, "to caller at %d:\n", *insn_idx);
2953 		print_verifier_state(env, caller);
2954 	}
2955 	/* clear everything in the callee */
2956 	free_func_state(callee);
2957 	state->frame[state->curframe + 1] = NULL;
2958 	return 0;
2959 }
2960 
2961 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
2962 				   int func_id,
2963 				   struct bpf_call_arg_meta *meta)
2964 {
2965 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
2966 
2967 	if (ret_type != RET_INTEGER ||
2968 	    (func_id != BPF_FUNC_get_stack &&
2969 	     func_id != BPF_FUNC_probe_read_str))
2970 		return;
2971 
2972 	ret_reg->smax_value = meta->msize_smax_value;
2973 	ret_reg->umax_value = meta->msize_umax_value;
2974 	__reg_deduce_bounds(ret_reg);
2975 	__reg_bound_offset(ret_reg);
2976 }
2977 
2978 static int
2979 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
2980 		int func_id, int insn_idx)
2981 {
2982 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
2983 
2984 	if (func_id != BPF_FUNC_tail_call &&
2985 	    func_id != BPF_FUNC_map_lookup_elem &&
2986 	    func_id != BPF_FUNC_map_update_elem &&
2987 	    func_id != BPF_FUNC_map_delete_elem &&
2988 	    func_id != BPF_FUNC_map_push_elem &&
2989 	    func_id != BPF_FUNC_map_pop_elem &&
2990 	    func_id != BPF_FUNC_map_peek_elem)
2991 		return 0;
2992 
2993 	if (meta->map_ptr == NULL) {
2994 		verbose(env, "kernel subsystem misconfigured verifier\n");
2995 		return -EINVAL;
2996 	}
2997 
2998 	if (!BPF_MAP_PTR(aux->map_state))
2999 		bpf_map_ptr_store(aux, meta->map_ptr,
3000 				  meta->map_ptr->unpriv_array);
3001 	else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
3002 		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
3003 				  meta->map_ptr->unpriv_array);
3004 	return 0;
3005 }
3006 
3007 static int check_reference_leak(struct bpf_verifier_env *env)
3008 {
3009 	struct bpf_func_state *state = cur_func(env);
3010 	int i;
3011 
3012 	for (i = 0; i < state->acquired_refs; i++) {
3013 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
3014 			state->refs[i].id, state->refs[i].insn_idx);
3015 	}
3016 	return state->acquired_refs ? -EINVAL : 0;
3017 }
3018 
3019 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
3020 {
3021 	const struct bpf_func_proto *fn = NULL;
3022 	struct bpf_reg_state *regs;
3023 	struct bpf_call_arg_meta meta;
3024 	bool changes_data;
3025 	int i, err;
3026 
3027 	/* find function prototype */
3028 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
3029 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
3030 			func_id);
3031 		return -EINVAL;
3032 	}
3033 
3034 	if (env->ops->get_func_proto)
3035 		fn = env->ops->get_func_proto(func_id, env->prog);
3036 	if (!fn) {
3037 		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
3038 			func_id);
3039 		return -EINVAL;
3040 	}
3041 
3042 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
3043 	if (!env->prog->gpl_compatible && fn->gpl_only) {
3044 		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
3045 		return -EINVAL;
3046 	}
3047 
3048 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
3049 	changes_data = bpf_helper_changes_pkt_data(fn->func);
3050 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
3051 		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
3052 			func_id_name(func_id), func_id);
3053 		return -EINVAL;
3054 	}
3055 
3056 	memset(&meta, 0, sizeof(meta));
3057 	meta.pkt_access = fn->pkt_access;
3058 
3059 	err = check_func_proto(fn, func_id);
3060 	if (err) {
3061 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
3062 			func_id_name(func_id), func_id);
3063 		return err;
3064 	}
3065 
3066 	meta.func_id = func_id;
3067 	/* check args */
3068 	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
3069 	if (err)
3070 		return err;
3071 	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
3072 	if (err)
3073 		return err;
3074 	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
3075 	if (err)
3076 		return err;
3077 	err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
3078 	if (err)
3079 		return err;
3080 	err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
3081 	if (err)
3082 		return err;
3083 
3084 	err = record_func_map(env, &meta, func_id, insn_idx);
3085 	if (err)
3086 		return err;
3087 
3088 	/* Mark slots with STACK_MISC in case of raw mode; the stack offset
3089 	 * is inferred from register state.
3090 	 */
3091 	for (i = 0; i < meta.access_size; i++) {
3092 		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
3093 				       BPF_WRITE, -1, false);
3094 		if (err)
3095 			return err;
3096 	}
3097 
3098 	if (func_id == BPF_FUNC_tail_call) {
3099 		err = check_reference_leak(env);
3100 		if (err) {
3101 			verbose(env, "tail_call would lead to reference leak\n");
3102 			return err;
3103 		}
3104 	} else if (is_release_function(func_id)) {
3105 		err = release_reference(env, meta.ref_obj_id);
3106 		if (err) {
3107 			verbose(env, "func %s#%d reference has not been acquired before\n",
3108 				func_id_name(func_id), func_id);
3109 			return err;
3110 		}
3111 	}
3112 
3113 	regs = cur_regs(env);
3114 
3115 	/* check that the flags argument in get_local_storage(map, flags) is 0;
3116 	 * this is required because get_local_storage() can't return an error.
3117 	 */
3118 	if (func_id == BPF_FUNC_get_local_storage &&
3119 	    !register_is_null(&regs[BPF_REG_2])) {
3120 		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
3121 		return -EINVAL;
3122 	}
3123 
3124 	/* reset caller saved regs */
3125 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
3126 		mark_reg_not_init(env, regs, caller_saved[i]);
3127 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3128 	}
3129 
3130 	/* update return register (already marked as written above) */
3131 	if (fn->ret_type == RET_INTEGER) {
3132 		/* sets type to SCALAR_VALUE */
3133 		mark_reg_unknown(env, regs, BPF_REG_0);
3134 	} else if (fn->ret_type == RET_VOID) {
3135 		regs[BPF_REG_0].type = NOT_INIT;
3136 	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
3137 		   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
3138 		/* No offset, variable or fixed, has been applied yet */
3139 		mark_reg_known_zero(env, regs, BPF_REG_0);
3140 		/* remember map_ptr, so that check_map_access()
3141 		 * can check 'value_size' boundary of memory access
3142 		 * to map element returned from bpf_map_lookup_elem()
3143 		 */
3144 		if (meta.map_ptr == NULL) {
3145 			verbose(env,
3146 				"kernel subsystem misconfigured verifier\n");
3147 			return -EINVAL;
3148 		}
3149 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
3150 		if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
3151 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
3152 			if (map_value_has_spin_lock(meta.map_ptr))
3153 				regs[BPF_REG_0].id = ++env->id_gen;
3154 		} else {
3155 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
3156 			regs[BPF_REG_0].id = ++env->id_gen;
3157 		}
3158 	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
3159 		mark_reg_known_zero(env, regs, BPF_REG_0);
3160 		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
3161 		regs[BPF_REG_0].id = ++env->id_gen;
3162 	} else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
3163 		mark_reg_known_zero(env, regs, BPF_REG_0);
3164 		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
3165 		regs[BPF_REG_0].id = ++env->id_gen;
3166 	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
3167 		mark_reg_known_zero(env, regs, BPF_REG_0);
3168 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
3169 		regs[BPF_REG_0].id = ++env->id_gen;
3170 	} else {
3171 		verbose(env, "unknown return type %d of func %s#%d\n",
3172 			fn->ret_type, func_id_name(func_id), func_id);
3173 		return -EINVAL;
3174 	}
3175 
3176 	if (is_ptr_cast_function(func_id)) {
3177 		/* For release_reference() */
3178 		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
3179 	} else if (is_acquire_function(func_id)) {
3180 		int id = acquire_reference_state(env, insn_idx);
3181 
3182 		if (id < 0)
3183 			return id;
3184 		/* For mark_ptr_or_null_reg() */
3185 		regs[BPF_REG_0].id = id;
3186 		/* For release_reference() */
3187 		regs[BPF_REG_0].ref_obj_id = id;
3188 	}
3189 
3190 	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
3191 
3192 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
3193 	if (err)
3194 		return err;
3195 
3196 	if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
3197 		const char *err_str;
3198 
3199 #ifdef CONFIG_PERF_EVENTS
3200 		err = get_callchain_buffers(sysctl_perf_event_max_stack);
3201 		err_str = "cannot get callchain buffer for func %s#%d\n";
3202 #else
3203 		err = -ENOTSUPP;
3204 		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
3205 #endif
3206 		if (err) {
3207 			verbose(env, err_str, func_id_name(func_id), func_id);
3208 			return err;
3209 		}
3210 
3211 		env->prog->has_callchain_buf = true;
3212 	}
3213 
3214 	if (changes_data)
3215 		clear_all_pkt_pointers(env);
3216 	return 0;
3217 }
3218 
3219 static bool signed_add_overflows(s64 a, s64 b)
3220 {
3221 	/* Do the add in u64, where overflow is well-defined */
3222 	s64 res = (s64)((u64)a + (u64)b);
3223 
3224 	if (b < 0)
3225 		return res > a;
3226 	return res < a;
3227 }
3228 
3229 static bool signed_sub_overflows(s64 a, s64 b)
3230 {
3231 	/* Do the sub in u64, where overflow is well-defined */
3232 	s64 res = (s64)((u64)a - (u64)b);
3233 
3234 	if (b < 0)
3235 		return res < a;
3236 	return res > a;
3237 }
3238 
3239 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
3240 				  const struct bpf_reg_state *reg,
3241 				  enum bpf_reg_type type)
3242 {
3243 	bool known = tnum_is_const(reg->var_off);
3244 	s64 val = reg->var_off.value;
3245 	s64 smin = reg->smin_value;
3246 
3247 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
3248 		verbose(env, "math between %s pointer and %lld is not allowed\n",
3249 			reg_type_str[type], val);
3250 		return false;
3251 	}
3252 
3253 	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
3254 		verbose(env, "%s pointer offset %d is not allowed\n",
3255 			reg_type_str[type], reg->off);
3256 		return false;
3257 	}
3258 
3259 	if (smin == S64_MIN) {
3260 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
3261 			reg_type_str[type]);
3262 		return false;
3263 	}
3264 
3265 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
3266 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
3267 			smin, reg_type_str[type]);
3268 		return false;
3269 	}
3270 
3271 	return true;
3272 }
3273 
3274 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
3275 {
3276 	return &env->insn_aux_data[env->insn_idx];
3277 }
3278 
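/* Compute the ALU masking limit used when sanitizing pointer arithmetic
 * against speculative out-of-bounds access: roughly, the number of bytes
 * by which the pointer may still move in the direction of the arithmetic
 * without leaving the underlying object. E.g. for a PTR_TO_STACK at fp-16
 * with a further negative offset being added, the limit is
 * MAX_BPF_STACK - 16.
 */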
3279 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
3280 			      u32 *ptr_limit, u8 opcode, bool off_is_neg)
3281 {
3282 	bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
3283 			    (opcode == BPF_SUB && !off_is_neg);
3284 	u32 off;
3285 
3286 	switch (ptr_reg->type) {
3287 	case PTR_TO_STACK:
3288 		off = ptr_reg->off + ptr_reg->var_off.value;
3289 		if (mask_to_left)
3290 			*ptr_limit = MAX_BPF_STACK + off;
3291 		else
3292 			*ptr_limit = -off;
3293 		return 0;
3294 	case PTR_TO_MAP_VALUE:
3295 		if (mask_to_left) {
3296 			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
3297 		} else {
3298 			off = ptr_reg->smin_value + ptr_reg->off;
3299 			*ptr_limit = ptr_reg->map_ptr->value_size - off;
3300 		}
3301 		return 0;
3302 	default:
3303 		return -EINVAL;
3304 	}
3305 }
3306 
3307 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
3308 				    const struct bpf_insn *insn)
3309 {
3310 	return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
3311 }
3312 
3313 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
3314 				       u32 alu_state, u32 alu_limit)
3315 {
3316 	/* If we arrived here from different branches with different
3317 	 * state or limits to sanitize, then this won't work.
3318 	 */
3319 	if (aux->alu_state &&
3320 	    (aux->alu_state != alu_state ||
3321 	     aux->alu_limit != alu_limit))
3322 		return -EACCES;
3323 
3324 	/* Corresponding fixup done in fixup_bpf_calls(). */
3325 	aux->alu_state = alu_state;
3326 	aux->alu_limit = alu_limit;
3327 	return 0;
3328 }
3329 
3330 static int sanitize_val_alu(struct bpf_verifier_env *env,
3331 			    struct bpf_insn *insn)
3332 {
3333 	struct bpf_insn_aux_data *aux = cur_aux(env);
3334 
3335 	if (can_skip_alu_sanitation(env, insn))
3336 		return 0;
3337 
3338 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
3339 }
3340 
3341 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
3342 			    struct bpf_insn *insn,
3343 			    const struct bpf_reg_state *ptr_reg,
3344 			    struct bpf_reg_state *dst_reg,
3345 			    bool off_is_neg)
3346 {
3347 	struct bpf_verifier_state *vstate = env->cur_state;
3348 	struct bpf_insn_aux_data *aux = cur_aux(env);
3349 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
3350 	u8 opcode = BPF_OP(insn->code);
3351 	u32 alu_state, alu_limit;
3352 	struct bpf_reg_state tmp;
3353 	bool ret;
3354 
3355 	if (can_skip_alu_sanitation(env, insn))
3356 		return 0;
3357 
3358 	/* We already marked aux for masking from non-speculative
3359 	 * paths, thus we got here in the first place. We only care
3360 	 * to explore bad access from here.
3361 	 */
3362 	if (vstate->speculative)
3363 		goto do_sim;
3364 
3365 	alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
3366 	alu_state |= ptr_is_dst_reg ?
3367 		     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
3368 
3369 	if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
3370 		return 0;
3371 	if (update_alu_sanitation_state(aux, alu_state, alu_limit))
3372 		return -EACCES;
3373 do_sim:
3374 	/* Simulate and find potential out-of-bounds access under
3375 	 * speculative execution from truncation as a result of
3376 	 * masking when off was not within expected range. If off
3377 	 * sits in dst, then we temporarily need to move ptr there
3378 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
3379 	 * for cases where we use K-based arithmetic in one direction
3380 	 * and truncated reg-based in the other in order to explore
3381 	 * bad access.
3382 	 */
3383 	if (!ptr_is_dst_reg) {
3384 		tmp = *dst_reg;
3385 		*dst_reg = *ptr_reg;
3386 	}
3387 	ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
3388 	if (!ptr_is_dst_reg && ret)
3389 		*dst_reg = tmp;
3390 	return !ret ? -EFAULT : 0;
3391 }
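/* Editorial note: the alu_state/alu_limit recorded above are consumed by
 * fixup_bpf_calls(), which patches the ALU insn with a masking sequence so
 * that an offset outside alu_limit is forced to zero before it reaches the
 * pointer, even on a speculated path.  The push_stack() call additionally
 * queues a speculative verifier path that explores the worst-case truncated
 * result.
 */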
3392 
3393 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
3394  * Caller should also handle BPF_MOV case separately.
3395  * If we return -EACCES, caller may want to try again treating pointer as a
3396  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
3397  */
3398 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3399 				   struct bpf_insn *insn,
3400 				   const struct bpf_reg_state *ptr_reg,
3401 				   const struct bpf_reg_state *off_reg)
3402 {
3403 	struct bpf_verifier_state *vstate = env->cur_state;
3404 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3405 	struct bpf_reg_state *regs = state->regs, *dst_reg;
3406 	bool known = tnum_is_const(off_reg->var_off);
3407 	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
3408 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
3409 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
3410 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
3411 	u32 dst = insn->dst_reg, src = insn->src_reg;
3412 	u8 opcode = BPF_OP(insn->code);
3413 	int ret;
3414 
3415 	dst_reg = &regs[dst];
3416 
3417 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
3418 	    smin_val > smax_val || umin_val > umax_val) {
3419 		/* Taint dst register if offset had invalid bounds derived from
3420 		 * e.g. dead branches.
3421 		 */
3422 		__mark_reg_unknown(dst_reg);
3423 		return 0;
3424 	}
3425 
3426 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
3427 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
3428 		verbose(env,
3429 			"R%d 32-bit pointer arithmetic prohibited\n",
3430 			dst);
3431 		return -EACCES;
3432 	}
3433 
3434 	switch (ptr_reg->type) {
3435 	case PTR_TO_MAP_VALUE_OR_NULL:
3436 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
3437 			dst, reg_type_str[ptr_reg->type]);
3438 		return -EACCES;
3439 	case CONST_PTR_TO_MAP:
3440 	case PTR_TO_PACKET_END:
3441 	case PTR_TO_SOCKET:
3442 	case PTR_TO_SOCKET_OR_NULL:
3443 	case PTR_TO_SOCK_COMMON:
3444 	case PTR_TO_SOCK_COMMON_OR_NULL:
3445 	case PTR_TO_TCP_SOCK:
3446 	case PTR_TO_TCP_SOCK_OR_NULL:
3447 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3448 			dst, reg_type_str[ptr_reg->type]);
3449 		return -EACCES;
3450 	case PTR_TO_MAP_VALUE:
3451 		if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
3452 			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
3453 				off_reg == dst_reg ? dst : src);
3454 			return -EACCES;
3455 		}
3456 		/* fall-through */
3457 	default:
3458 		break;
3459 	}
3460 
3461 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
3462 	 * The id may be overwritten later if we create a new variable offset.
3463 	 */
3464 	dst_reg->type = ptr_reg->type;
3465 	dst_reg->id = ptr_reg->id;
3466 
3467 	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
3468 	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
3469 		return -EINVAL;
3470 
3471 	switch (opcode) {
3472 	case BPF_ADD:
3473 		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
3474 		if (ret < 0) {
3475 			verbose(env, "R%d tried to add from different maps or paths\n", dst);
3476 			return ret;
3477 		}
3478 		/* We can take a fixed offset as long as it doesn't overflow
3479 		 * the s32 'off' field
3480 		 */
3481 		if (known && (ptr_reg->off + smin_val ==
3482 			      (s64)(s32)(ptr_reg->off + smin_val))) {
3483 			/* pointer += K.  Accumulate it into fixed offset */
3484 			dst_reg->smin_value = smin_ptr;
3485 			dst_reg->smax_value = smax_ptr;
3486 			dst_reg->umin_value = umin_ptr;
3487 			dst_reg->umax_value = umax_ptr;
3488 			dst_reg->var_off = ptr_reg->var_off;
3489 			dst_reg->off = ptr_reg->off + smin_val;
3490 			dst_reg->raw = ptr_reg->raw;
3491 			break;
3492 		}
3493 		/* A new variable offset is created.  Note that off_reg->off
3494 		 * == 0, since it's a scalar.
3495 		 * dst_reg gets the pointer type and, since some integer value
3496 		 * was added to the pointer, it is given a new 'id' if it's a
3497 		 * PTR_TO_PACKET.
3498 		 * This creates a new 'base' pointer; off_reg (variable) gets
3499 		 * added into the variable offset, and we copy the fixed offset
3500 		 * from ptr_reg.
3501 		 */
3502 		if (signed_add_overflows(smin_ptr, smin_val) ||
3503 		    signed_add_overflows(smax_ptr, smax_val)) {
3504 			dst_reg->smin_value = S64_MIN;
3505 			dst_reg->smax_value = S64_MAX;
3506 		} else {
3507 			dst_reg->smin_value = smin_ptr + smin_val;
3508 			dst_reg->smax_value = smax_ptr + smax_val;
3509 		}
3510 		if (umin_ptr + umin_val < umin_ptr ||
3511 		    umax_ptr + umax_val < umax_ptr) {
3512 			dst_reg->umin_value = 0;
3513 			dst_reg->umax_value = U64_MAX;
3514 		} else {
3515 			dst_reg->umin_value = umin_ptr + umin_val;
3516 			dst_reg->umax_value = umax_ptr + umax_val;
3517 		}
3518 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
3519 		dst_reg->off = ptr_reg->off;
3520 		dst_reg->raw = ptr_reg->raw;
3521 		if (reg_is_pkt_pointer(ptr_reg)) {
3522 			dst_reg->id = ++env->id_gen;
3523 			/* something was added to pkt_ptr, set range to zero */
3524 			dst_reg->raw = 0;
3525 		}
3526 		break;
3527 	case BPF_SUB:
3528 		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
3529 		if (ret < 0) {
3530 			verbose(env, "R%d tried to sub from different maps or paths\n", dst);
3531 			return ret;
3532 		}
3533 		if (dst_reg == off_reg) {
3534 			/* scalar -= pointer.  Creates an unknown scalar */
3535 			verbose(env, "R%d tried to subtract pointer from scalar\n",
3536 				dst);
3537 			return -EACCES;
3538 		}
3539 		/* We don't allow subtraction from FP, because (according to
3540 		 * the test_verifier.c test "invalid fp arithmetic") JITs might
3541 		 * not be able to deal with it.
3542 		 */
3543 		if (ptr_reg->type == PTR_TO_STACK) {
3544 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
3545 				dst);
3546 			return -EACCES;
3547 		}
3548 		if (known && (ptr_reg->off - smin_val ==
3549 			      (s64)(s32)(ptr_reg->off - smin_val))) {
3550 			/* pointer -= K.  Subtract it from fixed offset */
3551 			dst_reg->smin_value = smin_ptr;
3552 			dst_reg->smax_value = smax_ptr;
3553 			dst_reg->umin_value = umin_ptr;
3554 			dst_reg->umax_value = umax_ptr;
3555 			dst_reg->var_off = ptr_reg->var_off;
3556 			dst_reg->id = ptr_reg->id;
3557 			dst_reg->off = ptr_reg->off - smin_val;
3558 			dst_reg->raw = ptr_reg->raw;
3559 			break;
3560 		}
3561 		/* A new variable offset is created.  If the subtrahend is known
3562 		 * nonnegative, then any reg->range we had before is still good.
3563 		 */
3564 		if (signed_sub_overflows(smin_ptr, smax_val) ||
3565 		    signed_sub_overflows(smax_ptr, smin_val)) {
3566 			/* Overflow possible, we know nothing */
3567 			dst_reg->smin_value = S64_MIN;
3568 			dst_reg->smax_value = S64_MAX;
3569 		} else {
3570 			dst_reg->smin_value = smin_ptr - smax_val;
3571 			dst_reg->smax_value = smax_ptr - smin_val;
3572 		}
3573 		if (umin_ptr < umax_val) {
3574 			/* Overflow possible, we know nothing */
3575 			dst_reg->umin_value = 0;
3576 			dst_reg->umax_value = U64_MAX;
3577 		} else {
3578 			/* Cannot overflow (as long as bounds are consistent) */
3579 			dst_reg->umin_value = umin_ptr - umax_val;
3580 			dst_reg->umax_value = umax_ptr - umin_val;
3581 		}
3582 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
3583 		dst_reg->off = ptr_reg->off;
3584 		dst_reg->raw = ptr_reg->raw;
3585 		if (reg_is_pkt_pointer(ptr_reg)) {
3586 			dst_reg->id = ++env->id_gen;
3587 			/* a negative subtrahend may have advanced pkt_ptr, clear the range */
3588 			if (smin_val < 0)
3589 				dst_reg->raw = 0;
3590 		}
3591 		break;
3592 	case BPF_AND:
3593 	case BPF_OR:
3594 	case BPF_XOR:
3595 		/* bitwise ops on pointers are troublesome, prohibit. */
3596 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
3597 			dst, bpf_alu_string[opcode >> 4]);
3598 		return -EACCES;
3599 	default:
3600 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
3601 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
3602 			dst, bpf_alu_string[opcode >> 4]);
3603 		return -EACCES;
3604 	}
3605 
3606 	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
3607 		return -EINVAL;
3608 
3609 	__update_reg_bounds(dst_reg);
3610 	__reg_deduce_bounds(dst_reg);
3611 	__reg_bound_offset(dst_reg);
3612 
3613 	/* For unprivileged we require the resulting offset to be in bounds
3614 	 * in order to be able to sanitize the access later on.
3615 	 */
3616 	if (!env->allow_ptr_leaks) {
3617 		if (dst_reg->type == PTR_TO_MAP_VALUE &&
3618 		    check_map_access(env, dst, dst_reg->off, 1, false)) {
3619 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
3620 				"prohibited for !root\n", dst);
3621 			return -EACCES;
3622 		} else if (dst_reg->type == PTR_TO_STACK &&
3623 			   check_stack_access(env, dst_reg, dst_reg->off +
3624 					      dst_reg->var_off.value, 1)) {
3625 			verbose(env, "R%d stack pointer arithmetic goes out of range, "
3626 				"prohibited for !root\n", dst);
3627 			return -EACCES;
3628 		}
3629 	}
3630 
3631 	return 0;
3632 }
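/* Illustrative example (editorial): if R0 is PTR_TO_MAP_VALUE with off == 0,
 * then BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8) folds the constant into
 * dst_reg->off (off becomes 8).  Adding an unknown scalar bounded to [0, 16]
 * instead leaves off at 0 and folds the scalar's bounds into smin/smax,
 * umin/umax and var_off via tnum_add(), subject to check_reg_sane_offset()
 * and, for !root, the check_map_access() test above.
 */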
3633 
3634 /* WARNING: This function does calculations on 64-bit values, but the actual
3635  * execution may occur on 32-bit values. Therefore, things like bitshifts
3636  * need extra checks in the 32-bit case.
3637  */
3638 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
3639 				      struct bpf_insn *insn,
3640 				      struct bpf_reg_state *dst_reg,
3641 				      struct bpf_reg_state src_reg)
3642 {
3643 	struct bpf_reg_state *regs = cur_regs(env);
3644 	u8 opcode = BPF_OP(insn->code);
3645 	bool src_known, dst_known;
3646 	s64 smin_val, smax_val;
3647 	u64 umin_val, umax_val;
3648 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
3649 	u32 dst = insn->dst_reg;
3650 	int ret;
3651 
3652 	if (insn_bitness == 32) {
3653 		/* Relevant for 32-bit RSH: Information can propagate towards
3654 		 * LSB, so it isn't sufficient to only truncate the output to
3655 		 * 32 bits.
3656 		 */
3657 		coerce_reg_to_size(dst_reg, 4);
3658 		coerce_reg_to_size(&src_reg, 4);
3659 	}
3660 
3661 	smin_val = src_reg.smin_value;
3662 	smax_val = src_reg.smax_value;
3663 	umin_val = src_reg.umin_value;
3664 	umax_val = src_reg.umax_value;
3665 	src_known = tnum_is_const(src_reg.var_off);
3666 	dst_known = tnum_is_const(dst_reg->var_off);
3667 
3668 	if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
3669 	    smin_val > smax_val || umin_val > umax_val) {
3670 		/* Taint dst register if offset had invalid bounds derived from
3671 		 * e.g. dead branches.
3672 		 */
3673 		__mark_reg_unknown(dst_reg);
3674 		return 0;
3675 	}
3676 
3677 	if (!src_known &&
3678 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
3679 		__mark_reg_unknown(dst_reg);
3680 		return 0;
3681 	}
3682 
3683 	switch (opcode) {
3684 	case BPF_ADD:
3685 		ret = sanitize_val_alu(env, insn);
3686 		if (ret < 0) {
3687 			verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
3688 			return ret;
3689 		}
3690 		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
3691 		    signed_add_overflows(dst_reg->smax_value, smax_val)) {
3692 			dst_reg->smin_value = S64_MIN;
3693 			dst_reg->smax_value = S64_MAX;
3694 		} else {
3695 			dst_reg->smin_value += smin_val;
3696 			dst_reg->smax_value += smax_val;
3697 		}
3698 		if (dst_reg->umin_value + umin_val < umin_val ||
3699 		    dst_reg->umax_value + umax_val < umax_val) {
3700 			dst_reg->umin_value = 0;
3701 			dst_reg->umax_value = U64_MAX;
3702 		} else {
3703 			dst_reg->umin_value += umin_val;
3704 			dst_reg->umax_value += umax_val;
3705 		}
3706 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
3707 		break;
3708 	case BPF_SUB:
3709 		ret = sanitize_val_alu(env, insn);
3710 		if (ret < 0) {
3711 			verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
3712 			return ret;
3713 		}
3714 		if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
3715 		    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
3716 			/* Overflow possible, we know nothing */
3717 			dst_reg->smin_value = S64_MIN;
3718 			dst_reg->smax_value = S64_MAX;
3719 		} else {
3720 			dst_reg->smin_value -= smax_val;
3721 			dst_reg->smax_value -= smin_val;
3722 		}
3723 		if (dst_reg->umin_value < umax_val) {
3724 			/* Overflow possible, we know nothing */
3725 			dst_reg->umin_value = 0;
3726 			dst_reg->umax_value = U64_MAX;
3727 		} else {
3728 			/* Cannot overflow (as long as bounds are consistent) */
3729 			dst_reg->umin_value -= umax_val;
3730 			dst_reg->umax_value -= umin_val;
3731 		}
3732 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
3733 		break;
3734 	case BPF_MUL:
3735 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
3736 		if (smin_val < 0 || dst_reg->smin_value < 0) {
3737 			/* Ain't nobody got time to multiply that sign */
3738 			__mark_reg_unbounded(dst_reg);
3739 			__update_reg_bounds(dst_reg);
3740 			break;
3741 		}
3742 		/* Both values are positive, so we can work with unsigned and
3743 		 * copy the result to signed (unless it exceeds S64_MAX).
3744 		 */
3745 		if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
3746 			/* Potential overflow, we know nothing */
3747 			__mark_reg_unbounded(dst_reg);
3748 			/* (except what we can learn from the var_off) */
3749 			__update_reg_bounds(dst_reg);
3750 			break;
3751 		}
3752 		dst_reg->umin_value *= umin_val;
3753 		dst_reg->umax_value *= umax_val;
3754 		if (dst_reg->umax_value > S64_MAX) {
3755 			/* Overflow possible, we know nothing */
3756 			dst_reg->smin_value = S64_MIN;
3757 			dst_reg->smax_value = S64_MAX;
3758 		} else {
3759 			dst_reg->smin_value = dst_reg->umin_value;
3760 			dst_reg->smax_value = dst_reg->umax_value;
3761 		}
3762 		break;
3763 	case BPF_AND:
3764 		if (src_known && dst_known) {
3765 			__mark_reg_known(dst_reg, dst_reg->var_off.value &
3766 						  src_reg.var_off.value);
3767 			break;
3768 		}
3769 		/* We get our minimum from the var_off, since that's inherently
3770 		 * bitwise.  Our maximum is the minimum of the operands' maxima.
3771 		 */
3772 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
3773 		dst_reg->umin_value = dst_reg->var_off.value;
3774 		dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
3775 		if (dst_reg->smin_value < 0 || smin_val < 0) {
3776 			/* Lose signed bounds when ANDing negative numbers,
3777 			 * ain't nobody got time for that.
3778 			 */
3779 			dst_reg->smin_value = S64_MIN;
3780 			dst_reg->smax_value = S64_MAX;
3781 		} else {
3782 			/* ANDing two positives gives a positive, so safe to
3783 			 * cast result into s64.
3784 			 */
3785 			dst_reg->smin_value = dst_reg->umin_value;
3786 			dst_reg->smax_value = dst_reg->umax_value;
3787 		}
3788 		/* We may learn something more from the var_off */
3789 		__update_reg_bounds(dst_reg);
3790 		break;
3791 	case BPF_OR:
3792 		if (src_known && dst_known) {
3793 			__mark_reg_known(dst_reg, dst_reg->var_off.value |
3794 						  src_reg.var_off.value);
3795 			break;
3796 		}
3797 		/* We get our maximum from the var_off, and our minimum is the
3798 		 * maximum of the operands' minima
3799 		 */
3800 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
3801 		dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
3802 		dst_reg->umax_value = dst_reg->var_off.value |
3803 				      dst_reg->var_off.mask;
3804 		if (dst_reg->smin_value < 0 || smin_val < 0) {
3805 			/* Lose signed bounds when ORing negative numbers,
3806 			 * ain't nobody got time for that.
3807 			 */
3808 			dst_reg->smin_value = S64_MIN;
3809 			dst_reg->smax_value = S64_MAX;
3810 		} else {
3811 			/* ORing two positives gives a positive, so safe to
3812 			 * cast result into s64.
3813 			 */
3814 			dst_reg->smin_value = dst_reg->umin_value;
3815 			dst_reg->smax_value = dst_reg->umax_value;
3816 		}
3817 		/* We may learn something more from the var_off */
3818 		__update_reg_bounds(dst_reg);
3819 		break;
3820 	case BPF_LSH:
3821 		if (umax_val >= insn_bitness) {
3822 			/* Shifts greater than 31 or 63 are undefined.
3823 			 * This includes shifts by a negative number.
3824 			 */
3825 			mark_reg_unknown(env, regs, insn->dst_reg);
3826 			break;
3827 		}
3828 		/* We lose all sign bit information (except what we can pick
3829 		 * up from var_off)
3830 		 */
3831 		dst_reg->smin_value = S64_MIN;
3832 		dst_reg->smax_value = S64_MAX;
3833 		/* If we might shift our top bit out, then we know nothing */
3834 		if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
3835 			dst_reg->umin_value = 0;
3836 			dst_reg->umax_value = U64_MAX;
3837 		} else {
3838 			dst_reg->umin_value <<= umin_val;
3839 			dst_reg->umax_value <<= umax_val;
3840 		}
3841 		dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
3842 		/* We may learn something more from the var_off */
3843 		__update_reg_bounds(dst_reg);
3844 		break;
3845 	case BPF_RSH:
3846 		if (umax_val >= insn_bitness) {
3847 			/* Shifts greater than 31 or 63 are undefined.
3848 			 * This includes shifts by a negative number.
3849 			 */
3850 			mark_reg_unknown(env, regs, insn->dst_reg);
3851 			break;
3852 		}
3853 		/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
3854 		 * be negative, then either:
3855 		 * 1) src_reg might be zero, so the sign bit of the result is
3856 		 *    unknown, so we lose our signed bounds
3857 		 * 2) it's known negative, thus the unsigned bounds capture the
3858 		 *    signed bounds
3859 		 * 3) the signed bounds cross zero, so they tell us nothing
3860 		 *    about the result
3861 		 * If the value in dst_reg is known nonnegative, then again the
3862 		 * unsigned bounds capture the signed bounds.
3863 		 * Thus, in all cases it suffices to blow away our signed bounds
3864 		 * and rely on inferring new ones from the unsigned bounds and
3865 		 * var_off of the result.
3866 		 */
3867 		dst_reg->smin_value = S64_MIN;
3868 		dst_reg->smax_value = S64_MAX;
3869 		dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
3870 		dst_reg->umin_value >>= umax_val;
3871 		dst_reg->umax_value >>= umin_val;
3872 		/* We may learn something more from the var_off */
3873 		__update_reg_bounds(dst_reg);
3874 		break;
3875 	case BPF_ARSH:
3876 		if (umax_val >= insn_bitness) {
3877 			/* Shifts greater than 31 or 63 are undefined.
3878 			 * This includes shifts by a negative number.
3879 			 */
3880 			mark_reg_unknown(env, regs, insn->dst_reg);
3881 			break;
3882 		}
3883 
3884 		/* Upon reaching here, src_known is true and
3885 		 * umax_val is equal to umin_val.
3886 		 */
3887 		dst_reg->smin_value >>= umin_val;
3888 		dst_reg->smax_value >>= umin_val;
3889 		dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
3890 
3891 		/* blow away the dst_reg umin_value/umax_value and rely on
3892 		 * dst_reg var_off to refine the result.
3893 		 */
3894 		dst_reg->umin_value = 0;
3895 		dst_reg->umax_value = U64_MAX;
3896 		__update_reg_bounds(dst_reg);
3897 		break;
3898 	default:
3899 		mark_reg_unknown(env, regs, insn->dst_reg);
3900 		break;
3901 	}
3902 
3903 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
3904 		/* 32-bit ALU ops are (32,32)->32 */
3905 		coerce_reg_to_size(dst_reg, 4);
3906 	}
3907 
3908 	__reg_deduce_bounds(dst_reg);
3909 	__reg_bound_offset(dst_reg);
3910 	return 0;
3911 }
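/* Worked example (editorial): for BPF_AND with a known src of 0x0f and a dst
 * bounded to [0, 255], umax_value becomes min(255, 0x0f) == 15, umin_value
 * comes from the resulting var_off, and the signed bounds are copied from the
 * unsigned ones because both operands are nonnegative.
 */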
3912 
3913 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
3914  * and var_off.
3915  */
3916 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
3917 				   struct bpf_insn *insn)
3918 {
3919 	struct bpf_verifier_state *vstate = env->cur_state;
3920 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3921 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
3922 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
3923 	u8 opcode = BPF_OP(insn->code);
3924 
3925 	dst_reg = &regs[insn->dst_reg];
3926 	src_reg = NULL;
3927 	if (dst_reg->type != SCALAR_VALUE)
3928 		ptr_reg = dst_reg;
3929 	if (BPF_SRC(insn->code) == BPF_X) {
3930 		src_reg = &regs[insn->src_reg];
3931 		if (src_reg->type != SCALAR_VALUE) {
3932 			if (dst_reg->type != SCALAR_VALUE) {
3933 				/* Combining two pointers by any ALU op yields
3934 				 * an arbitrary scalar. Disallow all math except
3935 				 * pointer subtraction
3936 				 */
3937 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
3938 					mark_reg_unknown(env, regs, insn->dst_reg);
3939 					return 0;
3940 				}
3941 				verbose(env, "R%d pointer %s pointer prohibited\n",
3942 					insn->dst_reg,
3943 					bpf_alu_string[opcode >> 4]);
3944 				return -EACCES;
3945 			} else {
3946 				/* scalar += pointer
3947 				 * This is legal, but we have to reverse our
3948 				 * src/dest handling in computing the range
3949 				 */
3950 				return adjust_ptr_min_max_vals(env, insn,
3951 							       src_reg, dst_reg);
3952 			}
3953 		} else if (ptr_reg) {
3954 			/* pointer += scalar */
3955 			return adjust_ptr_min_max_vals(env, insn,
3956 						       dst_reg, src_reg);
3957 		}
3958 	} else {
3959 		/* Pretend the src is a reg with a known value, since we only
3960 		 * need to be able to read from this state.
3961 		 */
3962 		off_reg.type = SCALAR_VALUE;
3963 		__mark_reg_known(&off_reg, insn->imm);
3964 		src_reg = &off_reg;
3965 		if (ptr_reg) /* pointer += K */
3966 			return adjust_ptr_min_max_vals(env, insn,
3967 						       ptr_reg, src_reg);
3968 	}
3969 
3970 	/* Got here implies adding two SCALAR_VALUEs */
3971 	if (WARN_ON_ONCE(ptr_reg)) {
3972 		print_verifier_state(env, state);
3973 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
3974 		return -EINVAL;
3975 	}
3976 	if (WARN_ON(!src_reg)) {
3977 		print_verifier_state(env, state);
3978 		verbose(env, "verifier internal error: no src_reg\n");
3979 		return -EINVAL;
3980 	}
3981 	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
3982 }
3983 
3984 /* check validity of 32-bit and 64-bit arithmetic operations */
3985 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
3986 {
3987 	struct bpf_reg_state *regs = cur_regs(env);
3988 	u8 opcode = BPF_OP(insn->code);
3989 	int err;
3990 
3991 	if (opcode == BPF_END || opcode == BPF_NEG) {
3992 		if (opcode == BPF_NEG) {
3993 			if (BPF_SRC(insn->code) != 0 ||
3994 			    insn->src_reg != BPF_REG_0 ||
3995 			    insn->off != 0 || insn->imm != 0) {
3996 				verbose(env, "BPF_NEG uses reserved fields\n");
3997 				return -EINVAL;
3998 			}
3999 		} else {
4000 			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
4001 			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
4002 			    BPF_CLASS(insn->code) == BPF_ALU64) {
4003 				verbose(env, "BPF_END uses reserved fields\n");
4004 				return -EINVAL;
4005 			}
4006 		}
4007 
4008 		/* check src operand */
4009 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4010 		if (err)
4011 			return err;
4012 
4013 		if (is_pointer_value(env, insn->dst_reg)) {
4014 			verbose(env, "R%d pointer arithmetic prohibited\n",
4015 				insn->dst_reg);
4016 			return -EACCES;
4017 		}
4018 
4019 		/* check dest operand */
4020 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
4021 		if (err)
4022 			return err;
4023 
4024 	} else if (opcode == BPF_MOV) {
4025 
4026 		if (BPF_SRC(insn->code) == BPF_X) {
4027 			if (insn->imm != 0 || insn->off != 0) {
4028 				verbose(env, "BPF_MOV uses reserved fields\n");
4029 				return -EINVAL;
4030 			}
4031 
4032 			/* check src operand */
4033 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
4034 			if (err)
4035 				return err;
4036 		} else {
4037 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
4038 				verbose(env, "BPF_MOV uses reserved fields\n");
4039 				return -EINVAL;
4040 			}
4041 		}
4042 
4043 		/* check dest operand, mark as required later */
4044 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
4045 		if (err)
4046 			return err;
4047 
4048 		if (BPF_SRC(insn->code) == BPF_X) {
4049 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
4050 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
4051 
4052 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
4053 				/* case: R1 = R2
4054 				 * copy register state to dest reg
4055 				 */
4056 				*dst_reg = *src_reg;
4057 				dst_reg->live |= REG_LIVE_WRITTEN;
4058 			} else {
4059 				/* R1 = (u32) R2 */
4060 				if (is_pointer_value(env, insn->src_reg)) {
4061 					verbose(env,
4062 						"R%d partial copy of pointer\n",
4063 						insn->src_reg);
4064 					return -EACCES;
4065 				} else if (src_reg->type == SCALAR_VALUE) {
4066 					*dst_reg = *src_reg;
4067 					dst_reg->live |= REG_LIVE_WRITTEN;
4068 				} else {
4069 					mark_reg_unknown(env, regs,
4070 							 insn->dst_reg);
4071 				}
4072 				coerce_reg_to_size(dst_reg, 4);
4073 			}
4074 		} else {
4075 			/* case: R = imm
4076 			 * remember the value we stored into this reg
4077 			 */
4078 			/* clear any state __mark_reg_known doesn't set */
4079 			mark_reg_unknown(env, regs, insn->dst_reg);
4080 			regs[insn->dst_reg].type = SCALAR_VALUE;
4081 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
4082 				__mark_reg_known(regs + insn->dst_reg,
4083 						 insn->imm);
4084 			} else {
4085 				__mark_reg_known(regs + insn->dst_reg,
4086 						 (u32)insn->imm);
4087 			}
4088 		}
4089 
4090 	} else if (opcode > BPF_END) {
4091 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
4092 		return -EINVAL;
4093 
4094 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
4095 
4096 		if (BPF_SRC(insn->code) == BPF_X) {
4097 			if (insn->imm != 0 || insn->off != 0) {
4098 				verbose(env, "BPF_ALU uses reserved fields\n");
4099 				return -EINVAL;
4100 			}
4101 			/* check src1 operand */
4102 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
4103 			if (err)
4104 				return err;
4105 		} else {
4106 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
4107 				verbose(env, "BPF_ALU uses reserved fields\n");
4108 				return -EINVAL;
4109 			}
4110 		}
4111 
4112 		/* check src2 operand */
4113 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4114 		if (err)
4115 			return err;
4116 
4117 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
4118 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
4119 			verbose(env, "div by zero\n");
4120 			return -EINVAL;
4121 		}
4122 
4123 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
4124 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
4125 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
4126 
4127 			if (insn->imm < 0 || insn->imm >= size) {
4128 				verbose(env, "invalid shift %d\n", insn->imm);
4129 				return -EINVAL;
4130 			}
4131 		}
4132 
4133 		/* check dest operand */
4134 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
4135 		if (err)
4136 			return err;
4137 
4138 		return adjust_reg_min_max_vals(env, insn);
4139 	}
4140 
4141 	return 0;
4142 }
4143 
4144 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
4145 				   struct bpf_reg_state *dst_reg,
4146 				   enum bpf_reg_type type,
4147 				   bool range_right_open)
4148 {
4149 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4150 	struct bpf_reg_state *regs = state->regs, *reg;
4151 	u16 new_range;
4152 	int i, j;
4153 
4154 	if (dst_reg->off < 0 ||
4155 	    (dst_reg->off == 0 && range_right_open))
4156 		/* This doesn't give us any range */
4157 		return;
4158 
4159 	if (dst_reg->umax_value > MAX_PACKET_OFF ||
4160 	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
4161 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
4162 		 * than pkt_end, but that's because it's also less than pkt.
4163 		 */
4164 		return;
4165 
4166 	new_range = dst_reg->off;
4167 	if (range_right_open)
4168 		new_range--;
4169 
4170 	/* Examples for register markings:
4171 	 *
4172 	 * pkt_data in dst register:
4173 	 *
4174 	 *   r2 = r3;
4175 	 *   r2 += 8;
4176 	 *   if (r2 > pkt_end) goto <handle exception>
4177 	 *   <access okay>
4178 	 *
4179 	 *   r2 = r3;
4180 	 *   r2 += 8;
4181 	 *   if (r2 < pkt_end) goto <access okay>
4182 	 *   <handle exception>
4183 	 *
4184 	 *   Where:
4185 	 *     r2 == dst_reg, pkt_end == src_reg
4186 	 *     r2=pkt(id=n,off=8,r=0)
4187 	 *     r3=pkt(id=n,off=0,r=0)
4188 	 *
4189 	 * pkt_data in src register:
4190 	 *
4191 	 *   r2 = r3;
4192 	 *   r2 += 8;
4193 	 *   if (pkt_end >= r2) goto <access okay>
4194 	 *   <handle exception>
4195 	 *
4196 	 *   r2 = r3;
4197 	 *   r2 += 8;
4198 	 *   if (pkt_end <= r2) goto <handle exception>
4199 	 *   <access okay>
4200 	 *
4201 	 *   Where:
4202 	 *     pkt_end == dst_reg, r2 == src_reg
4203 	 *     r2=pkt(id=n,off=8,r=0)
4204 	 *     r3=pkt(id=n,off=0,r=0)
4205 	 *
4206 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
4207 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
4208 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
4209 	 * the check.
4210 	 */
4211 
4212 	/* If our ids match, then we must have the same max_value.  And we
4213 	 * don't care about the other reg's fixed offset, since if it's too big
4214 	 * the range won't allow anything.
4215 	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
4216 	 */
4217 	for (i = 0; i < MAX_BPF_REG; i++)
4218 		if (regs[i].type == type && regs[i].id == dst_reg->id)
4219 			/* keep the maximum range already checked */
4220 			regs[i].range = max(regs[i].range, new_range);
4221 
4222 	for (j = 0; j <= vstate->curframe; j++) {
4223 		state = vstate->frame[j];
4224 		bpf_for_each_spilled_reg(i, state, reg) {
4225 			if (!reg)
4226 				continue;
4227 			if (reg->type == type && reg->id == dst_reg->id)
4228 				reg->range = max(reg->range, new_range);
4229 		}
4230 	}
4231 }
4232 
4233 /* compute branch direction of the expression "if (reg opcode val) goto target;"
4234  * and return:
4235  *  1 - branch will be taken and "goto target" will be executed
4236  *  0 - branch will not be taken and fall-through to next insn
4237  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10]
4238  * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's value range is [0,10]
4239 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
4240 			   bool is_jmp32)
4241 {
4242 	struct bpf_reg_state reg_lo;
4243 	s64 sval;
4244 
4245 	if (__is_pointer_value(false, reg))
4246 		return -1;
4247 
4248 	if (is_jmp32) {
4249 		reg_lo = *reg;
4250 		reg = &reg_lo;
4251 		/* For JMP32, only the low 32 bits are compared;
4252 		 * coerce_reg_to_size() truncates the high bits and updates
4253 		 * umin/umax according to the low 32 bits.
4254 		 */
4255 		coerce_reg_to_size(reg, 4);
4256 		/* smin/smax need special handling. For example, after coerce,
4257 		 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
4258 		 * used as operand to JMP32. It is a negative number from s32's
4259 		 * point of view, while it is a positive number when seen as
4260 		 * s64. The smin/smax are kept as s64, therefore, when used with
4261 		 * JMP32, they need to be transformed into s32, then sign
4262 		 * extended back to s64.
4263 		 *
4264 		 * Also, smin/smax were copied from umin/umax. If umin/umax have
4265 		 * different sign bits, the min/max relationship no longer holds
4266 		 * after casting into s32; in that case, set smin/smax to the
4267 		 * safest range.
4268 		 */
4269 		if ((reg->umax_value ^ reg->umin_value) &
4270 		    (1ULL << 31)) {
4271 			reg->smin_value = S32_MIN;
4272 			reg->smax_value = S32_MAX;
4273 		}
4274 		reg->smin_value = (s64)(s32)reg->smin_value;
4275 		reg->smax_value = (s64)(s32)reg->smax_value;
4276 
4277 		val = (u32)val;
4278 		sval = (s64)(s32)val;
4279 	} else {
4280 		sval = (s64)val;
4281 	}
4282 
4283 	switch (opcode) {
4284 	case BPF_JEQ:
4285 		if (tnum_is_const(reg->var_off))
4286 			return !!tnum_equals_const(reg->var_off, val);
4287 		break;
4288 	case BPF_JNE:
4289 		if (tnum_is_const(reg->var_off))
4290 			return !tnum_equals_const(reg->var_off, val);
4291 		break;
4292 	case BPF_JSET:
4293 		if ((~reg->var_off.mask & reg->var_off.value) & val)
4294 			return 1;
4295 		if (!((reg->var_off.mask | reg->var_off.value) & val))
4296 			return 0;
4297 		break;
4298 	case BPF_JGT:
4299 		if (reg->umin_value > val)
4300 			return 1;
4301 		else if (reg->umax_value <= val)
4302 			return 0;
4303 		break;
4304 	case BPF_JSGT:
4305 		if (reg->smin_value > sval)
4306 			return 1;
4307 		else if (reg->smax_value < sval)
4308 			return 0;
4309 		break;
4310 	case BPF_JLT:
4311 		if (reg->umax_value < val)
4312 			return 1;
4313 		else if (reg->umin_value >= val)
4314 			return 0;
4315 		break;
4316 	case BPF_JSLT:
4317 		if (reg->smax_value < sval)
4318 			return 1;
4319 		else if (reg->smin_value >= sval)
4320 			return 0;
4321 		break;
4322 	case BPF_JGE:
4323 		if (reg->umin_value >= val)
4324 			return 1;
4325 		else if (reg->umax_value < val)
4326 			return 0;
4327 		break;
4328 	case BPF_JSGE:
4329 		if (reg->smin_value >= sval)
4330 			return 1;
4331 		else if (reg->smax_value < sval)
4332 			return 0;
4333 		break;
4334 	case BPF_JLE:
4335 		if (reg->umax_value <= val)
4336 			return 1;
4337 		else if (reg->umin_value > val)
4338 			return 0;
4339 		break;
4340 	case BPF_JSLE:
4341 		if (reg->smax_value <= sval)
4342 			return 1;
4343 		else if (reg->smin_value > sval)
4344 			return 0;
4345 		break;
4346 	}
4347 
4348 	return -1;
4349 }
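/* Worked example (editorial): for a scalar bounded to [6, 10] (umin_value ==
 * 6, umax_value == 10), BPF_JGT against 5 returns 1 (umin_value > 5),
 * BPF_JGT against 10 returns 0 (umax_value <= 10), and BPF_JGT against 7
 * returns -1 because either outcome is still possible.
 */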
4350 
4351 /* Generate min value of the high 32-bit from TNUM info. */
4352 static u64 gen_hi_min(struct tnum var)
4353 {
4354 	return var.value & ~0xffffffffULL;
4355 }
4356 
4357 /* Generate max value of the high 32-bit from TNUM info. */
4358 static u64 gen_hi_max(struct tnum var)
4359 {
4360 	return (var.value | var.mask) & ~0xffffffffULL;
4361 }
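/* Editorial example: for a tnum with value == 0x100000000 and mask == 0xffff
 * (high word known, low 16 bits unknown), gen_hi_min() and gen_hi_max() both
 * return 0x100000000, since all unknown bits live in the low 32 bits.
 */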
4362 
4363 /* Return true if VAL is compared with an s64 sign-extended from an s32, and
4364  * the two have the same sign.
4365  */
4366 static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
4367 {
4368 	return ((s32)sval >= 0 &&
4369 		reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
4370 	       ((s32)sval < 0 &&
4371 		reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
4372 }
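/* Editorial example: sval == 5 compared against a register bounded to
 * [0, 100] returns true (both nonnegative and within s32 range), while
 * sval == -1 against the same register returns false because the signs
 * differ, so the JMP32 signed-bound deduction below is skipped.
 */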
4373 
4374 /* Adjusts the register min/max values in the case that the dst_reg is the
4375  * variable register that we are working on, and src_reg is a constant or we're
4376  * simply doing a BPF_K check.
4377  * In JEQ/JNE cases we also adjust the var_off values.
4378  */
4379 static void reg_set_min_max(struct bpf_reg_state *true_reg,
4380 			    struct bpf_reg_state *false_reg, u64 val,
4381 			    u8 opcode, bool is_jmp32)
4382 {
4383 	s64 sval;
4384 
4385 	/* If the dst_reg is a pointer, we can't learn anything about its
4386 	 * variable offset from the compare (unless src_reg were a pointer into
4387 	 * the same object, but we don't bother with that.
4388 	 * Since false_reg and true_reg have the same type by construction, we
4389 	 * only need to check one of them for pointerness.
4390 	 */
4391 	if (__is_pointer_value(false, false_reg))
4392 		return;
4393 
4394 	val = is_jmp32 ? (u32)val : val;
4395 	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
4396 
4397 	switch (opcode) {
4398 	case BPF_JEQ:
4399 	case BPF_JNE:
4400 	{
4401 		struct bpf_reg_state *reg =
4402 			opcode == BPF_JEQ ? true_reg : false_reg;
4403 
4404 		/* For BPF_JEQ, if this is false we know nothing Jon Snow, but
4405 		 * if it is true we know the value for sure. Likewise for
4406 		 * BPF_JNE.
4407 		 */
4408 		if (is_jmp32) {
4409 			u64 old_v = reg->var_off.value;
4410 			u64 hi_mask = ~0xffffffffULL;
4411 
4412 			reg->var_off.value = (old_v & hi_mask) | val;
4413 			reg->var_off.mask &= hi_mask;
4414 		} else {
4415 			__mark_reg_known(reg, val);
4416 		}
4417 		break;
4418 	}
4419 	case BPF_JSET:
4420 		false_reg->var_off = tnum_and(false_reg->var_off,
4421 					      tnum_const(~val));
4422 		if (is_power_of_2(val))
4423 			true_reg->var_off = tnum_or(true_reg->var_off,
4424 						    tnum_const(val));
4425 		break;
4426 	case BPF_JGE:
4427 	case BPF_JGT:
4428 	{
4429 		u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
4430 		u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
4431 
4432 		if (is_jmp32) {
4433 			false_umax += gen_hi_max(false_reg->var_off);
4434 			true_umin += gen_hi_min(true_reg->var_off);
4435 		}
4436 		false_reg->umax_value = min(false_reg->umax_value, false_umax);
4437 		true_reg->umin_value = max(true_reg->umin_value, true_umin);
4438 		break;
4439 	}
4440 	case BPF_JSGE:
4441 	case BPF_JSGT:
4442 	{
4443 		s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
4444 		s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
4445 
4446 		/* If the full s64 was not sign-extended from s32 then don't
4447 		 * deduce further info.
4448 		 */
4449 		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4450 			break;
4451 		false_reg->smax_value = min(false_reg->smax_value, false_smax);
4452 		true_reg->smin_value = max(true_reg->smin_value, true_smin);
4453 		break;
4454 	}
4455 	case BPF_JLE:
4456 	case BPF_JLT:
4457 	{
4458 		u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
4459 		u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
4460 
4461 		if (is_jmp32) {
4462 			false_umin += gen_hi_min(false_reg->var_off);
4463 			true_umax += gen_hi_max(true_reg->var_off);
4464 		}
4465 		false_reg->umin_value = max(false_reg->umin_value, false_umin);
4466 		true_reg->umax_value = min(true_reg->umax_value, true_umax);
4467 		break;
4468 	}
4469 	case BPF_JSLE:
4470 	case BPF_JSLT:
4471 	{
4472 		s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
4473 		s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
4474 
4475 		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4476 			break;
4477 		false_reg->smin_value = max(false_reg->smin_value, false_smin);
4478 		true_reg->smax_value = min(true_reg->smax_value, true_smax);
4479 		break;
4480 	}
4481 	default:
4482 		break;
4483 	}
4484 
4485 	__reg_deduce_bounds(false_reg);
4486 	__reg_deduce_bounds(true_reg);
4487 	/* We might have learned some bits from the bounds. */
4488 	__reg_bound_offset(false_reg);
4489 	__reg_bound_offset(true_reg);
4490 	/* Intersecting with the old var_off might have improved our bounds
4491 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4492 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
4493 	 */
4494 	__update_reg_bounds(false_reg);
4495 	__update_reg_bounds(true_reg);
4496 }
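/* Worked example (editorial): for a 64-bit "if (r1 > 5)" (BPF_JGT, BPF_K),
 * the false branch gets umax_value = min(umax_value, 5) and the true branch
 * gets umin_value = max(umin_value, 6); the __reg_deduce_bounds() and
 * __reg_bound_offset() calls above then propagate those bounds into the
 * signed range and var_off.
 */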
4497 
4498 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
4499  * the variable reg.
4500  */
4501 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
4502 				struct bpf_reg_state *false_reg, u64 val,
4503 				u8 opcode, bool is_jmp32)
4504 {
4505 	s64 sval;
4506 
4507 	if (__is_pointer_value(false, false_reg))
4508 		return;
4509 
4510 	val = is_jmp32 ? (u32)val : val;
4511 	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
4512 
4513 	switch (opcode) {
4514 	case BPF_JEQ:
4515 	case BPF_JNE:
4516 	{
4517 		struct bpf_reg_state *reg =
4518 			opcode == BPF_JEQ ? true_reg : false_reg;
4519 
4520 		if (is_jmp32) {
4521 			u64 old_v = reg->var_off.value;
4522 			u64 hi_mask = ~0xffffffffULL;
4523 
4524 			reg->var_off.value = (old_v & hi_mask) | val;
4525 			reg->var_off.mask &= hi_mask;
4526 		} else {
4527 			__mark_reg_known(reg, val);
4528 		}
4529 		break;
4530 	}
4531 	case BPF_JSET:
4532 		false_reg->var_off = tnum_and(false_reg->var_off,
4533 					      tnum_const(~val));
4534 		if (is_power_of_2(val))
4535 			true_reg->var_off = tnum_or(true_reg->var_off,
4536 						    tnum_const(val));
4537 		break;
4538 	case BPF_JGE:
4539 	case BPF_JGT:
4540 	{
4541 		u64 false_umin = opcode == BPF_JGT ? val    : val + 1;
4542 		u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
4543 
4544 		if (is_jmp32) {
4545 			false_umin += gen_hi_min(false_reg->var_off);
4546 			true_umax += gen_hi_max(true_reg->var_off);
4547 		}
4548 		false_reg->umin_value = max(false_reg->umin_value, false_umin);
4549 		true_reg->umax_value = min(true_reg->umax_value, true_umax);
4550 		break;
4551 	}
4552 	case BPF_JSGE:
4553 	case BPF_JSGT:
4554 	{
4555 		s64 false_smin = opcode == BPF_JSGT ? sval    : sval + 1;
4556 		s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
4557 
4558 		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4559 			break;
4560 		false_reg->smin_value = max(false_reg->smin_value, false_smin);
4561 		true_reg->smax_value = min(true_reg->smax_value, true_smax);
4562 		break;
4563 	}
4564 	case BPF_JLE:
4565 	case BPF_JLT:
4566 	{
4567 		u64 false_umax = opcode == BPF_JLT ? val    : val - 1;
4568 		u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
4569 
4570 		if (is_jmp32) {
4571 			false_umax += gen_hi_max(false_reg->var_off);
4572 			true_umin += gen_hi_min(true_reg->var_off);
4573 		}
4574 		false_reg->umax_value = min(false_reg->umax_value, false_umax);
4575 		true_reg->umin_value = max(true_reg->umin_value, true_umin);
4576 		break;
4577 	}
4578 	case BPF_JSLE:
4579 	case BPF_JSLT:
4580 	{
4581 		s64 false_smax = opcode == BPF_JSLT ? sval    : sval - 1;
4582 		s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
4583 
4584 		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4585 			break;
4586 		false_reg->smax_value = min(false_reg->smax_value, false_smax);
4587 		true_reg->smin_value = max(true_reg->smin_value, true_smin);
4588 		break;
4589 	}
4590 	default:
4591 		break;
4592 	}
4593 
4594 	__reg_deduce_bounds(false_reg);
4595 	__reg_deduce_bounds(true_reg);
4596 	/* We might have learned some bits from the bounds. */
4597 	__reg_bound_offset(false_reg);
4598 	__reg_bound_offset(true_reg);
4599 	/* Intersecting with the old var_off might have improved our bounds
4600 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4601 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
4602 	 */
4603 	__update_reg_bounds(false_reg);
4604 	__update_reg_bounds(true_reg);
4605 }
4606 
4607 /* Regs are known to be equal, so intersect their min/max/var_off */
4608 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
4609 				  struct bpf_reg_state *dst_reg)
4610 {
4611 	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
4612 							dst_reg->umin_value);
4613 	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
4614 							dst_reg->umax_value);
4615 	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
4616 							dst_reg->smin_value);
4617 	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
4618 							dst_reg->smax_value);
4619 	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
4620 							     dst_reg->var_off);
4621 	/* We might have learned new bounds from the var_off. */
4622 	__update_reg_bounds(src_reg);
4623 	__update_reg_bounds(dst_reg);
4624 	/* We might have learned something about the sign bit. */
4625 	__reg_deduce_bounds(src_reg);
4626 	__reg_deduce_bounds(dst_reg);
4627 	/* We might have learned some bits from the bounds. */
4628 	__reg_bound_offset(src_reg);
4629 	__reg_bound_offset(dst_reg);
4630 	/* Intersecting with the old var_off might have improved our bounds
4631 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4632 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
4633 	 */
4634 	__update_reg_bounds(src_reg);
4635 	__update_reg_bounds(dst_reg);
4636 }
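/* Editorial example: if one register is bounded to [0, 10] and the other to
 * [5, 20], a taken JEQ lets both be narrowed to [5, 10], and their var_offs
 * are intersected via tnum_intersect().
 */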
4637 
4638 static void reg_combine_min_max(struct bpf_reg_state *true_src,
4639 				struct bpf_reg_state *true_dst,
4640 				struct bpf_reg_state *false_src,
4641 				struct bpf_reg_state *false_dst,
4642 				u8 opcode)
4643 {
4644 	switch (opcode) {
4645 	case BPF_JEQ:
4646 		__reg_combine_min_max(true_src, true_dst);
4647 		break;
4648 	case BPF_JNE:
4649 		__reg_combine_min_max(false_src, false_dst);
4650 		break;
4651 	}
4652 }
4653 
4654 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
4655 				 struct bpf_reg_state *reg, u32 id,
4656 				 bool is_null)
4657 {
4658 	if (reg_type_may_be_null(reg->type) && reg->id == id) {
4659 		/* Old offset (both fixed and variable parts) should
4660 		 * have been known-zero, because we don't allow pointer
4661 		 * arithmetic on pointers that might be NULL.
4662 		 */
4663 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
4664 				 !tnum_equals_const(reg->var_off, 0) ||
4665 				 reg->off)) {
4666 			__mark_reg_known_zero(reg);
4667 			reg->off = 0;
4668 		}
4669 		if (is_null) {
4670 			reg->type = SCALAR_VALUE;
4671 		} else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
4672 			if (reg->map_ptr->inner_map_meta) {
4673 				reg->type = CONST_PTR_TO_MAP;
4674 				reg->map_ptr = reg->map_ptr->inner_map_meta;
4675 			} else {
4676 				reg->type = PTR_TO_MAP_VALUE;
4677 			}
4678 		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
4679 			reg->type = PTR_TO_SOCKET;
4680 		} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
4681 			reg->type = PTR_TO_SOCK_COMMON;
4682 		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
4683 			reg->type = PTR_TO_TCP_SOCK;
4684 		}
4685 		if (is_null) {
4686 			/* We no longer need id and ref_obj_id from this point
4687 			 * onwards, so reset them to give state pruning a
4688 			 * chance to take effect.
4689 			 */
4690 			reg->id = 0;
4691 			reg->ref_obj_id = 0;
4692 		} else if (!reg_may_point_to_spin_lock(reg)) {
4693 			/* For not-NULL ptr, reg->ref_obj_id will be reset
4694 			 * in release_reg_references().
4695 			 *
4696 			 * reg->id is still used by spin_lock ptr. Other
4697 			 * than spin_lock ptr type, reg->id can be reset.
4698 			 */
4699 			reg->id = 0;
4700 		}
4701 	}
4702 }
4703 
4704 /* The logic is similar to find_good_pkt_pointers(), both could eventually
4705  * be folded together at some point.
4706  */
4707 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
4708 				  bool is_null)
4709 {
4710 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4711 	struct bpf_reg_state *reg, *regs = state->regs;
4712 	u32 ref_obj_id = regs[regno].ref_obj_id;
4713 	u32 id = regs[regno].id;
4714 	int i, j;
4715 
4716 	if (ref_obj_id && ref_obj_id == id && is_null)
4717 		/* regs[regno] is in the " == NULL" branch.
4718 		 * No one could have freed the reference state before
4719 		 * doing the NULL check.
4720 		 */
4721 		WARN_ON_ONCE(release_reference_state(state, id));
4722 
4723 	for (i = 0; i < MAX_BPF_REG; i++)
4724 		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
4725 
4726 	for (j = 0; j <= vstate->curframe; j++) {
4727 		state = vstate->frame[j];
4728 		bpf_for_each_spilled_reg(i, state, reg) {
4729 			if (!reg)
4730 				continue;
4731 			mark_ptr_or_null_reg(state, reg, id, is_null);
4732 		}
4733 	}
4734 }
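/* Editorial example: after r0 = bpf_map_lookup_elem(...) followed by
 * "if (r0 == 0) goto out", every register in every frame (including spilled
 * copies) sharing r0's id becomes SCALAR_VALUE on the NULL branch and
 * PTR_TO_MAP_VALUE on the other, with id/ref_obj_id cleared as described in
 * mark_ptr_or_null_reg() above.
 */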
4735 
4736 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
4737 				   struct bpf_reg_state *dst_reg,
4738 				   struct bpf_reg_state *src_reg,
4739 				   struct bpf_verifier_state *this_branch,
4740 				   struct bpf_verifier_state *other_branch)
4741 {
4742 	if (BPF_SRC(insn->code) != BPF_X)
4743 		return false;
4744 
4745 	/* Pointers are always 64-bit. */
4746 	if (BPF_CLASS(insn->code) == BPF_JMP32)
4747 		return false;
4748 
4749 	switch (BPF_OP(insn->code)) {
4750 	case BPF_JGT:
4751 		if ((dst_reg->type == PTR_TO_PACKET &&
4752 		     src_reg->type == PTR_TO_PACKET_END) ||
4753 		    (dst_reg->type == PTR_TO_PACKET_META &&
4754 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4755 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
4756 			find_good_pkt_pointers(this_branch, dst_reg,
4757 					       dst_reg->type, false);
4758 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
4759 			    src_reg->type == PTR_TO_PACKET) ||
4760 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4761 			    src_reg->type == PTR_TO_PACKET_META)) {
4762 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
4763 			find_good_pkt_pointers(other_branch, src_reg,
4764 					       src_reg->type, true);
4765 		} else {
4766 			return false;
4767 		}
4768 		break;
4769 	case BPF_JLT:
4770 		if ((dst_reg->type == PTR_TO_PACKET &&
4771 		     src_reg->type == PTR_TO_PACKET_END) ||
4772 		    (dst_reg->type == PTR_TO_PACKET_META &&
4773 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4774 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
4775 			find_good_pkt_pointers(other_branch, dst_reg,
4776 					       dst_reg->type, true);
4777 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
4778 			    src_reg->type == PTR_TO_PACKET) ||
4779 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4780 			    src_reg->type == PTR_TO_PACKET_META)) {
4781 			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
4782 			find_good_pkt_pointers(this_branch, src_reg,
4783 					       src_reg->type, false);
4784 		} else {
4785 			return false;
4786 		}
4787 		break;
4788 	case BPF_JGE:
4789 		if ((dst_reg->type == PTR_TO_PACKET &&
4790 		     src_reg->type == PTR_TO_PACKET_END) ||
4791 		    (dst_reg->type == PTR_TO_PACKET_META &&
4792 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4793 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
4794 			find_good_pkt_pointers(this_branch, dst_reg,
4795 					       dst_reg->type, true);
4796 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
4797 			    src_reg->type == PTR_TO_PACKET) ||
4798 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4799 			    src_reg->type == PTR_TO_PACKET_META)) {
4800 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
4801 			find_good_pkt_pointers(other_branch, src_reg,
4802 					       src_reg->type, false);
4803 		} else {
4804 			return false;
4805 		}
4806 		break;
4807 	case BPF_JLE:
4808 		if ((dst_reg->type == PTR_TO_PACKET &&
4809 		     src_reg->type == PTR_TO_PACKET_END) ||
4810 		    (dst_reg->type == PTR_TO_PACKET_META &&
4811 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4812 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
4813 			find_good_pkt_pointers(other_branch, dst_reg,
4814 					       dst_reg->type, false);
4815 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
4816 			    src_reg->type == PTR_TO_PACKET) ||
4817 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4818 			    src_reg->type == PTR_TO_PACKET_META)) {
4819 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
4820 			find_good_pkt_pointers(this_branch, src_reg,
4821 					       src_reg->type, true);
4822 		} else {
4823 			return false;
4824 		}
4825 		break;
4826 	default:
4827 		return false;
4828 	}
4829 
4830 	return true;
4831 }
4832 
4833 static int check_cond_jmp_op(struct bpf_verifier_env *env,
4834 			     struct bpf_insn *insn, int *insn_idx)
4835 {
4836 	struct bpf_verifier_state *this_branch = env->cur_state;
4837 	struct bpf_verifier_state *other_branch;
4838 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
4839 	struct bpf_reg_state *dst_reg, *other_branch_regs;
4840 	u8 opcode = BPF_OP(insn->code);
4841 	bool is_jmp32;
4842 	int err;
4843 
4844 	/* Only conditional jumps are expected to reach here. */
4845 	if (opcode == BPF_JA || opcode > BPF_JSLE) {
4846 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
4847 		return -EINVAL;
4848 	}
4849 
4850 	if (BPF_SRC(insn->code) == BPF_X) {
4851 		if (insn->imm != 0) {
4852 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
4853 			return -EINVAL;
4854 		}
4855 
4856 		/* check src1 operand */
4857 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
4858 		if (err)
4859 			return err;
4860 
4861 		if (is_pointer_value(env, insn->src_reg)) {
4862 			verbose(env, "R%d pointer comparison prohibited\n",
4863 				insn->src_reg);
4864 			return -EACCES;
4865 		}
4866 	} else {
4867 		if (insn->src_reg != BPF_REG_0) {
4868 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
4869 			return -EINVAL;
4870 		}
4871 	}
4872 
4873 	/* check src2 operand */
4874 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4875 	if (err)
4876 		return err;
4877 
4878 	dst_reg = &regs[insn->dst_reg];
4879 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
4880 
4881 	if (BPF_SRC(insn->code) == BPF_K) {
4882 		int pred = is_branch_taken(dst_reg, insn->imm, opcode,
4883 					   is_jmp32);
4884 
4885 		if (pred == 1) {
4886 			 /* only follow the goto, ignore fall-through */
4887 			*insn_idx += insn->off;
4888 			return 0;
4889 		} else if (pred == 0) {
4890 			/* only follow fall-through branch, since
4891 			 * that's where the program will go
4892 			 */
4893 			return 0;
4894 		}
4895 	}
4896 
4897 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
4898 				  false);
4899 	if (!other_branch)
4900 		return -EFAULT;
4901 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
4902 
4903 	/* detect if we are comparing against a constant value so we can adjust
4904 	 * our min/max values for our dst register.
4905 	 * this is only legit if both are scalars (or pointers to the same
4906 	 * object, I suppose, but we don't support that right now), because
4907 	 * otherwise the different base pointers mean the offsets aren't
4908 	 * comparable.
4909 	 */
4910 	if (BPF_SRC(insn->code) == BPF_X) {
4911 		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
4912 		struct bpf_reg_state lo_reg0 = *dst_reg;
4913 		struct bpf_reg_state lo_reg1 = *src_reg;
4914 		struct bpf_reg_state *src_lo, *dst_lo;
4915 
4916 		dst_lo = &lo_reg0;
4917 		src_lo = &lo_reg1;
4918 		coerce_reg_to_size(dst_lo, 4);
4919 		coerce_reg_to_size(src_lo, 4);
4920 
4921 		if (dst_reg->type == SCALAR_VALUE &&
4922 		    src_reg->type == SCALAR_VALUE) {
4923 			if (tnum_is_const(src_reg->var_off) ||
4924 			    (is_jmp32 && tnum_is_const(src_lo->var_off)))
4925 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
4926 						dst_reg,
4927 						is_jmp32
4928 						? src_lo->var_off.value
4929 						: src_reg->var_off.value,
4930 						opcode, is_jmp32);
4931 			else if (tnum_is_const(dst_reg->var_off) ||
4932 				 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
4933 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
4934 						    src_reg,
4935 						    is_jmp32
4936 						    ? dst_lo->var_off.value
4937 						    : dst_reg->var_off.value,
4938 						    opcode, is_jmp32);
4939 			else if (!is_jmp32 &&
4940 				 (opcode == BPF_JEQ || opcode == BPF_JNE))
4941 				/* Comparing for equality, we can combine knowledge */
4942 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
4943 						    &other_branch_regs[insn->dst_reg],
4944 						    src_reg, dst_reg, opcode);
4945 		}
4946 	} else if (dst_reg->type == SCALAR_VALUE) {
4947 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
4948 					dst_reg, insn->imm, opcode, is_jmp32);
4949 	}
4950 
4951 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
4952 	 * NOTE: these optimizations below are related to pointer comparisons,
4953 	 *       which will never be JMP32.
4954 	 */
4955 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
4956 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
4957 	    reg_type_may_be_null(dst_reg->type)) {
4958 		/* Mark all identical registers in each branch as either
4959 		 * safe or unknown depending R == 0 or R != 0 conditional.
4960 		 * safe or unknown depending on the R == 0 or R != 0 condition.
4961 		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
4962 				      opcode == BPF_JNE);
4963 		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
4964 				      opcode == BPF_JEQ);
4965 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
4966 					   this_branch, other_branch) &&
4967 		   is_pointer_value(env, insn->dst_reg)) {
4968 		verbose(env, "R%d pointer comparison prohibited\n",
4969 			insn->dst_reg);
4970 		return -EACCES;
4971 	}
4972 	if (env->log.level)
4973 		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
4974 	return 0;
4975 }
4976 
4977 /* return the map pointer stored inside BPF_LD_IMM64 instruction */
4978 static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
4979 {
4980 	u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
4981 
4982 	return (struct bpf_map *) (unsigned long) imm64;
4983 }
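
/* Illustrative example of the reconstruction above (values are made up):
 * with insn[0].imm == 0x00000004 and insn[1].imm == 0xffff8880 the two
 * halves combine into the 64-bit pointer 0xffff888000000004 that
 * replace_map_fd_with_map_ptr() stored into the ld_imm64 pair earlier.
 */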
4984 
4985 /* verify BPF_LD_IMM64 instruction */
4986 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
4987 {
4988 	struct bpf_reg_state *regs = cur_regs(env);
4989 	int err;
4990 
4991 	if (BPF_SIZE(insn->code) != BPF_DW) {
4992 		verbose(env, "invalid BPF_LD_IMM insn\n");
4993 		return -EINVAL;
4994 	}
4995 	if (insn->off != 0) {
4996 		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
4997 		return -EINVAL;
4998 	}
4999 
5000 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
5001 	if (err)
5002 		return err;
5003 
5004 	if (insn->src_reg == 0) {
5005 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
5006 
5007 		regs[insn->dst_reg].type = SCALAR_VALUE;
5008 		__mark_reg_known(&regs[insn->dst_reg], imm);
5009 		return 0;
5010 	}
5011 
5012 	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
5013 	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
5014 
5015 	regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
5016 	regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
5017 	return 0;
5018 }
5019 
5020 static bool may_access_skb(enum bpf_prog_type type)
5021 {
5022 	switch (type) {
5023 	case BPF_PROG_TYPE_SOCKET_FILTER:
5024 	case BPF_PROG_TYPE_SCHED_CLS:
5025 	case BPF_PROG_TYPE_SCHED_ACT:
5026 		return true;
5027 	default:
5028 		return false;
5029 	}
5030 }
5031 
5032 /* verify safety of LD_ABS|LD_IND instructions:
5033  * - they can only appear in the programs where ctx == skb
5034  * - since they are wrappers of function calls, they scratch R1-R5 registers,
5035  *   preserve R6-R9, and store return value into R0
5036  *
5037  * Implicit input:
5038  *   ctx == skb == R6 == CTX
5039  *
5040  * Explicit input:
5041  *   SRC == any register
5042  *   IMM == 32-bit immediate
5043  *
5044  * Output:
5045  *   R0 - 8/16/32-bit skb data converted to cpu endianness
5046  */
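/* Hypothetical usage sketch (not from a real program): a socket filter
 * doing
 *   r6 = r1                  ; skb (ctx) must be in R6
 *   r0 = *(u16 *)skb[12]     ; BPF_LD_ABS(BPF_H, 12), e.g. the ethertype
 *   if r0 != 0x0800 goto drop
 * relies on R6 holding PTR_TO_CTX at the LD_ABS, has R1-R5 scratched,
 * and receives the loaded halfword in R0, which is exactly the contract
 * that check_ld_abs() below verifies and models.
 */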
5047 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
5048 {
5049 	struct bpf_reg_state *regs = cur_regs(env);
5050 	u8 mode = BPF_MODE(insn->code);
5051 	int i, err;
5052 
5053 	if (!may_access_skb(env->prog->type)) {
5054 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
5055 		return -EINVAL;
5056 	}
5057 
5058 	if (!env->ops->gen_ld_abs) {
5059 		verbose(env, "bpf verifier is misconfigured\n");
5060 		return -EINVAL;
5061 	}
5062 
5063 	if (env->subprog_cnt > 1) {
5064 		/* when a program has an LD_ABS insn, JITs and the interpreter
5065 		 * assume that r1 == ctx == skb, which is not the case for
5066 		 * callees that can have arbitrary arguments. It's problematic
5067 		 * for the main prog as well since JITs would need to analyze
5068 		 * all functions in order to make proper register save/restore
5069 		 * decisions in the main prog. Hence disallow LD_ABS with calls.
5070 		 */
5071 		verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
5072 		return -EINVAL;
5073 	}
5074 
5075 	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
5076 	    BPF_SIZE(insn->code) == BPF_DW ||
5077 	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
5078 		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
5079 		return -EINVAL;
5080 	}
5081 
5082 	/* check whether implicit source operand (register R6) is readable */
5083 	err = check_reg_arg(env, BPF_REG_6, SRC_OP);
5084 	if (err)
5085 		return err;
5086 
5087 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
5088 	 * gen_ld_abs() may terminate the program at runtime, leading to
5089 	 * reference leak.
5090 	 */
5091 	err = check_reference_leak(env);
5092 	if (err) {
5093 		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
5094 		return err;
5095 	}
5096 
5097 	if (env->cur_state->active_spin_lock) {
5098 		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
5099 		return -EINVAL;
5100 	}
5101 
5102 	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
5103 		verbose(env,
5104 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
5105 		return -EINVAL;
5106 	}
5107 
5108 	if (mode == BPF_IND) {
5109 		/* check explicit source operand */
5110 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
5111 		if (err)
5112 			return err;
5113 	}
5114 
5115 	/* reset caller saved regs to unreadable */
5116 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
5117 		mark_reg_not_init(env, regs, caller_saved[i]);
5118 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5119 	}
5120 
5121 	/* mark destination R0 register as readable, since it contains
5122 	 * the value fetched from the packet.
5123 	 * Already marked as written above.
5124 	 */
5125 	mark_reg_unknown(env, regs, BPF_REG_0);
5126 	return 0;
5127 }
5128 
5129 static int check_return_code(struct bpf_verifier_env *env)
5130 {
5131 	struct bpf_reg_state *reg;
5132 	struct tnum range = tnum_range(0, 1);
5133 
5134 	switch (env->prog->type) {
5135 	case BPF_PROG_TYPE_CGROUP_SKB:
5136 	case BPF_PROG_TYPE_CGROUP_SOCK:
5137 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
5138 	case BPF_PROG_TYPE_SOCK_OPS:
5139 	case BPF_PROG_TYPE_CGROUP_DEVICE:
5140 		break;
5141 	default:
5142 		return 0;
5143 	}
5144 
5145 	reg = cur_regs(env) + BPF_REG_0;
5146 	if (reg->type != SCALAR_VALUE) {
5147 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
5148 			reg_type_str[reg->type]);
5149 		return -EINVAL;
5150 	}
5151 
5152 	if (!tnum_in(range, reg->var_off)) {
5153 		verbose(env, "At program exit the register R0 ");
5154 		if (!tnum_is_unknown(reg->var_off)) {
5155 			char tn_buf[48];
5156 
5157 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5158 			verbose(env, "has value %s", tn_buf);
5159 		} else {
5160 			verbose(env, "has unknown scalar value");
5161 		}
5162 		verbose(env, " should have been 0 or 1\n");
5163 		return -EINVAL;
5164 	}
5165 	return 0;
5166 }
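
/* Illustrative example (hypothetical program): a BPF_PROG_TYPE_CGROUP_SKB
 * program ending in
 *   r0 = 2
 *   exit
 * fails the tnum_in(tnum_range(0, 1), reg->var_off) test above and is
 * rejected, while ending in r0 = 0 or r0 = 1 passes.
 */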
5167 
5168 /* non-recursive DFS pseudo code
5169  * 1  procedure DFS-iterative(G,v):
5170  * 2      label v as discovered
5171  * 3      let S be a stack
5172  * 4      S.push(v)
5173  * 5      while S is not empty
5174  * 6            t <- S.pop()
5175  * 7            if t is what we're looking for:
5176  * 8                return t
5177  * 9            for all edges e in G.adjacentEdges(t) do
5178  * 10               if edge e is already labelled
5179  * 11                   continue with the next edge
5180  * 12               w <- G.adjacentVertex(t,e)
5181  * 13               if vertex w is not discovered and not explored
5182  * 14                   label e as tree-edge
5183  * 15                   label w as discovered
5184  * 16                   S.push(w)
5185  * 17                   continue at 5
5186  * 18               else if vertex w is discovered
5187  * 19                   label e as back-edge
5188  * 20               else
5189  * 21                   // vertex w is explored
5190  * 22                   label e as forward- or cross-edge
5191  * 23           label t as explored
5192  * 24           S.pop()
5193  *
5194  * convention:
5195  * 0x10 - discovered
5196  * 0x11 - discovered and fall-through edge labelled
5197  * 0x12 - discovered and fall-through and branch edges labelled
5198  * 0x20 - explored
5199  */
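/* Illustrative sketch of a rejected loop (hypothetical program):
 *   0: r0 = 0
 *   1: r0 += 1
 *   2: if r0 < 10 goto pc-2   ; jumps back to insn 1
 *   3: exit
 * the DFS below marks insns 0, 1 and 2 as discovered; when it takes the
 * branch edge of insn 2 the target insn 1 is still only DISCOVERED, so
 * push_insn() reports "back-edge from insn 2 to 1" and check_cfg()
 * fails with -EINVAL.
 */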
5200 
5201 enum {
5202 	DISCOVERED = 0x10,
5203 	EXPLORED = 0x20,
5204 	FALLTHROUGH = 1,
5205 	BRANCH = 2,
5206 };
5207 
5208 #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
5209 
5210 static int *insn_stack;	/* stack of insns to process */
5211 static int cur_stack;	/* current stack index */
5212 static int *insn_state;
5213 
5214 /* t, w, e - match pseudo-code above:
5215  * t - index of current instruction
5216  * w - next instruction
5217  * e - edge
5218  */
5219 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
5220 {
5221 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
5222 		return 0;
5223 
5224 	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
5225 		return 0;
5226 
5227 	if (w < 0 || w >= env->prog->len) {
5228 		verbose_linfo(env, t, "%d: ", t);
5229 		verbose(env, "jump out of range from insn %d to %d\n", t, w);
5230 		return -EINVAL;
5231 	}
5232 
5233 	if (e == BRANCH)
5234 		/* mark branch target for state pruning */
5235 		env->explored_states[w] = STATE_LIST_MARK;
5236 
5237 	if (insn_state[w] == 0) {
5238 		/* tree-edge */
5239 		insn_state[t] = DISCOVERED | e;
5240 		insn_state[w] = DISCOVERED;
5241 		if (cur_stack >= env->prog->len)
5242 			return -E2BIG;
5243 		insn_stack[cur_stack++] = w;
5244 		return 1;
5245 	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
5246 		verbose_linfo(env, t, "%d: ", t);
5247 		verbose_linfo(env, w, "%d: ", w);
5248 		verbose(env, "back-edge from insn %d to %d\n", t, w);
5249 		return -EINVAL;
5250 	} else if (insn_state[w] == EXPLORED) {
5251 		/* forward- or cross-edge */
5252 		insn_state[t] = DISCOVERED | e;
5253 	} else {
5254 		verbose(env, "insn state internal bug\n");
5255 		return -EFAULT;
5256 	}
5257 	return 0;
5258 }
5259 
5260 /* non-recursive depth-first-search to detect loops in BPF program
5261  * loop == back-edge in directed graph
5262  */
5263 static int check_cfg(struct bpf_verifier_env *env)
5264 {
5265 	struct bpf_insn *insns = env->prog->insnsi;
5266 	int insn_cnt = env->prog->len;
5267 	int ret = 0;
5268 	int i, t;
5269 
5270 	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
5271 	if (!insn_state)
5272 		return -ENOMEM;
5273 
5274 	insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
5275 	if (!insn_stack) {
5276 		kfree(insn_state);
5277 		return -ENOMEM;
5278 	}
5279 
5280 	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
5281 	insn_stack[0] = 0; /* 0 is the first instruction */
5282 	cur_stack = 1;
5283 
5284 peek_stack:
5285 	if (cur_stack == 0)
5286 		goto check_state;
5287 	t = insn_stack[cur_stack - 1];
5288 
5289 	if (BPF_CLASS(insns[t].code) == BPF_JMP ||
5290 	    BPF_CLASS(insns[t].code) == BPF_JMP32) {
5291 		u8 opcode = BPF_OP(insns[t].code);
5292 
5293 		if (opcode == BPF_EXIT) {
5294 			goto mark_explored;
5295 		} else if (opcode == BPF_CALL) {
5296 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
5297 			if (ret == 1)
5298 				goto peek_stack;
5299 			else if (ret < 0)
5300 				goto err_free;
5301 			if (t + 1 < insn_cnt)
5302 				env->explored_states[t + 1] = STATE_LIST_MARK;
5303 			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
5304 				env->explored_states[t] = STATE_LIST_MARK;
5305 				ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
5306 				if (ret == 1)
5307 					goto peek_stack;
5308 				else if (ret < 0)
5309 					goto err_free;
5310 			}
5311 		} else if (opcode == BPF_JA) {
5312 			if (BPF_SRC(insns[t].code) != BPF_K) {
5313 				ret = -EINVAL;
5314 				goto err_free;
5315 			}
5316 			/* unconditional jump with single edge */
5317 			ret = push_insn(t, t + insns[t].off + 1,
5318 					FALLTHROUGH, env);
5319 			if (ret == 1)
5320 				goto peek_stack;
5321 			else if (ret < 0)
5322 				goto err_free;
5323 			/* tell verifier to check for equivalent states
5324 			 * after every call and jump
5325 			 */
5326 			if (t + 1 < insn_cnt)
5327 				env->explored_states[t + 1] = STATE_LIST_MARK;
5328 		} else {
5329 			/* conditional jump with two edges */
5330 			env->explored_states[t] = STATE_LIST_MARK;
5331 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
5332 			if (ret == 1)
5333 				goto peek_stack;
5334 			else if (ret < 0)
5335 				goto err_free;
5336 
5337 			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
5338 			if (ret == 1)
5339 				goto peek_stack;
5340 			else if (ret < 0)
5341 				goto err_free;
5342 		}
5343 	} else {
5344 		/* all other non-branch instructions with single
5345 		 * fall-through edge
5346 		 */
5347 		ret = push_insn(t, t + 1, FALLTHROUGH, env);
5348 		if (ret == 1)
5349 			goto peek_stack;
5350 		else if (ret < 0)
5351 			goto err_free;
5352 	}
5353 
5354 mark_explored:
5355 	insn_state[t] = EXPLORED;
5356 	if (cur_stack-- <= 0) {
5357 		verbose(env, "pop stack internal bug\n");
5358 		ret = -EFAULT;
5359 		goto err_free;
5360 	}
5361 	goto peek_stack;
5362 
5363 check_state:
5364 	for (i = 0; i < insn_cnt; i++) {
5365 		if (insn_state[i] != EXPLORED) {
5366 			verbose(env, "unreachable insn %d\n", i);
5367 			ret = -EINVAL;
5368 			goto err_free;
5369 		}
5370 	}
5371 	ret = 0; /* cfg looks good */
5372 
5373 err_free:
5374 	kfree(insn_state);
5375 	kfree(insn_stack);
5376 	return ret;
5377 }
5378 
5379 /* The minimum supported BTF func info size */
5380 #define MIN_BPF_FUNCINFO_SIZE	8
5381 #define MAX_FUNCINFO_REC_SIZE	252
5382 
5383 static int check_btf_func(struct bpf_verifier_env *env,
5384 			  const union bpf_attr *attr,
5385 			  union bpf_attr __user *uattr)
5386 {
5387 	u32 i, nfuncs, urec_size, min_size;
5388 	u32 krec_size = sizeof(struct bpf_func_info);
5389 	struct bpf_func_info *krecord;
5390 	const struct btf_type *type;
5391 	struct bpf_prog *prog;
5392 	const struct btf *btf;
5393 	void __user *urecord;
5394 	u32 prev_offset = 0;
5395 	int ret = 0;
5396 
5397 	nfuncs = attr->func_info_cnt;
5398 	if (!nfuncs)
5399 		return 0;
5400 
5401 	if (nfuncs != env->subprog_cnt) {
5402 		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
5403 		return -EINVAL;
5404 	}
5405 
5406 	urec_size = attr->func_info_rec_size;
5407 	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
5408 	    urec_size > MAX_FUNCINFO_REC_SIZE ||
5409 	    urec_size % sizeof(u32)) {
5410 		verbose(env, "invalid func info rec size %u\n", urec_size);
5411 		return -EINVAL;
5412 	}
5413 
5414 	prog = env->prog;
5415 	btf = prog->aux->btf;
5416 
5417 	urecord = u64_to_user_ptr(attr->func_info);
5418 	min_size = min_t(u32, krec_size, urec_size);
5419 
5420 	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
5421 	if (!krecord)
5422 		return -ENOMEM;
5423 
5424 	for (i = 0; i < nfuncs; i++) {
5425 		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
5426 		if (ret) {
5427 			if (ret == -E2BIG) {
5428 				verbose(env, "nonzero trailing record in func info");
5429 				/* set the size the kernel expects so the loader can zero
5430 				 * out the rest of the record.
5431 				 */
5432 				if (put_user(min_size, &uattr->func_info_rec_size))
5433 					ret = -EFAULT;
5434 			}
5435 			goto err_free;
5436 		}
5437 
5438 		if (copy_from_user(&krecord[i], urecord, min_size)) {
5439 			ret = -EFAULT;
5440 			goto err_free;
5441 		}
5442 
5443 		/* check insn_off */
5444 		if (i == 0) {
5445 			if (krecord[i].insn_off) {
5446 				verbose(env,
5447 					"nonzero insn_off %u for the first func info record",
5448 					krecord[i].insn_off);
5449 				ret = -EINVAL;
5450 				goto err_free;
5451 			}
5452 		} else if (krecord[i].insn_off <= prev_offset) {
5453 			verbose(env,
5454 				"same or smaller insn offset (%u) than previous func info record (%u)",
5455 				krecord[i].insn_off, prev_offset);
5456 			ret = -EINVAL;
5457 			goto err_free;
5458 		}
5459 
5460 		if (env->subprog_info[i].start != krecord[i].insn_off) {
5461 			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
5462 			ret = -EINVAL;
5463 			goto err_free;
5464 		}
5465 
5466 		/* check type_id */
5467 		type = btf_type_by_id(btf, krecord[i].type_id);
5468 		if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
5469 			verbose(env, "invalid type id %d in func info",
5470 				krecord[i].type_id);
5471 			ret = -EINVAL;
5472 			goto err_free;
5473 		}
5474 
5475 		prev_offset = krecord[i].insn_off;
5476 		urecord += urec_size;
5477 	}
5478 
5479 	prog->aux->func_info = krecord;
5480 	prog->aux->func_info_cnt = nfuncs;
5481 	return 0;
5482 
5483 err_free:
5484 	kvfree(krecord);
5485 	return ret;
5486 }
5487 
5488 static void adjust_btf_func(struct bpf_verifier_env *env)
5489 {
5490 	int i;
5491 
5492 	if (!env->prog->aux->func_info)
5493 		return;
5494 
5495 	for (i = 0; i < env->subprog_cnt; i++)
5496 		env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
5497 }
5498 
5499 #define MIN_BPF_LINEINFO_SIZE	(offsetof(struct bpf_line_info, line_col) + \
5500 		sizeof(((struct bpf_line_info *)(0))->line_col))
5501 #define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
5502 
5503 static int check_btf_line(struct bpf_verifier_env *env,
5504 			  const union bpf_attr *attr,
5505 			  union bpf_attr __user *uattr)
5506 {
5507 	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
5508 	struct bpf_subprog_info *sub;
5509 	struct bpf_line_info *linfo;
5510 	struct bpf_prog *prog;
5511 	const struct btf *btf;
5512 	void __user *ulinfo;
5513 	int err;
5514 
5515 	nr_linfo = attr->line_info_cnt;
5516 	if (!nr_linfo)
5517 		return 0;
5518 
5519 	rec_size = attr->line_info_rec_size;
5520 	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
5521 	    rec_size > MAX_LINEINFO_REC_SIZE ||
5522 	    rec_size & (sizeof(u32) - 1))
5523 		return -EINVAL;
5524 
5525 	/* Need to zero it because userspace may pass in
5526 	 * a smaller bpf_line_info object.
5527 	 */
5528 	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
5529 			 GFP_KERNEL | __GFP_NOWARN);
5530 	if (!linfo)
5531 		return -ENOMEM;
5532 
5533 	prog = env->prog;
5534 	btf = prog->aux->btf;
5535 
5536 	s = 0;
5537 	sub = env->subprog_info;
5538 	ulinfo = u64_to_user_ptr(attr->line_info);
5539 	expected_size = sizeof(struct bpf_line_info);
5540 	ncopy = min_t(u32, expected_size, rec_size);
5541 	for (i = 0; i < nr_linfo; i++) {
5542 		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
5543 		if (err) {
5544 			if (err == -E2BIG) {
5545 				verbose(env, "nonzero trailing record in line_info");
5546 				if (put_user(expected_size,
5547 					     &uattr->line_info_rec_size))
5548 					err = -EFAULT;
5549 			}
5550 			goto err_free;
5551 		}
5552 
5553 		if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
5554 			err = -EFAULT;
5555 			goto err_free;
5556 		}
5557 
5558 		/*
5559 		 * Check insn_off to ensure
5560 		 * 1) strictly increasing AND
5561 		 * 2) bounded by prog->len
5562 		 *
5563 		 * The linfo[0].insn_off == 0 check logically falls into
5564 		 * the later "missing bpf_line_info for func..." case
5565 		 * because the first linfo[0].insn_off must match the
5566 		 * start of the first subprog and the first subprog must have
5567 		 * subprog_info[0].start == 0.
5568 		 */
5569 		if ((i && linfo[i].insn_off <= prev_offset) ||
5570 		    linfo[i].insn_off >= prog->len) {
5571 			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
5572 				i, linfo[i].insn_off, prev_offset,
5573 				prog->len);
5574 			err = -EINVAL;
5575 			goto err_free;
5576 		}
5577 
5578 		if (!prog->insnsi[linfo[i].insn_off].code) {
5579 			verbose(env,
5580 				"Invalid insn code at line_info[%u].insn_off\n",
5581 				i);
5582 			err = -EINVAL;
5583 			goto err_free;
5584 		}
5585 
5586 		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
5587 		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
5588 			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
5589 			err = -EINVAL;
5590 			goto err_free;
5591 		}
5592 
5593 		if (s != env->subprog_cnt) {
5594 			if (linfo[i].insn_off == sub[s].start) {
5595 				sub[s].linfo_idx = i;
5596 				s++;
5597 			} else if (sub[s].start < linfo[i].insn_off) {
5598 				verbose(env, "missing bpf_line_info for func#%u\n", s);
5599 				err = -EINVAL;
5600 				goto err_free;
5601 			}
5602 		}
5603 
5604 		prev_offset = linfo[i].insn_off;
5605 		ulinfo += rec_size;
5606 	}
5607 
5608 	if (s != env->subprog_cnt) {
5609 		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
5610 			env->subprog_cnt - s, s);
5611 		err = -EINVAL;
5612 		goto err_free;
5613 	}
5614 
5615 	prog->aux->linfo = linfo;
5616 	prog->aux->nr_linfo = nr_linfo;
5617 
5618 	return 0;
5619 
5620 err_free:
5621 	kvfree(linfo);
5622 	return err;
5623 }
5624 
5625 static int check_btf_info(struct bpf_verifier_env *env,
5626 			  const union bpf_attr *attr,
5627 			  union bpf_attr __user *uattr)
5628 {
5629 	struct btf *btf;
5630 	int err;
5631 
5632 	if (!attr->func_info_cnt && !attr->line_info_cnt)
5633 		return 0;
5634 
5635 	btf = btf_get_by_fd(attr->prog_btf_fd);
5636 	if (IS_ERR(btf))
5637 		return PTR_ERR(btf);
5638 	env->prog->aux->btf = btf;
5639 
5640 	err = check_btf_func(env, attr, uattr);
5641 	if (err)
5642 		return err;
5643 
5644 	err = check_btf_line(env, attr, uattr);
5645 	if (err)
5646 		return err;
5647 
5648 	return 0;
5649 }
5650 
5651 /* check %cur's range satisfies %old's */
5652 static bool range_within(struct bpf_reg_state *old,
5653 			 struct bpf_reg_state *cur)
5654 {
5655 	return old->umin_value <= cur->umin_value &&
5656 	       old->umax_value >= cur->umax_value &&
5657 	       old->smin_value <= cur->smin_value &&
5658 	       old->smax_value >= cur->smax_value;
5659 }
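
/* Worked example (made-up numbers): old umin/umax = [2, 10] with
 * cur = [3, 7] is within; cur = [0, 7] is not, because
 * cur->umin_value < old->umin_value. The same containment must hold
 * for the signed bounds as well.
 */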
5660 
5661 /* Maximum number of register states that can exist at once */
5662 #define ID_MAP_SIZE	(MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
5663 struct idpair {
5664 	u32 old;
5665 	u32 cur;
5666 };
5667 
5668 /* If in the old state two registers had the same id, then they need to have
5669  * the same id in the new state as well.  But that id could be different from
5670  * the old state, so we need to track the mapping from old to new ids.
5671  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
5672  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
5673  * regs with a different old id could still have new id 9, we don't care about
5674  * that.
5675  * So we look through our idmap to see if this old id has been seen before.  If
5676  * so, we require the new id to match; otherwise, we add the id pair to the map.
5677  */
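/* Illustrative sketch: if in the old state r1.id == 5 and r2.id == 5
 * (both came from the same map lookup), the current state may use any
 * id, say 9, as long as r1 and r2 still share it. Seeing r1 with new
 * id 9 but r2 with new id 11 makes check_ids() return false, so
 * regsafe() refuses to treat the states as equivalent and pruning is
 * not performed.
 */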
5678 static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
5679 {
5680 	unsigned int i;
5681 
5682 	for (i = 0; i < ID_MAP_SIZE; i++) {
5683 		if (!idmap[i].old) {
5684 			/* Reached an empty slot; haven't seen this id before */
5685 			idmap[i].old = old_id;
5686 			idmap[i].cur = cur_id;
5687 			return true;
5688 		}
5689 		if (idmap[i].old == old_id)
5690 			return idmap[i].cur == cur_id;
5691 	}
5692 	/* We ran out of idmap slots, which should be impossible */
5693 	WARN_ON_ONCE(1);
5694 	return false;
5695 }
5696 
5697 static void clean_func_state(struct bpf_verifier_env *env,
5698 			     struct bpf_func_state *st)
5699 {
5700 	enum bpf_reg_liveness live;
5701 	int i, j;
5702 
5703 	for (i = 0; i < BPF_REG_FP; i++) {
5704 		live = st->regs[i].live;
5705 		/* liveness must not touch this register anymore */
5706 		st->regs[i].live |= REG_LIVE_DONE;
5707 		if (!(live & REG_LIVE_READ))
5708 			/* since the register is unused, clear its state
5709 			 * to make further comparison simpler
5710 			 */
5711 			__mark_reg_not_init(&st->regs[i]);
5712 	}
5713 
5714 	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
5715 		live = st->stack[i].spilled_ptr.live;
5716 		/* liveness must not touch this stack slot anymore */
5717 		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
5718 		if (!(live & REG_LIVE_READ)) {
5719 			__mark_reg_not_init(&st->stack[i].spilled_ptr);
5720 			for (j = 0; j < BPF_REG_SIZE; j++)
5721 				st->stack[i].slot_type[j] = STACK_INVALID;
5722 		}
5723 	}
5724 }
5725 
5726 static void clean_verifier_state(struct bpf_verifier_env *env,
5727 				 struct bpf_verifier_state *st)
5728 {
5729 	int i;
5730 
5731 	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
5732 		/* all regs in this state in all frames were already marked */
5733 		return;
5734 
5735 	for (i = 0; i <= st->curframe; i++)
5736 		clean_func_state(env, st->frame[i]);
5737 }
5738 
5739 /* the parentage chains form a tree.
5740  * the verifier states are added to state lists at given insn and
5741  * pushed into state stack for future exploration.
5742  * when the verifier reaches bpf_exit insn some of the verifier states
5743  * stored in the state lists have their final liveness state already,
5744  * but a lot of states will get revised from liveness point of view when
5745  * the verifier explores other branches.
5746  * Example:
5747  * 1: r0 = 1
5748  * 2: if r1 == 100 goto pc+1
5749  * 3: r0 = 2
5750  * 4: exit
5751  * when the verifier reaches exit insn the register r0 in the state list of
5752  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
5753  * of insn 2 and goes exploring further. At the insn 4 it will walk the
5754  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
5755  *
5756  * Since the verifier pushes the branch states as it sees them while exploring
5757  * the program the condition of walking the branch instruction for the second
5758  * the program, walking the same branch instruction for the second
5759  * time means that all states below this branch were already explored and
5760  * their final liveness marks are already propagated.
5761  * Hence when the verifier completes the search of the state list in is_state_visited()
5762  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
5763  * will not be used.
5764  * This function also clears the registers and stack for states that !READ
5765  * This function also clears the registers and stack slots that are !READ
5766  *
5767  * Important note here that walking the same branch instruction in the callee
5768  * Important note: walking the same branch instruction in the callee
5769  * doesn't mean that the states are DONE. The verifier has to compare
5770  * the callsites as well.
 */
5771 static void clean_live_states(struct bpf_verifier_env *env, int insn,
5772 			      struct bpf_verifier_state *cur)
5773 {
5774 	struct bpf_verifier_state_list *sl;
5775 	int i;
5776 
5777 	sl = env->explored_states[insn];
5778 	if (!sl)
5779 		return;
5780 
5781 	while (sl != STATE_LIST_MARK) {
5782 		if (sl->state.curframe != cur->curframe)
5783 			goto next;
5784 		for (i = 0; i <= cur->curframe; i++)
5785 			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
5786 				goto next;
5787 		clean_verifier_state(env, &sl->state);
5788 next:
5789 		sl = sl->next;
5790 	}
5791 }
5792 
5793 /* Returns true if (rold safe implies rcur safe) */
5794 static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
5795 		    struct idpair *idmap)
5796 {
5797 	bool equal;
5798 
5799 	if (!(rold->live & REG_LIVE_READ))
5800 		/* explored state didn't use this */
5801 		return true;
5802 
5803 	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
5804 
5805 	if (rold->type == PTR_TO_STACK)
5806 		/* two stack pointers are equal only if they're pointing to
5807 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
5808 		 */
5809 		return equal && rold->frameno == rcur->frameno;
5810 
5811 	if (equal)
5812 		return true;
5813 
5814 	if (rold->type == NOT_INIT)
5815 		/* explored state can't have used this */
5816 		return true;
5817 	if (rcur->type == NOT_INIT)
5818 		return false;
5819 	switch (rold->type) {
5820 	case SCALAR_VALUE:
5821 		if (rcur->type == SCALAR_VALUE) {
5822 			/* new val must satisfy old val knowledge */
5823 			return range_within(rold, rcur) &&
5824 			       tnum_in(rold->var_off, rcur->var_off);
5825 		} else {
5826 			/* We're trying to use a pointer in place of a scalar.
5827 			 * Even if the scalar was unbounded, this could lead to
5828 			 * pointer leaks because scalars are allowed to leak
5829 			 * while pointers are not. We could make this safe in
5830 			 * special cases if root is calling us, but it's
5831 			 * probably not worth the hassle.
5832 			 */
5833 			return false;
5834 		}
5835 	case PTR_TO_MAP_VALUE:
5836 		/* If the new min/max/var_off satisfy the old ones and
5837 		 * everything else matches, we are OK.
5838 		 * 'id' is not compared, since it's only used for maps with
5839 		 * bpf_spin_lock inside map element and in such cases if
5840 		 * the rest of the prog is valid for one map element then
5841 		 * it's valid for all map elements regardless of the key
5842 		 * used in bpf_map_lookup()
5843 		 */
5844 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
5845 		       range_within(rold, rcur) &&
5846 		       tnum_in(rold->var_off, rcur->var_off);
5847 	case PTR_TO_MAP_VALUE_OR_NULL:
5848 		/* a PTR_TO_MAP_VALUE could be safe to use as a
5849 		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
5850 		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
5851 		 * checked, doing so could have affected others with the same
5852 		 * id, and we can't check for that because we lost the id when
5853 		 * we converted to a PTR_TO_MAP_VALUE.
5854 		 */
5855 		if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
5856 			return false;
5857 		if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
5858 			return false;
5859 		/* Check our ids match any regs they're supposed to */
5860 		return check_ids(rold->id, rcur->id, idmap);
5861 	case PTR_TO_PACKET_META:
5862 	case PTR_TO_PACKET:
5863 		if (rcur->type != rold->type)
5864 			return false;
5865 		/* We must have at least as much range as the old ptr
5866 		 * did, so that any accesses which were safe before are
5867 		 * still safe.  This is true even if old range < old off,
5868 		 * since someone could have accessed through (ptr - k), or
5869 		 * even done ptr -= k in a register, to get a safe access.
5870 		 */
5871 		if (rold->range > rcur->range)
5872 			return false;
5873 		/* If the offsets don't match, we can't trust our alignment;
5874 		 * nor can we be sure that we won't fall out of range.
5875 		 */
5876 		if (rold->off != rcur->off)
5877 			return false;
5878 		/* id relations must be preserved */
5879 		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
5880 			return false;
5881 		/* new val must satisfy old val knowledge */
5882 		return range_within(rold, rcur) &&
5883 		       tnum_in(rold->var_off, rcur->var_off);
5884 	case PTR_TO_CTX:
5885 	case CONST_PTR_TO_MAP:
5886 	case PTR_TO_PACKET_END:
5887 	case PTR_TO_FLOW_KEYS:
5888 	case PTR_TO_SOCKET:
5889 	case PTR_TO_SOCKET_OR_NULL:
5890 	case PTR_TO_SOCK_COMMON:
5891 	case PTR_TO_SOCK_COMMON_OR_NULL:
5892 	case PTR_TO_TCP_SOCK:
5893 	case PTR_TO_TCP_SOCK_OR_NULL:
5894 		/* Only valid matches are exact, which memcmp() above
5895 		 * would have accepted
5896 		 */
5897 	default:
5898 		/* Don't know what's going on, just say it's not safe */
5899 		return false;
5900 	}
5901 
5902 	/* Shouldn't get here; if we do, say it's not safe */
5903 	WARN_ON_ONCE(1);
5904 	return false;
5905 }
5906 
5907 static bool stacksafe(struct bpf_func_state *old,
5908 		      struct bpf_func_state *cur,
5909 		      struct idpair *idmap)
5910 {
5911 	int i, spi;
5912 
5913 	/* walk slots of the explored stack and ignore any additional
5914 	 * slots in the current stack, since explored(safe) state
5915 	 * didn't use them
5916 	 */
5917 	for (i = 0; i < old->allocated_stack; i++) {
5918 		spi = i / BPF_REG_SIZE;
5919 
5920 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
5921 			i += BPF_REG_SIZE - 1;
5922 			/* explored state didn't use this */
5923 			continue;
5924 		}
5925 
5926 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
5927 			continue;
5928 
5929 		/* explored stack has more populated slots than current stack
5930 		 * and these slots were used
5931 		 */
5932 		if (i >= cur->allocated_stack)
5933 			return false;
5934 
5935 		/* if old state was safe with misc data in the stack
5936 		 * it will be safe with zero-initialized stack.
5937 		 * The opposite is not true
5938 		 */
5939 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
5940 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
5941 			continue;
5942 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
5943 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
5944 			/* Ex: old explored (safe) state has STACK_SPILL in
5945 			 * this stack slot, but current has STACK_MISC ->
5946 			 * these verifier states are not equivalent,
5947 			 * return false to continue verification of this path
5948 			 */
5949 			return false;
5950 		if (i % BPF_REG_SIZE)
5951 			continue;
5952 		if (old->stack[spi].slot_type[0] != STACK_SPILL)
5953 			continue;
5954 		if (!regsafe(&old->stack[spi].spilled_ptr,
5955 			     &cur->stack[spi].spilled_ptr,
5956 			     idmap))
5957 			/* when explored and current stack slot are both storing
5958 			 * spilled registers, check that stored pointers types
5959 			 * are the same as well.
5960 			 * Ex: explored safe path could have stored
5961 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
5962 			 * but current path has stored:
5963 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
5964 			 * such verifier states are not equivalent.
5965 			 * return false to continue verification of this path
5966 			 */
5967 			return false;
5968 	}
5969 	return true;
5970 }
5971 
5972 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
5973 {
5974 	if (old->acquired_refs != cur->acquired_refs)
5975 		return false;
5976 	return !memcmp(old->refs, cur->refs,
5977 		       sizeof(*old->refs) * old->acquired_refs);
5978 }
5979 
5980 /* compare two verifier states
5981  *
5982  * all states stored in state_list are known to be valid, since
5983  * verifier reached 'bpf_exit' instruction through them
5984  *
5985  * this function is called when the verifier explores different branches of
5986  * execution popped from the state stack. If it sees an old state that has
5987  * more strict register state and more strict stack state then this execution
5988  * branch doesn't need to be explored further, since verifier already
5989  * concluded that more strict state leads to valid finish.
5990  *
5991  * Therefore two states are equivalent if register state is more conservative
5992  * and explored stack state is more conservative than the current one.
5993  * Example:
5994  *       explored                   current
5995  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
5996  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
5997  *
5998  * In other words if current stack state (one being explored) has more
5999  * valid slots than the old one that already passed validation, it means
6000  * the verifier can stop exploring and conclude that current state is valid too
6001  *
6002  * Similarly with registers. If explored state has register type as invalid
6003  * whereas register type in current state is meaningful, it means that
6004  * the current state will reach 'bpf_exit' instruction safely
6005  * the current state will reach 'bpf_exit' instruction safely
 */
6006 static bool func_states_equal(struct bpf_func_state *old,
6007 			      struct bpf_func_state *cur)
6008 {
6009 	struct idpair *idmap;
6010 	bool ret = false;
6011 	int i;
6012 
6013 	idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
6014 	/* If we failed to allocate the idmap, just say it's not safe */
6015 	if (!idmap)
6016 		return false;
6017 
6018 	for (i = 0; i < MAX_BPF_REG; i++) {
6019 		if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
6020 			goto out_free;
6021 	}
6022 
6023 	if (!stacksafe(old, cur, idmap))
6024 		goto out_free;
6025 
6026 	if (!refsafe(old, cur))
6027 		goto out_free;
6028 	ret = true;
6029 out_free:
6030 	kfree(idmap);
6031 	return ret;
6032 }
6033 
6034 static bool states_equal(struct bpf_verifier_env *env,
6035 			 struct bpf_verifier_state *old,
6036 			 struct bpf_verifier_state *cur)
6037 {
6038 	int i;
6039 
6040 	if (old->curframe != cur->curframe)
6041 		return false;
6042 
6043 	/* Verification state from speculative execution simulation
6044 	 * must never prune a non-speculative execution one.
6045 	 */
6046 	if (old->speculative && !cur->speculative)
6047 		return false;
6048 
6049 	if (old->active_spin_lock != cur->active_spin_lock)
6050 		return false;
6051 
6052 	/* for states to be equal callsites have to be the same
6053 	 * and all frame states need to be equivalent
6054 	 */
6055 	for (i = 0; i <= old->curframe; i++) {
6056 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
6057 			return false;
6058 		if (!func_states_equal(old->frame[i], cur->frame[i]))
6059 			return false;
6060 	}
6061 	return true;
6062 }
6063 
6064 /* A write screens off any subsequent reads; but write marks come from the
6065  * straight-line code between a state and its parent.  When we arrive at an
6066  * equivalent state (jump target or such) we didn't arrive by the straight-line
6067  * code, so read marks in the state must propagate to the parent regardless
6068  * of the state's write marks. That's what 'parent == state->parent' comparison
6069  * in mark_reg_read() is for.
6070  */
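/* Hypothetical walk-through: an explored state at some insn X recorded
 * that its continuation read r6 (REG_LIVE_READ). When a new path reaches
 * X with an equivalent state and gets pruned, the pruned continuation
 * would have performed the same read, so propagate_liveness() copies
 * that read mark into the new path's parentage chain via mark_reg_read().
 */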
6071 static int propagate_liveness(struct bpf_verifier_env *env,
6072 			      const struct bpf_verifier_state *vstate,
6073 			      struct bpf_verifier_state *vparent)
6074 {
6075 	int i, frame, err = 0;
6076 	struct bpf_func_state *state, *parent;
6077 
6078 	if (vparent->curframe != vstate->curframe) {
6079 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
6080 		     vparent->curframe, vstate->curframe);
6081 		return -EFAULT;
6082 	}
6083 	/* Propagate read liveness of registers... */
6084 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
6085 	for (frame = 0; frame <= vstate->curframe; frame++) {
6086 		/* We don't need to worry about FP liveness, it's read-only */
6087 		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
6088 			if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ)
6089 				continue;
6090 			if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) {
6091 				err = mark_reg_read(env, &vstate->frame[frame]->regs[i],
6092 						    &vparent->frame[frame]->regs[i]);
6093 				if (err)
6094 					return err;
6095 			}
6096 		}
6097 	}
6098 
6099 	/* ... and stack slots */
6100 	for (frame = 0; frame <= vstate->curframe; frame++) {
6101 		state = vstate->frame[frame];
6102 		parent = vparent->frame[frame];
6103 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
6104 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
6105 			if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
6106 				continue;
6107 			if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
6108 				mark_reg_read(env, &state->stack[i].spilled_ptr,
6109 					      &parent->stack[i].spilled_ptr);
6110 		}
6111 	}
6112 	return err;
6113 }
6114 
6115 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6116 {
6117 	struct bpf_verifier_state_list *new_sl;
6118 	struct bpf_verifier_state_list *sl;
6119 	struct bpf_verifier_state *cur = env->cur_state, *new;
6120 	int i, j, err, states_cnt = 0;
6121 
6122 	sl = env->explored_states[insn_idx];
6123 	if (!sl)
6124 		/* this 'insn_idx' instruction wasn't marked, so we will not
6125 		 * be doing state search here
6126 		 */
6127 		return 0;
6128 
6129 	clean_live_states(env, insn_idx, cur);
6130 
6131 	while (sl != STATE_LIST_MARK) {
6132 		if (states_equal(env, &sl->state, cur)) {
6133 			/* reached equivalent register/stack state,
6134 			 * prune the search.
6135 			 * Registers read by the continuation are read by us.
6136 			 * If we have any write marks in env->cur_state, they
6137 			 * will prevent corresponding reads in the continuation
6138 			 * from reaching our parent (an explored_state).  Our
6139 			 * own state will get the read marks recorded, but
6140 			 * they'll be immediately forgotten as we're pruning
6141 			 * this state and will pop a new one.
6142 			 */
6143 			err = propagate_liveness(env, &sl->state, cur);
6144 			if (err)
6145 				return err;
6146 			return 1;
6147 		}
6148 		sl = sl->next;
6149 		states_cnt++;
6150 	}
6151 
6152 	if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
6153 		return 0;
6154 
6155 	/* there were no equivalent states, remember current one.
6156 	 * technically the current state is not proven to be safe yet,
6157 	 * but it will either reach the outermost bpf_exit (which means it's safe)
6158 	 * or it will be rejected. Since there are no loops, we won't be
6159 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
6160 	 * again on the way to bpf_exit
6161 	 */
6162 	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
6163 	if (!new_sl)
6164 		return -ENOMEM;
6165 
6166 	/* add new state to the head of linked list */
6167 	new = &new_sl->state;
6168 	err = copy_verifier_state(new, cur);
6169 	if (err) {
6170 		free_verifier_state(new, false);
6171 		kfree(new_sl);
6172 		return err;
6173 	}
6174 	new_sl->next = env->explored_states[insn_idx];
6175 	env->explored_states[insn_idx] = new_sl;
6176 	/* connect new state to parentage chain. Current frame needs all
6177 	 * registers connected. Only r6 - r9 of the callers are alive (pushed
6178 	 * to the stack implicitly by JITs) so in callers' frames connect just
6179 	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
6180 	 * the state of the call instruction (with WRITTEN set), and r0 comes
6181 	 * from callee with its full parentage chain, anyway.
6182 	 */
6183 	for (j = 0; j <= cur->curframe; j++)
6184 		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
6185 			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
6186 	/* clear write marks in current state: the writes we did are not writes
6187 	 * our child did, so they don't screen off its reads from us.
6188 	 * (There are no read marks in current state, because reads always mark
6189 	 * their parent and current state never has children yet.  Only
6190 	 * explored_states can get read marks.)
6191 	 */
6192 	for (i = 0; i < BPF_REG_FP; i++)
6193 		cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
6194 
6195 	/* all stack frames are accessible from callee, clear them all */
6196 	for (j = 0; j <= cur->curframe; j++) {
6197 		struct bpf_func_state *frame = cur->frame[j];
6198 		struct bpf_func_state *newframe = new->frame[j];
6199 
6200 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
6201 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
6202 			frame->stack[i].spilled_ptr.parent =
6203 						&newframe->stack[i].spilled_ptr;
6204 		}
6205 	}
6206 	return 0;
6207 }
6208 
6209 /* Return true if it's OK to have the same insn return a different type. */
6210 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
6211 {
6212 	switch (type) {
6213 	case PTR_TO_CTX:
6214 	case PTR_TO_SOCKET:
6215 	case PTR_TO_SOCKET_OR_NULL:
6216 	case PTR_TO_SOCK_COMMON:
6217 	case PTR_TO_SOCK_COMMON_OR_NULL:
6218 	case PTR_TO_TCP_SOCK:
6219 	case PTR_TO_TCP_SOCK_OR_NULL:
6220 		return false;
6221 	default:
6222 		return true;
6223 	}
6224 }
6225 
6226 /* If an instruction was previously used with particular pointer types, then we
6227  * need to be careful to avoid cases such as the below, where it may be ok
6228  * for one branch accessing the pointer, but not ok for the other branch:
6229  * for one branch to access the pointer, but not ok for the other branch:
6230  * R1 = sock_ptr
6231  * goto X;
6232  * ...
6233  * R1 = some_other_valid_ptr;
6234  * goto X;
6235  * ...
6236  * R2 = *(u32 *)(R1 + 0);
6237  */
6238 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
6239 {
6240 	return src != prev && (!reg_type_mismatch_ok(src) ||
6241 			       !reg_type_mismatch_ok(prev));
6242 }
6243 
6244 static int do_check(struct bpf_verifier_env *env)
6245 {
6246 	struct bpf_verifier_state *state;
6247 	struct bpf_insn *insns = env->prog->insnsi;
6248 	struct bpf_reg_state *regs;
6249 	int insn_cnt = env->prog->len, i;
6250 	int insn_processed = 0;
6251 	bool do_print_state = false;
6252 
6253 	env->prev_linfo = NULL;
6254 
6255 	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
6256 	if (!state)
6257 		return -ENOMEM;
6258 	state->curframe = 0;
6259 	state->speculative = false;
6260 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
6261 	if (!state->frame[0]) {
6262 		kfree(state);
6263 		return -ENOMEM;
6264 	}
6265 	env->cur_state = state;
6266 	init_func_state(env, state->frame[0],
6267 			BPF_MAIN_FUNC /* callsite */,
6268 			0 /* frameno */,
6269 			0 /* subprogno, zero == main subprog */);
6270 
6271 	for (;;) {
6272 		struct bpf_insn *insn;
6273 		u8 class;
6274 		int err;
6275 
6276 		if (env->insn_idx >= insn_cnt) {
6277 			verbose(env, "invalid insn idx %d insn_cnt %d\n",
6278 				env->insn_idx, insn_cnt);
6279 			return -EFAULT;
6280 		}
6281 
6282 		insn = &insns[env->insn_idx];
6283 		class = BPF_CLASS(insn->code);
6284 
6285 		if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
6286 			verbose(env,
6287 				"BPF program is too large. Processed %d insn\n",
6288 				insn_processed);
6289 			return -E2BIG;
6290 		}
6291 
6292 		err = is_state_visited(env, env->insn_idx);
6293 		if (err < 0)
6294 			return err;
6295 		if (err == 1) {
6296 			/* found equivalent state, can prune the search */
6297 			if (env->log.level) {
6298 				if (do_print_state)
6299 					verbose(env, "\nfrom %d to %d%s: safe\n",
6300 						env->prev_insn_idx, env->insn_idx,
6301 						env->cur_state->speculative ?
6302 						" (speculative execution)" : "");
6303 				else
6304 					verbose(env, "%d: safe\n", env->insn_idx);
6305 			}
6306 			goto process_bpf_exit;
6307 		}
6308 
6309 		if (signal_pending(current))
6310 			return -EAGAIN;
6311 
6312 		if (need_resched())
6313 			cond_resched();
6314 
6315 		if (env->log.level > 1 || (env->log.level && do_print_state)) {
6316 			if (env->log.level > 1)
6317 				verbose(env, "%d:", env->insn_idx);
6318 			else
6319 				verbose(env, "\nfrom %d to %d%s:",
6320 					env->prev_insn_idx, env->insn_idx,
6321 					env->cur_state->speculative ?
6322 					" (speculative execution)" : "");
6323 			print_verifier_state(env, state->frame[state->curframe]);
6324 			do_print_state = false;
6325 		}
6326 
6327 		if (env->log.level) {
6328 			const struct bpf_insn_cbs cbs = {
6329 				.cb_print	= verbose,
6330 				.private_data	= env,
6331 			};
6332 
6333 			verbose_linfo(env, env->insn_idx, "; ");
6334 			verbose(env, "%d: ", env->insn_idx);
6335 			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
6336 		}
6337 
6338 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
6339 			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
6340 							   env->prev_insn_idx);
6341 			if (err)
6342 				return err;
6343 		}
6344 
6345 		regs = cur_regs(env);
6346 		env->insn_aux_data[env->insn_idx].seen = true;
6347 
6348 		if (class == BPF_ALU || class == BPF_ALU64) {
6349 			err = check_alu_op(env, insn);
6350 			if (err)
6351 				return err;
6352 
6353 		} else if (class == BPF_LDX) {
6354 			enum bpf_reg_type *prev_src_type, src_reg_type;
6355 
6356 			/* check for reserved fields is already done */
6357 
6358 			/* check src operand */
6359 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
6360 			if (err)
6361 				return err;
6362 
6363 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6364 			if (err)
6365 				return err;
6366 
6367 			src_reg_type = regs[insn->src_reg].type;
6368 
6369 			/* check that memory (src_reg + off) is readable,
6370 			 * the state of dst_reg will be updated by this func
6371 			 */
6372 			err = check_mem_access(env, env->insn_idx, insn->src_reg,
6373 					       insn->off, BPF_SIZE(insn->code),
6374 					       BPF_READ, insn->dst_reg, false);
6375 			if (err)
6376 				return err;
6377 
6378 			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
6379 
6380 			if (*prev_src_type == NOT_INIT) {
6381 				/* saw a valid insn
6382 				 * dst_reg = *(u32 *)(src_reg + off)
6383 				 * save type to validate intersecting paths
6384 				 */
6385 				*prev_src_type = src_reg_type;
6386 
6387 			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
6388 				/* An abuser program is trying to use the same insn
6389 				 * dst_reg = *(u32*) (src_reg + off)
6390 				 * with different pointer types:
6391 				 * src_reg == ctx in one branch and
6392 				 * src_reg == stack|map in some other branch.
6393 				 * Reject it.
6394 				 */
6395 				verbose(env, "same insn cannot be used with different pointers\n");
6396 				return -EINVAL;
6397 			}
6398 
6399 		} else if (class == BPF_STX) {
6400 			enum bpf_reg_type *prev_dst_type, dst_reg_type;
6401 
6402 			if (BPF_MODE(insn->code) == BPF_XADD) {
6403 				err = check_xadd(env, env->insn_idx, insn);
6404 				if (err)
6405 					return err;
6406 				env->insn_idx++;
6407 				continue;
6408 			}
6409 
6410 			/* check src1 operand */
6411 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
6412 			if (err)
6413 				return err;
6414 			/* check src2 operand */
6415 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6416 			if (err)
6417 				return err;
6418 
6419 			dst_reg_type = regs[insn->dst_reg].type;
6420 
6421 			/* check that memory (dst_reg + off) is writeable */
6422 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
6423 					       insn->off, BPF_SIZE(insn->code),
6424 					       BPF_WRITE, insn->src_reg, false);
6425 			if (err)
6426 				return err;
6427 
6428 			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
6429 
6430 			if (*prev_dst_type == NOT_INIT) {
6431 				*prev_dst_type = dst_reg_type;
6432 			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
6433 				verbose(env, "same insn cannot be used with different pointers\n");
6434 				return -EINVAL;
6435 			}
6436 
6437 		} else if (class == BPF_ST) {
6438 			if (BPF_MODE(insn->code) != BPF_MEM ||
6439 			    insn->src_reg != BPF_REG_0) {
6440 				verbose(env, "BPF_ST uses reserved fields\n");
6441 				return -EINVAL;
6442 			}
6443 			/* check src operand */
6444 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6445 			if (err)
6446 				return err;
6447 
6448 			if (is_ctx_reg(env, insn->dst_reg)) {
6449 				verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
6450 					insn->dst_reg,
6451 					reg_type_str[reg_state(env, insn->dst_reg)->type]);
6452 				return -EACCES;
6453 			}
6454 
6455 			/* check that memory (dst_reg + off) is writeable */
6456 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
6457 					       insn->off, BPF_SIZE(insn->code),
6458 					       BPF_WRITE, -1, false);
6459 			if (err)
6460 				return err;
6461 
6462 		} else if (class == BPF_JMP || class == BPF_JMP32) {
6463 			u8 opcode = BPF_OP(insn->code);
6464 
6465 			if (opcode == BPF_CALL) {
6466 				if (BPF_SRC(insn->code) != BPF_K ||
6467 				    insn->off != 0 ||
6468 				    (insn->src_reg != BPF_REG_0 &&
6469 				     insn->src_reg != BPF_PSEUDO_CALL) ||
6470 				    insn->dst_reg != BPF_REG_0 ||
6471 				    class == BPF_JMP32) {
6472 					verbose(env, "BPF_CALL uses reserved fields\n");
6473 					return -EINVAL;
6474 				}
6475 
6476 				if (env->cur_state->active_spin_lock &&
6477 				    (insn->src_reg == BPF_PSEUDO_CALL ||
6478 				     insn->imm != BPF_FUNC_spin_unlock)) {
6479 					verbose(env, "function calls are not allowed while holding a lock\n");
6480 					return -EINVAL;
6481 				}
6482 				if (insn->src_reg == BPF_PSEUDO_CALL)
6483 					err = check_func_call(env, insn, &env->insn_idx);
6484 				else
6485 					err = check_helper_call(env, insn->imm, env->insn_idx);
6486 				if (err)
6487 					return err;
6488 
6489 			} else if (opcode == BPF_JA) {
6490 				if (BPF_SRC(insn->code) != BPF_K ||
6491 				    insn->imm != 0 ||
6492 				    insn->src_reg != BPF_REG_0 ||
6493 				    insn->dst_reg != BPF_REG_0 ||
6494 				    class == BPF_JMP32) {
6495 					verbose(env, "BPF_JA uses reserved fields\n");
6496 					return -EINVAL;
6497 				}
6498 
6499 				env->insn_idx += insn->off + 1;
6500 				continue;
6501 
6502 			} else if (opcode == BPF_EXIT) {
6503 				if (BPF_SRC(insn->code) != BPF_K ||
6504 				    insn->imm != 0 ||
6505 				    insn->src_reg != BPF_REG_0 ||
6506 				    insn->dst_reg != BPF_REG_0 ||
6507 				    class == BPF_JMP32) {
6508 					verbose(env, "BPF_EXIT uses reserved fields\n");
6509 					return -EINVAL;
6510 				}
6511 
6512 				if (env->cur_state->active_spin_lock) {
6513 					verbose(env, "bpf_spin_unlock is missing\n");
6514 					return -EINVAL;
6515 				}
6516 
6517 				if (state->curframe) {
6518 					/* exit from nested function */
6519 					env->prev_insn_idx = env->insn_idx;
6520 					err = prepare_func_exit(env, &env->insn_idx);
6521 					if (err)
6522 						return err;
6523 					do_print_state = true;
6524 					continue;
6525 				}
6526 
6527 				err = check_reference_leak(env);
6528 				if (err)
6529 					return err;
6530 
6531 				/* eBPF calling convention is such that R0 is used
6532 				 * to return the value from eBPF program.
6533 				 * Make sure that it's readable at this time
6534 				 * of bpf_exit, which means that program wrote
6535 				 * something into it earlier
6536 				 */
6537 				err = check_reg_arg(env, BPF_REG_0, SRC_OP);
6538 				if (err)
6539 					return err;
6540 
6541 				if (is_pointer_value(env, BPF_REG_0)) {
6542 					verbose(env, "R0 leaks addr as return value\n");
6543 					return -EACCES;
6544 				}
6545 
6546 				err = check_return_code(env);
6547 				if (err)
6548 					return err;
6549 process_bpf_exit:
6550 				err = pop_stack(env, &env->prev_insn_idx,
6551 						&env->insn_idx);
6552 				if (err < 0) {
6553 					if (err != -ENOENT)
6554 						return err;
6555 					break;
6556 				} else {
6557 					do_print_state = true;
6558 					continue;
6559 				}
6560 			} else {
6561 				err = check_cond_jmp_op(env, insn, &env->insn_idx);
6562 				if (err)
6563 					return err;
6564 			}
6565 		} else if (class == BPF_LD) {
6566 			u8 mode = BPF_MODE(insn->code);
6567 
6568 			if (mode == BPF_ABS || mode == BPF_IND) {
6569 				err = check_ld_abs(env, insn);
6570 				if (err)
6571 					return err;
6572 
6573 			} else if (mode == BPF_IMM) {
6574 				err = check_ld_imm(env, insn);
6575 				if (err)
6576 					return err;
6577 
6578 				env->insn_idx++;
6579 				env->insn_aux_data[env->insn_idx].seen = true;
6580 			} else {
6581 				verbose(env, "invalid BPF_LD mode\n");
6582 				return -EINVAL;
6583 			}
6584 		} else {
6585 			verbose(env, "unknown insn class %d\n", class);
6586 			return -EINVAL;
6587 		}
6588 
6589 		env->insn_idx++;
6590 	}
6591 
6592 	verbose(env, "processed %d insns (limit %d), stack depth ",
6593 		insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
6594 	for (i = 0; i < env->subprog_cnt; i++) {
6595 		u32 depth = env->subprog_info[i].stack_depth;
6596 
6597 		verbose(env, "%d", depth);
6598 		if (i + 1 < env->subprog_cnt)
6599 			verbose(env, "+");
6600 	}
6601 	verbose(env, "\n");
6602 	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
6603 	return 0;
6604 }
6605 
6606 static int check_map_prealloc(struct bpf_map *map)
6607 {
6608 	return (map->map_type != BPF_MAP_TYPE_HASH &&
6609 		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
6610 		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
6611 		!(map->map_flags & BPF_F_NO_PREALLOC);
6612 }
6613 
6614 static bool is_tracing_prog_type(enum bpf_prog_type type)
6615 {
6616 	switch (type) {
6617 	case BPF_PROG_TYPE_KPROBE:
6618 	case BPF_PROG_TYPE_TRACEPOINT:
6619 	case BPF_PROG_TYPE_PERF_EVENT:
6620 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
6621 		return true;
6622 	default:
6623 		return false;
6624 	}
6625 }
6626 
6627 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
6628 					struct bpf_map *map,
6629 					struct bpf_prog *prog)
6630 
6631 {
6632 	/* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
6633 	 * preallocated hash maps, since doing memory allocation
6634 	 * in overflow_handler can crash depending on where the NMI got
6635 	 * triggered.
6636 	 */
6637 	if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
6638 		if (!check_map_prealloc(map)) {
6639 			verbose(env, "perf_event programs can only use preallocated hash map\n");
6640 			return -EINVAL;
6641 		}
6642 		if (map->inner_map_meta &&
6643 		    !check_map_prealloc(map->inner_map_meta)) {
6644 			verbose(env, "perf_event programs can only use preallocated inner hash map\n");
6645 			return -EINVAL;
6646 		}
6647 	}
6648 
6649 	if ((is_tracing_prog_type(prog->type) ||
6650 	     prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
6651 	    map_value_has_spin_lock(map)) {
6652 		verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
6653 		return -EINVAL;
6654 	}
6655 
6656 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
6657 	    !bpf_offload_prog_map_match(prog, map)) {
6658 		verbose(env, "offload device mismatch between prog and map\n");
6659 		return -EINVAL;
6660 	}
6661 
6662 	return 0;
6663 }
6664 
6665 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
6666 {
6667 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
6668 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
6669 }
6670 
6671 /* look for pseudo eBPF instructions that access map FDs and
6672  * replace them with actual map pointers
6673  */
6674 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
6675 {
6676 	struct bpf_insn *insn = env->prog->insnsi;
6677 	int insn_cnt = env->prog->len;
6678 	int i, j, err;
6679 
6680 	err = bpf_prog_calc_tag(env->prog);
6681 	if (err)
6682 		return err;
6683 
6684 	for (i = 0; i < insn_cnt; i++, insn++) {
6685 		if (BPF_CLASS(insn->code) == BPF_LDX &&
6686 		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
6687 			verbose(env, "BPF_LDX uses reserved fields\n");
6688 			return -EINVAL;
6689 		}
6690 
6691 		if (BPF_CLASS(insn->code) == BPF_STX &&
6692 		    ((BPF_MODE(insn->code) != BPF_MEM &&
6693 		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
6694 			verbose(env, "BPF_STX uses reserved fields\n");
6695 			return -EINVAL;
6696 		}
6697 
6698 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
6699 			struct bpf_map *map;
6700 			struct fd f;
6701 
6702 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
6703 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
6704 			    insn[1].off != 0) {
6705 				verbose(env, "invalid bpf_ld_imm64 insn\n");
6706 				return -EINVAL;
6707 			}
6708 
6709 			if (insn->src_reg == 0)
6710 				/* valid generic load 64-bit imm */
6711 				goto next_insn;
6712 
6713 			if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
6714 			    insn[1].imm != 0) {
6715 				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
6716 				return -EINVAL;
6717 			}
6718 
6719 			f = fdget(insn[0].imm);
6720 			map = __bpf_map_get(f);
6721 			if (IS_ERR(map)) {
6722 				verbose(env, "fd %d is not pointing to valid bpf_map\n",
6723 					insn[0].imm);
6724 				return PTR_ERR(map);
6725 			}
6726 
6727 			err = check_map_prog_compatibility(env, map, env->prog);
6728 			if (err) {
6729 				fdput(f);
6730 				return err;
6731 			}
6732 
6733 			/* store map pointer inside BPF_LD_IMM64 instruction */
6734 			insn[0].imm = (u32) (unsigned long) map;
6735 			insn[1].imm = ((u64) (unsigned long) map) >> 32;
6736 
6737 			/* check whether we recorded this map already */
6738 			for (j = 0; j < env->used_map_cnt; j++)
6739 				if (env->used_maps[j] == map) {
6740 					fdput(f);
6741 					goto next_insn;
6742 				}
6743 
6744 			if (env->used_map_cnt >= MAX_USED_MAPS) {
6745 				fdput(f);
6746 				return -E2BIG;
6747 			}
6748 
6749 			/* hold the map. If the program is rejected by verifier,
6750 			 * the map will be released by release_maps() or it
6751 			 * will be used by the valid program until it's unloaded
6752 			 * and all maps are released in free_used_maps()
6753 			 */
6754 			map = bpf_map_inc(map, false);
6755 			if (IS_ERR(map)) {
6756 				fdput(f);
6757 				return PTR_ERR(map);
6758 			}
6759 			env->used_maps[env->used_map_cnt++] = map;
6760 
6761 			if (bpf_map_is_cgroup_storage(map) &&
6762 			    bpf_cgroup_storage_assign(env->prog, map)) {
6763 				verbose(env, "only one cgroup storage of each type is allowed\n");
6764 				fdput(f);
6765 				return -EBUSY;
6766 			}
6767 
6768 			fdput(f);
6769 next_insn:
6770 			insn++;
6771 			i++;
6772 			continue;
6773 		}
6774 
6775 		/* Basic sanity check before we invest more work here. */
6776 		if (!bpf_opcode_in_insntable(insn->code)) {
6777 			verbose(env, "unknown opcode %02x\n", insn->code);
6778 			return -EINVAL;
6779 		}
6780 	}
6781 
6782 	/* now all pseudo BPF_LD_IMM64 instructions load valid
6783 	 * 'struct bpf_map *' into a register instead of user map_fd.
6784 	 * These pointers will be used later by verifier to validate map access.
6785 	 */
6786 	return 0;
6787 }
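/* Illustrative sketch (assuming a typical libbpf-style loader): the loader
 * emits the map reference as a two-insn BPF_LD_IMM64 pair, roughly
 *
 *	insn[0] = { .code = BPF_LD | BPF_DW | BPF_IMM,
 *		    .src_reg = BPF_PSEUDO_MAP_FD, .imm = map_fd };
 *	insn[1] = { .imm = 0 };		(upper 32 bits, must be zero here)
 *
 * and after replace_map_fd_with_map_ptr() insn[0].imm / insn[1].imm hold the
 * low / high 32 bits of the 'struct bpf_map *' instead of the user fd.
 */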
6788 
6789 /* drop refcnt of maps used by the rejected program */
6790 static void release_maps(struct bpf_verifier_env *env)
6791 {
6792 	enum bpf_cgroup_storage_type stype;
6793 	int i;
6794 
6795 	for_each_cgroup_storage_type(stype) {
6796 		if (!env->prog->aux->cgroup_storage[stype])
6797 			continue;
6798 		bpf_cgroup_storage_release(env->prog,
6799 			env->prog->aux->cgroup_storage[stype]);
6800 	}
6801 
6802 	for (i = 0; i < env->used_map_cnt; i++)
6803 		bpf_map_put(env->used_maps[i]);
6804 }
6805 
6806 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
6807 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
6808 {
6809 	struct bpf_insn *insn = env->prog->insnsi;
6810 	int insn_cnt = env->prog->len;
6811 	int i;
6812 
6813 	for (i = 0; i < insn_cnt; i++, insn++)
6814 		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
6815 			insn->src_reg = 0;
6816 }
6817 
6818 /* single env->prog->insnsi[off] instruction was replaced with the range
6819  * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
6820  * [0, off) and [off, end) to new locations, so the patched range stays zero
6821  */
6822 static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
6823 				u32 off, u32 cnt)
6824 {
6825 	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
6826 	int i;
6827 
6828 	if (cnt == 1)
6829 		return 0;
6830 	new_data = vzalloc(array_size(prog_len,
6831 				      sizeof(struct bpf_insn_aux_data)));
6832 	if (!new_data)
6833 		return -ENOMEM;
6834 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
6835 	memcpy(new_data + off + cnt - 1, old_data + off,
6836 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
6837 	for (i = off; i < off + cnt - 1; i++)
6838 		new_data[i].seen = true;
6839 	env->insn_aux_data = new_data;
6840 	vfree(old_data);
6841 	return 0;
6842 }
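/* Example (illustrative): patching the single insn at off = 5 with a cnt = 3
 * replacement copies old aux[0..4] unchanged, moves old aux[5..] so that the
 * aux of the patched insn lands at new index 7 (off + cnt - 1), and marks the
 * freshly inserted slots 5..6 as seen.
 */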
6843 
6844 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
6845 {
6846 	int i;
6847 
6848 	if (len == 1)
6849 		return;
6850 	/* NOTE: fake 'exit' subprog should be updated as well. */
6851 	for (i = 0; i <= env->subprog_cnt; i++) {
6852 		if (env->subprog_info[i].start <= off)
6853 			continue;
6854 		env->subprog_info[i].start += len - 1;
6855 	}
6856 }
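/* Example (illustrative): replacing one insn at off = 10 with a 4-insn patch
 * (len = 4) grows the program by 3, so every subprog (and the fake 'exit'
 * entry) whose start is > 10 is shifted by +3; starts <= 10 are unchanged.
 */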
6857 
6858 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
6859 					    const struct bpf_insn *patch, u32 len)
6860 {
6861 	struct bpf_prog *new_prog;
6862 
6863 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
6864 	if (!new_prog)
6865 		return NULL;
6866 	if (adjust_insn_aux_data(env, new_prog->len, off, len))
6867 		return NULL;
6868 	adjust_subprog_starts(env, off, len);
6869 	return new_prog;
6870 }
6871 
6872 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
6873 					      u32 off, u32 cnt)
6874 {
6875 	int i, j;
6876 
6877 	/* find first prog starting at or after off (first to remove) */
6878 	for (i = 0; i < env->subprog_cnt; i++)
6879 		if (env->subprog_info[i].start >= off)
6880 			break;
6881 	/* find first prog starting at or after off + cnt (first to stay) */
6882 	for (j = i; j < env->subprog_cnt; j++)
6883 		if (env->subprog_info[j].start >= off + cnt)
6884 			break;
6885 	/* if j doesn't start exactly at off + cnt, we are just removing
6886 	 * the front of previous prog
6887 	 */
6888 	if (env->subprog_info[j].start != off + cnt)
6889 		j--;
6890 
6891 	if (j > i) {
6892 		struct bpf_prog_aux *aux = env->prog->aux;
6893 		int move;
6894 
6895 		/* move fake 'exit' subprog as well */
6896 		move = env->subprog_cnt + 1 - j;
6897 
6898 		memmove(env->subprog_info + i,
6899 			env->subprog_info + j,
6900 			sizeof(*env->subprog_info) * move);
6901 		env->subprog_cnt -= j - i;
6902 
6903 		/* remove func_info */
6904 		if (aux->func_info) {
6905 			move = aux->func_info_cnt - j;
6906 
6907 			memmove(aux->func_info + i,
6908 				aux->func_info + j,
6909 				sizeof(*aux->func_info) * move);
6910 			aux->func_info_cnt -= j - i;
6911 			/* func_info->insn_off is set after all code rewrites,
6912 			 * in adjust_btf_func() - no need to adjust
6913 			 */
6914 		}
6915 	} else {
6916 		/* convert i from "first prog to remove" to "first to adjust" */
6917 		if (env->subprog_info[i].start == off)
6918 			i++;
6919 	}
6920 
6921 	/* update fake 'exit' subprog as well */
6922 	for (; i <= env->subprog_cnt; i++)
6923 		env->subprog_info[i].start -= cnt;
6924 
6925 	return 0;
6926 }
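/* Example (illustrative): with subprog starts {0, 4, 7} and the fake 'exit'
 * entry at 13, removing insns [4, 7) removes subprog 1 entirely: i = 1,
 * j = 2, the entries for subprog 2 and 'exit' are moved down one slot,
 * subprog_cnt drops to 2, and the remaining starts become {0, 4} with
 * 'exit' at 10.
 */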
6927 
6928 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
6929 				      u32 cnt)
6930 {
6931 	struct bpf_prog *prog = env->prog;
6932 	u32 i, l_off, l_cnt, nr_linfo;
6933 	struct bpf_line_info *linfo;
6934 
6935 	nr_linfo = prog->aux->nr_linfo;
6936 	if (!nr_linfo)
6937 		return 0;
6938 
6939 	linfo = prog->aux->linfo;
6940 
6941 	/* find first line info to remove, count lines to be removed */
6942 	for (i = 0; i < nr_linfo; i++)
6943 		if (linfo[i].insn_off >= off)
6944 			break;
6945 
6946 	l_off = i;
6947 	l_cnt = 0;
6948 	for (; i < nr_linfo; i++)
6949 		if (linfo[i].insn_off < off + cnt)
6950 			l_cnt++;
6951 		else
6952 			break;
6953 
6954 	/* If the first live insn doesn't match the first live linfo, it needs to
6955 	 * "inherit" the last removed linfo.  prog is already modified, so prog->len == off
6956 	 * means there are no live instructions after it (tail of the program was removed).
6957 	 */
6958 	if (prog->len != off && l_cnt &&
6959 	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
6960 		l_cnt--;
6961 		linfo[--i].insn_off = off + cnt;
6962 	}
6963 
6964 	/* remove the line info which refer to the removed instructions */
6965 	if (l_cnt) {
6966 		memmove(linfo + l_off, linfo + i,
6967 			sizeof(*linfo) * (nr_linfo - i));
6968 
6969 		prog->aux->nr_linfo -= l_cnt;
6970 		nr_linfo = prog->aux->nr_linfo;
6971 	}
6972 
6973 	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
6974 	for (i = l_off; i < nr_linfo; i++)
6975 		linfo[i].insn_off -= cnt;
6976 
6977 	/* fix up all subprogs (incl. 'exit') which start >= off */
6978 	for (i = 0; i <= env->subprog_cnt; i++)
6979 		if (env->subprog_info[i].linfo_idx > l_off) {
6980 			/* program may have started in the removed region but
6981 			 * may not be fully removed
6982 			 */
6983 			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
6984 				env->subprog_info[i].linfo_idx -= l_cnt;
6985 			else
6986 				env->subprog_info[i].linfo_idx = l_off;
6987 		}
6988 
6989 	return 0;
6990 }
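/* Example (illustrative): with line infos at insn_off 8 and 12 and insns
 * [10, 13) removed, the linfo at 12 falls inside the removed range.  Since
 * the first surviving insn (formerly at 13) has no linfo of its own, that
 * linfo is kept ("inherited") and its insn_off ends up at 10 after the
 * final -= cnt adjustment.
 */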
6991 
6992 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
6993 {
6994 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
6995 	unsigned int orig_prog_len = env->prog->len;
6996 	int err;
6997 
6998 	if (bpf_prog_is_dev_bound(env->prog->aux))
6999 		bpf_prog_offload_remove_insns(env, off, cnt);
7000 
7001 	err = bpf_remove_insns(env->prog, off, cnt);
7002 	if (err)
7003 		return err;
7004 
7005 	err = adjust_subprog_starts_after_remove(env, off, cnt);
7006 	if (err)
7007 		return err;
7008 
7009 	err = bpf_adj_linfo_after_remove(env, off, cnt);
7010 	if (err)
7011 		return err;
7012 
7013 	memmove(aux_data + off,	aux_data + off + cnt,
7014 		sizeof(*aux_data) * (orig_prog_len - off - cnt));
7015 
7016 	return 0;
7017 }
7018 
7019 /* The verifier does more data flow analysis than llvm and will not
7020  * explore branches that are dead at run time. Malicious programs can
7021  * have dead code too. Therefore replace all dead at-run-time code
7022  * with 'ja -1'.
7023  *
7024  * Plain nops would not be ideal: if they sat at the end of the
7025  * program and another bug let us jump there, we would execute
7026  * beyond program memory. Returning an exception code also
7027  * wouldn't work, since the dead code can be located inside
7028  * subprogs.
7029  */
7030 static void sanitize_dead_code(struct bpf_verifier_env *env)
7031 {
7032 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
7033 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
7034 	struct bpf_insn *insn = env->prog->insnsi;
7035 	const int insn_cnt = env->prog->len;
7036 	int i;
7037 
7038 	for (i = 0; i < insn_cnt; i++) {
7039 		if (aux_data[i].seen)
7040 			continue;
7041 		memcpy(insn + i, &trap, sizeof(trap));
7042 	}
7043 }
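/* Illustrative note: BPF_JMP_IMM(BPF_JA, 0, 0, -1) targets itself
 * (next_idx = idx + off + 1 = idx), so if a dead insn is ever reached
 * through some other bug, execution spins in place instead of running
 * off the end of the image or into an unrelated subprog.
 */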
7044 
7045 static bool insn_is_cond_jump(u8 code)
7046 {
7047 	u8 op;
7048 
7049 	if (BPF_CLASS(code) == BPF_JMP32)
7050 		return true;
7051 
7052 	if (BPF_CLASS(code) != BPF_JMP)
7053 		return false;
7054 
7055 	op = BPF_OP(code);
7056 	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
7057 }
7058 
7059 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
7060 {
7061 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
7062 	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
7063 	struct bpf_insn *insn = env->prog->insnsi;
7064 	const int insn_cnt = env->prog->len;
7065 	int i;
7066 
7067 	for (i = 0; i < insn_cnt; i++, insn++) {
7068 		if (!insn_is_cond_jump(insn->code))
7069 			continue;
7070 
7071 		if (!aux_data[i + 1].seen)
7072 			ja.off = insn->off;
7073 		else if (!aux_data[i + 1 + insn->off].seen)
7074 			ja.off = 0;
7075 		else
7076 			continue;
7077 
7078 		if (bpf_prog_is_dev_bound(env->prog->aux))
7079 			bpf_prog_offload_replace_insn(env, i, &ja);
7080 
7081 		memcpy(insn, &ja, sizeof(ja));
7082 	}
7083 }
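/* Example (illustrative): for a conditional jump at insn i whose fall-through
 * (i + 1) was never seen, the branch is always taken and the insn is replaced
 * with 'ja insn->off'; if instead the branch target (i + 1 + off) was never
 * seen, it becomes 'ja 0', i.e. a plain fall-through that opt_remove_nops()
 * can later delete.
 */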
7084 
7085 static int opt_remove_dead_code(struct bpf_verifier_env *env)
7086 {
7087 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
7088 	int insn_cnt = env->prog->len;
7089 	int i, err;
7090 
7091 	for (i = 0; i < insn_cnt; i++) {
7092 		int j;
7093 
7094 		j = 0;
7095 		while (i + j < insn_cnt && !aux_data[i + j].seen)
7096 			j++;
7097 		if (!j)
7098 			continue;
7099 
7100 		err = verifier_remove_insns(env, i, j);
7101 		if (err)
7102 			return err;
7103 		insn_cnt = env->prog->len;
7104 	}
7105 
7106 	return 0;
7107 }
7108 
7109 static int opt_remove_nops(struct bpf_verifier_env *env)
7110 {
7111 	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
7112 	struct bpf_insn *insn = env->prog->insnsi;
7113 	int insn_cnt = env->prog->len;
7114 	int i, err;
7115 
7116 	for (i = 0; i < insn_cnt; i++) {
7117 		if (memcmp(&insn[i], &ja, sizeof(ja)))
7118 			continue;
7119 
7120 		err = verifier_remove_insns(env, i, 1);
7121 		if (err)
7122 			return err;
7123 		insn_cnt--;
7124 		i--;
7125 	}
7126 
7127 	return 0;
7128 }
7129 
7130 /* convert load instructions that access fields of a context type into a
7131  * sequence of instructions that access fields of the underlying structure:
7132  *     struct __sk_buff    -> struct sk_buff
7133  *     struct bpf_sock_ops -> struct sock
7134  */
7135 static int convert_ctx_accesses(struct bpf_verifier_env *env)
7136 {
7137 	const struct bpf_verifier_ops *ops = env->ops;
7138 	int i, cnt, size, ctx_field_size, delta = 0;
7139 	const int insn_cnt = env->prog->len;
7140 	struct bpf_insn insn_buf[16], *insn;
7141 	u32 target_size, size_default, off;
7142 	struct bpf_prog *new_prog;
7143 	enum bpf_access_type type;
7144 	bool is_narrower_load;
7145 
7146 	if (ops->gen_prologue || env->seen_direct_write) {
7147 		if (!ops->gen_prologue) {
7148 			verbose(env, "bpf verifier is misconfigured\n");
7149 			return -EINVAL;
7150 		}
7151 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
7152 					env->prog);
7153 		if (cnt >= ARRAY_SIZE(insn_buf)) {
7154 			verbose(env, "bpf verifier is misconfigured\n");
7155 			return -EINVAL;
7156 		} else if (cnt) {
7157 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
7158 			if (!new_prog)
7159 				return -ENOMEM;
7160 
7161 			env->prog = new_prog;
7162 			delta += cnt - 1;
7163 		}
7164 	}
7165 
7166 	if (bpf_prog_is_dev_bound(env->prog->aux))
7167 		return 0;
7168 
7169 	insn = env->prog->insnsi + delta;
7170 
7171 	for (i = 0; i < insn_cnt; i++, insn++) {
7172 		bpf_convert_ctx_access_t convert_ctx_access;
7173 
7174 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
7175 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
7176 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
7177 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
7178 			type = BPF_READ;
7179 		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
7180 			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
7181 			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
7182 			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
7183 			type = BPF_WRITE;
7184 		else
7185 			continue;
7186 
7187 		if (type == BPF_WRITE &&
7188 		    env->insn_aux_data[i + delta].sanitize_stack_off) {
7189 			struct bpf_insn patch[] = {
7190 				/* Sanitize suspicious stack slot with zero.
7191 				 * There are no memory dependencies for this store,
7192 				 * since it's only using frame pointer and immediate
7193 				 * constant of zero
7194 				 */
7195 				BPF_ST_MEM(BPF_DW, BPF_REG_FP,
7196 					   env->insn_aux_data[i + delta].sanitize_stack_off,
7197 					   0),
7198 				/* the original STX instruction will immediately
7199 				 * overwrite the same stack slot with appropriate value
7200 				 */
7201 				*insn,
7202 			};
7203 
7204 			cnt = ARRAY_SIZE(patch);
7205 			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
7206 			if (!new_prog)
7207 				return -ENOMEM;
7208 
7209 			delta    += cnt - 1;
7210 			env->prog = new_prog;
7211 			insn      = new_prog->insnsi + i + delta;
7212 			continue;
7213 		}
7214 
7215 		switch (env->insn_aux_data[i + delta].ptr_type) {
7216 		case PTR_TO_CTX:
7217 			if (!ops->convert_ctx_access)
7218 				continue;
7219 			convert_ctx_access = ops->convert_ctx_access;
7220 			break;
7221 		case PTR_TO_SOCKET:
7222 		case PTR_TO_SOCK_COMMON:
7223 			convert_ctx_access = bpf_sock_convert_ctx_access;
7224 			break;
7225 		case PTR_TO_TCP_SOCK:
7226 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
7227 			break;
7228 		default:
7229 			continue;
7230 		}
7231 
7232 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
7233 		size = BPF_LDST_BYTES(insn);
7234 
7235 		/* If the read access is a narrower load of the field,
7236 		 * convert to a 4/8-byte load, to minimize program type specific
7237 		 * convert_ctx_access changes. If conversion is successful,
7238 		 * we will apply proper mask to the result.
7239 		 */
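		/* Example (illustrative): a BPF_B read at offset 'off' inside a
		 * 4-byte ctx field is turned into a BPF_W read from the aligned
		 * offset (off & ~3); the shift by (off & 3) * 8 and the AND mask
		 * emitted further below then extract the requested sub-field
		 * from the wider load.
		 */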
7240 		is_narrower_load = size < ctx_field_size;
7241 		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
7242 		off = insn->off;
7243 		if (is_narrower_load) {
7244 			u8 size_code;
7245 
7246 			if (type == BPF_WRITE) {
7247 				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
7248 				return -EINVAL;
7249 			}
7250 
7251 			size_code = BPF_H;
7252 			if (ctx_field_size == 4)
7253 				size_code = BPF_W;
7254 			else if (ctx_field_size == 8)
7255 				size_code = BPF_DW;
7256 
7257 			insn->off = off & ~(size_default - 1);
7258 			insn->code = BPF_LDX | BPF_MEM | size_code;
7259 		}
7260 
7261 		target_size = 0;
7262 		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
7263 					 &target_size);
7264 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
7265 		    (ctx_field_size && !target_size)) {
7266 			verbose(env, "bpf verifier is misconfigured\n");
7267 			return -EINVAL;
7268 		}
7269 
7270 		if (is_narrower_load && size < target_size) {
7271 			u8 shift = (off & (size_default - 1)) * 8;
7272 
7273 			if (ctx_field_size <= 4) {
7274 				if (shift)
7275 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
7276 									insn->dst_reg,
7277 									shift);
7278 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
7279 								(1 << size * 8) - 1);
7280 			} else {
7281 				if (shift)
7282 					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
7283 									insn->dst_reg,
7284 									shift);
7285 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
7286 								(1 << size * 8) - 1);
7287 			}
7288 		}
7289 
7290 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7291 		if (!new_prog)
7292 			return -ENOMEM;
7293 
7294 		delta += cnt - 1;
7295 
7296 		/* keep walking new program and skip insns we just inserted */
7297 		env->prog = new_prog;
7298 		insn      = new_prog->insnsi + i + delta;
7299 	}
7300 
7301 	return 0;
7302 }
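/* Illustrative sketch (assuming an sk_buff based program type): a verified
 * access such as
 *
 *	r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 *
 * is rewritten by that program type's convert_ctx_access() into a load of
 * skb->len at its real offset inside 'struct sk_buff', plus the shift/mask
 * fixup above for narrower-than-field reads.
 */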
7303 
7304 static int jit_subprogs(struct bpf_verifier_env *env)
7305 {
7306 	struct bpf_prog *prog = env->prog, **func, *tmp;
7307 	int i, j, subprog_start, subprog_end = 0, len, subprog;
7308 	struct bpf_insn *insn;
7309 	void *old_bpf_func;
7310 	int err;
7311 
7312 	if (env->subprog_cnt <= 1)
7313 		return 0;
7314 
7315 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7316 		if (insn->code != (BPF_JMP | BPF_CALL) ||
7317 		    insn->src_reg != BPF_PSEUDO_CALL)
7318 			continue;
7319 		/* Upon error here we cannot fall back to interpreter but
7320 		 * need a hard reject of the program. Thus -EFAULT is
7321 		 * propagated in any case.
7322 		 */
7323 		subprog = find_subprog(env, i + insn->imm + 1);
7324 		if (subprog < 0) {
7325 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
7326 				  i + insn->imm + 1);
7327 			return -EFAULT;
7328 		}
7329 		/* temporarily remember subprog id inside insn instead of
7330 		 * aux_data, since next loop will split up all insns into funcs
7331 		 */
7332 		insn->off = subprog;
7333 		/* remember original imm in case JIT fails and fallback
7334 		 * to interpreter will be needed
7335 		 */
7336 		env->insn_aux_data[i].call_imm = insn->imm;
7337 		/* point imm to __bpf_call_base+1 from JITs point of view */
7338 		insn->imm = 1;
7339 	}
7340 
7341 	err = bpf_prog_alloc_jited_linfo(prog);
7342 	if (err)
7343 		goto out_undo_insn;
7344 
7345 	err = -ENOMEM;
7346 	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
7347 	if (!func)
7348 		goto out_undo_insn;
7349 
7350 	for (i = 0; i < env->subprog_cnt; i++) {
7351 		subprog_start = subprog_end;
7352 		subprog_end = env->subprog_info[i + 1].start;
7353 
7354 		len = subprog_end - subprog_start;
7355 		/* BPF_PROG_RUN doesn't call subprogs directly,
7356 		 * hence main prog stats include the runtime of subprogs.
7357 		 * subprogs don't have IDs and are not reachable via prog_get_next_id.
7358 		 * func[i]->aux->stats will never be accessed and stays NULL
7359 		 */
7360 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
7361 		if (!func[i])
7362 			goto out_free;
7363 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
7364 		       len * sizeof(struct bpf_insn));
7365 		func[i]->type = prog->type;
7366 		func[i]->len = len;
7367 		if (bpf_prog_calc_tag(func[i]))
7368 			goto out_free;
7369 		func[i]->is_func = 1;
7370 		func[i]->aux->func_idx = i;
7371 		/* the btf and func_info will be freed only at prog->aux */
7372 		func[i]->aux->btf = prog->aux->btf;
7373 		func[i]->aux->func_info = prog->aux->func_info;
7374 
7375 		/* Use bpf_prog_F_tag to indicate functions in stack traces.
7376 		 * Long term would need debug info to populate names
7377 		 */
7378 		func[i]->aux->name[0] = 'F';
7379 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
7380 		func[i]->jit_requested = 1;
7381 		func[i]->aux->linfo = prog->aux->linfo;
7382 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
7383 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
7384 		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
7385 		func[i] = bpf_int_jit_compile(func[i]);
7386 		if (!func[i]->jited) {
7387 			err = -ENOTSUPP;
7388 			goto out_free;
7389 		}
7390 		cond_resched();
7391 	}
7392 	/* at this point all bpf functions were successfully JITed
7393 	 * now populate all bpf_calls with correct addresses and
7394 	 * run last pass of JIT
7395 	 */
7396 	for (i = 0; i < env->subprog_cnt; i++) {
7397 		insn = func[i]->insnsi;
7398 		for (j = 0; j < func[i]->len; j++, insn++) {
7399 			if (insn->code != (BPF_JMP | BPF_CALL) ||
7400 			    insn->src_reg != BPF_PSEUDO_CALL)
7401 				continue;
7402 			subprog = insn->off;
7403 			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
7404 				func[subprog]->bpf_func -
7405 				__bpf_call_base;
7406 		}
7407 
7408 		/* we use the aux data to keep a list of the start addresses
7409 		 * of the JITed images for each function in the program
7410 		 *
7411 		 * for some architectures, such as powerpc64, the imm field
7412 		 * might not be large enough to hold the offset of the start
7413 		 * address of the callee's JITed image from __bpf_call_base
7414 		 *
7415 		 * in such cases, we can lookup the start address of a callee
7416 		 * by using its subprog id, available from the off field of
7417 		 * the call instruction, as an index for this list
7418 		 */
7419 		func[i]->aux->func = func;
7420 		func[i]->aux->func_cnt = env->subprog_cnt;
7421 	}
7422 	for (i = 0; i < env->subprog_cnt; i++) {
7423 		old_bpf_func = func[i]->bpf_func;
7424 		tmp = bpf_int_jit_compile(func[i]);
7425 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
7426 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
7427 			err = -ENOTSUPP;
7428 			goto out_free;
7429 		}
7430 		cond_resched();
7431 	}
7432 
7433 	/* finally lock prog and jit images for all functions and
7434 	 * populate kallsyms
7435 	 */
7436 	for (i = 0; i < env->subprog_cnt; i++) {
7437 		bpf_prog_lock_ro(func[i]);
7438 		bpf_prog_kallsyms_add(func[i]);
7439 	}
7440 
7441 	/* Last step: make now unused interpreter insns from main
7442 	 * prog consistent for later dump requests, so that they
7443 	 * look the same as if they had only been interpreted.
7444 	 */
7445 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7446 		if (insn->code != (BPF_JMP | BPF_CALL) ||
7447 		    insn->src_reg != BPF_PSEUDO_CALL)
7448 			continue;
7449 		insn->off = env->insn_aux_data[i].call_imm;
7450 		subprog = find_subprog(env, i + insn->off + 1);
7451 		insn->imm = subprog;
7452 	}
7453 
7454 	prog->jited = 1;
7455 	prog->bpf_func = func[0]->bpf_func;
7456 	prog->aux->func = func;
7457 	prog->aux->func_cnt = env->subprog_cnt;
7458 	bpf_prog_free_unused_jited_linfo(prog);
7459 	return 0;
7460 out_free:
7461 	for (i = 0; i < env->subprog_cnt; i++)
7462 		if (func[i])
7463 			bpf_jit_free(func[i]);
7464 	kfree(func);
7465 out_undo_insn:
7466 	/* cleanup main prog to be interpreted */
7467 	prog->jit_requested = 0;
7468 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7469 		if (insn->code != (BPF_JMP | BPF_CALL) ||
7470 		    insn->src_reg != BPF_PSEUDO_CALL)
7471 			continue;
7472 		insn->off = 0;
7473 		insn->imm = env->insn_aux_data[i].call_imm;
7474 	}
7475 	bpf_prog_free_jited_linfo(prog);
7476 	return err;
7477 }
7478 
7479 static int fixup_call_args(struct bpf_verifier_env *env)
7480 {
7481 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
7482 	struct bpf_prog *prog = env->prog;
7483 	struct bpf_insn *insn = prog->insnsi;
7484 	int i, depth;
7485 #endif
7486 	int err = 0;
7487 
7488 	if (env->prog->jit_requested &&
7489 	    !bpf_prog_is_dev_bound(env->prog->aux)) {
7490 		err = jit_subprogs(env);
7491 		if (err == 0)
7492 			return 0;
7493 		if (err == -EFAULT)
7494 			return err;
7495 	}
7496 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
7497 	for (i = 0; i < prog->len; i++, insn++) {
7498 		if (insn->code != (BPF_JMP | BPF_CALL) ||
7499 		    insn->src_reg != BPF_PSEUDO_CALL)
7500 			continue;
7501 		depth = get_callee_stack_depth(env, insn, i);
7502 		if (depth < 0)
7503 			return depth;
7504 		bpf_patch_call_args(insn, depth);
7505 	}
7506 	err = 0;
7507 #endif
7508 	return err;
7509 }
7510 
7511 /* fixup insn->imm field of bpf_call instructions
7512  * and inline eligible helpers as explicit sequence of BPF instructions
7513  *
7514  * this function is called after eBPF program passed verification
7515  */
7516 static int fixup_bpf_calls(struct bpf_verifier_env *env)
7517 {
7518 	struct bpf_prog *prog = env->prog;
7519 	struct bpf_insn *insn = prog->insnsi;
7520 	const struct bpf_func_proto *fn;
7521 	const int insn_cnt = prog->len;
7522 	const struct bpf_map_ops *ops;
7523 	struct bpf_insn_aux_data *aux;
7524 	struct bpf_insn insn_buf[16];
7525 	struct bpf_prog *new_prog;
7526 	struct bpf_map *map_ptr;
7527 	int i, cnt, delta = 0;
7528 
7529 	for (i = 0; i < insn_cnt; i++, insn++) {
7530 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
7531 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
7532 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
7533 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
7534 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
7535 			struct bpf_insn mask_and_div[] = {
7536 				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
7537 				/* Rx div 0 -> 0 */
7538 				BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
7539 				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
7540 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
7541 				*insn,
7542 			};
7543 			struct bpf_insn mask_and_mod[] = {
7544 				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
7545 				/* Rx mod 0 -> Rx */
7546 				BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
7547 				*insn,
7548 			};
7549 			struct bpf_insn *patchlet;
7550 
7551 			if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
7552 			    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
7553 				patchlet = mask_and_div + (is64 ? 1 : 0);
7554 				cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
7555 			} else {
7556 				patchlet = mask_and_mod + (is64 ? 1 : 0);
7557 				cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
7558 			}
7559 
7560 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
7561 			if (!new_prog)
7562 				return -ENOMEM;
7563 
7564 			delta    += cnt - 1;
7565 			env->prog = prog = new_prog;
7566 			insn      = new_prog->insnsi + i + delta;
7567 			continue;
7568 		}
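		/* Example (illustrative): for a 64-bit BPF_DIV | BPF_X the
		 * patchlet above (mask_and_div + 1) expands the insn into
		 *
		 *	if src != 0 goto +2
		 *	dst = 0		(32-bit xor, zero-extends)
		 *	goto +1
		 *	dst /= src
		 *
		 * and the 32-bit variant additionally truncates src with the
		 * leading mov32 first.  BPF_MOD keeps dst unchanged on a zero
		 * divisor instead of zeroing it.
		 */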
7569 
7570 		if (BPF_CLASS(insn->code) == BPF_LD &&
7571 		    (BPF_MODE(insn->code) == BPF_ABS ||
7572 		     BPF_MODE(insn->code) == BPF_IND)) {
7573 			cnt = env->ops->gen_ld_abs(insn, insn_buf);
7574 			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
7575 				verbose(env, "bpf verifier is misconfigured\n");
7576 				return -EINVAL;
7577 			}
7578 
7579 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7580 			if (!new_prog)
7581 				return -ENOMEM;
7582 
7583 			delta    += cnt - 1;
7584 			env->prog = prog = new_prog;
7585 			insn      = new_prog->insnsi + i + delta;
7586 			continue;
7587 		}
7588 
7589 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
7590 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
7591 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
7592 			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
7593 			struct bpf_insn insn_buf[16];
7594 			struct bpf_insn *patch = &insn_buf[0];
7595 			bool issrc, isneg;
7596 			u32 off_reg;
7597 
7598 			aux = &env->insn_aux_data[i + delta];
7599 			if (!aux->alu_state ||
7600 			    aux->alu_state == BPF_ALU_NON_POINTER)
7601 				continue;
7602 
7603 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
7604 			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
7605 				BPF_ALU_SANITIZE_SRC;
7606 
7607 			off_reg = issrc ? insn->src_reg : insn->dst_reg;
7608 			if (isneg)
7609 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
7610 			*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
7611 			*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
7612 			*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
7613 			*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
7614 			*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
7615 			if (issrc) {
7616 				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
7617 							 off_reg);
7618 				insn->src_reg = BPF_REG_AX;
7619 			} else {
7620 				*patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
7621 							 BPF_REG_AX);
7622 			}
7623 			if (isneg)
7624 				insn->code = insn->code == code_add ?
7625 					     code_sub : code_add;
7626 			*patch++ = *insn;
7627 			if (issrc && isneg)
7628 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
7629 			cnt = patch - insn_buf;
7630 
7631 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7632 			if (!new_prog)
7633 				return -ENOMEM;
7634 
7635 			delta    += cnt - 1;
7636 			env->prog = prog = new_prog;
7637 			insn      = new_prog->insnsi + i + delta;
7638 			continue;
7639 		}
7640 
7641 		if (insn->code != (BPF_JMP | BPF_CALL))
7642 			continue;
7643 		if (insn->src_reg == BPF_PSEUDO_CALL)
7644 			continue;
7645 
7646 		if (insn->imm == BPF_FUNC_get_route_realm)
7647 			prog->dst_needed = 1;
7648 		if (insn->imm == BPF_FUNC_get_prandom_u32)
7649 			bpf_user_rnd_init_once();
7650 		if (insn->imm == BPF_FUNC_override_return)
7651 			prog->kprobe_override = 1;
7652 		if (insn->imm == BPF_FUNC_tail_call) {
7653 			/* If we tail call into other programs, we
7654 			 * cannot make any assumptions since they can
7655 			 * be replaced dynamically during runtime in
7656 			 * the program array.
7657 			 */
7658 			prog->cb_access = 1;
7659 			env->prog->aux->stack_depth = MAX_BPF_STACK;
7660 			env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
7661 
7662 			/* mark bpf_tail_call as different opcode to avoid
7663 			 * conditional branch in the interpreter for every normal
7664 			 * call and to prevent accidental JITing by a JIT compiler
7665 			 * that doesn't support bpf_tail_call yet
7666 			 */
7667 			insn->imm = 0;
7668 			insn->code = BPF_JMP | BPF_TAIL_CALL;
7669 
7670 			aux = &env->insn_aux_data[i + delta];
7671 			if (!bpf_map_ptr_unpriv(aux))
7672 				continue;
7673 
7674 			/* instead of changing every JIT dealing with tail_call
7675 			 * emit two extra insns:
7676 			 * if (index >= max_entries) goto out;
7677 			 * index &= array->index_mask;
7678 			 * to avoid out-of-bounds cpu speculation
7679 			 */
7680 			if (bpf_map_ptr_poisoned(aux)) {
7681 				verbose(env, "tail_call abusing map_ptr\n");
7682 				return -EINVAL;
7683 			}
7684 
7685 			map_ptr = BPF_MAP_PTR(aux->map_state);
7686 			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
7687 						  map_ptr->max_entries, 2);
7688 			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
7689 						    container_of(map_ptr,
7690 								 struct bpf_array,
7691 								 map)->index_mask);
7692 			insn_buf[2] = *insn;
7693 			cnt = 3;
7694 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7695 			if (!new_prog)
7696 				return -ENOMEM;
7697 
7698 			delta    += cnt - 1;
7699 			env->prog = prog = new_prog;
7700 			insn      = new_prog->insnsi + i + delta;
7701 			continue;
7702 		}
7703 
7704 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
7705 		 * and other inlining handlers are currently limited to 64 bit
7706 		 * only.
7707 		 */
7708 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
7709 		    (insn->imm == BPF_FUNC_map_lookup_elem ||
7710 		     insn->imm == BPF_FUNC_map_update_elem ||
7711 		     insn->imm == BPF_FUNC_map_delete_elem ||
7712 		     insn->imm == BPF_FUNC_map_push_elem   ||
7713 		     insn->imm == BPF_FUNC_map_pop_elem    ||
7714 		     insn->imm == BPF_FUNC_map_peek_elem)) {
7715 			aux = &env->insn_aux_data[i + delta];
7716 			if (bpf_map_ptr_poisoned(aux))
7717 				goto patch_call_imm;
7718 
7719 			map_ptr = BPF_MAP_PTR(aux->map_state);
7720 			ops = map_ptr->ops;
7721 			if (insn->imm == BPF_FUNC_map_lookup_elem &&
7722 			    ops->map_gen_lookup) {
7723 				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
7724 				if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
7725 					verbose(env, "bpf verifier is misconfigured\n");
7726 					return -EINVAL;
7727 				}
7728 
7729 				new_prog = bpf_patch_insn_data(env, i + delta,
7730 							       insn_buf, cnt);
7731 				if (!new_prog)
7732 					return -ENOMEM;
7733 
7734 				delta    += cnt - 1;
7735 				env->prog = prog = new_prog;
7736 				insn      = new_prog->insnsi + i + delta;
7737 				continue;
7738 			}
7739 
7740 			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
7741 				     (void *(*)(struct bpf_map *map, void *key))NULL));
7742 			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
7743 				     (int (*)(struct bpf_map *map, void *key))NULL));
7744 			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
7745 				     (int (*)(struct bpf_map *map, void *key, void *value,
7746 					      u64 flags))NULL));
7747 			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
7748 				     (int (*)(struct bpf_map *map, void *value,
7749 					      u64 flags))NULL));
7750 			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
7751 				     (int (*)(struct bpf_map *map, void *value))NULL));
7752 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
7753 				     (int (*)(struct bpf_map *map, void *value))NULL));
7754 
7755 			switch (insn->imm) {
7756 			case BPF_FUNC_map_lookup_elem:
7757 				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
7758 					    __bpf_call_base;
7759 				continue;
7760 			case BPF_FUNC_map_update_elem:
7761 				insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
7762 					    __bpf_call_base;
7763 				continue;
7764 			case BPF_FUNC_map_delete_elem:
7765 				insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
7766 					    __bpf_call_base;
7767 				continue;
7768 			case BPF_FUNC_map_push_elem:
7769 				insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
7770 					    __bpf_call_base;
7771 				continue;
7772 			case BPF_FUNC_map_pop_elem:
7773 				insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
7774 					    __bpf_call_base;
7775 				continue;
7776 			case BPF_FUNC_map_peek_elem:
7777 				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
7778 					    __bpf_call_base;
7779 				continue;
7780 			}
7781 
7782 			goto patch_call_imm;
7783 		}
7784 
7785 patch_call_imm:
7786 		fn = env->ops->get_func_proto(insn->imm, env->prog);
7787 		/* all functions that have a prototype and that the verifier
7788 		 * allowed programs to call must be real in-kernel functions
7789 		 */
7790 		if (!fn->func) {
7791 			verbose(env,
7792 				"kernel subsystem misconfigured func %s#%d\n",
7793 				func_id_name(insn->imm), insn->imm);
7794 			return -EFAULT;
7795 		}
7796 		insn->imm = fn->func - __bpf_call_base;
7797 	}
7798 
7799 	return 0;
7800 }
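/* Illustrative note: for helpers that are not inlined above, the rewrite is
 * simply insn->imm = fn->func - __bpf_call_base, so the interpreter and the
 * JITs can resolve the call target as __bpf_call_base + insn->imm; e.g. a
 * bpf_map_lookup_elem() call on a map whose ops provide map_gen_lookup() is
 * instead replaced with the inline lookup sequence emitted by that callback.
 */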
7801 
7802 static void free_states(struct bpf_verifier_env *env)
7803 {
7804 	struct bpf_verifier_state_list *sl, *sln;
7805 	int i;
7806 
7807 	if (!env->explored_states)
7808 		return;
7809 
7810 	for (i = 0; i < env->prog->len; i++) {
7811 		sl = env->explored_states[i];
7812 
7813 		if (sl)
7814 			while (sl != STATE_LIST_MARK) {
7815 				sln = sl->next;
7816 				free_verifier_state(&sl->state, false);
7817 				kfree(sl);
7818 				sl = sln;
7819 			}
7820 	}
7821 
7822 	kfree(env->explored_states);
7823 }
7824 
7825 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
7826 	      union bpf_attr __user *uattr)
7827 {
7828 	struct bpf_verifier_env *env;
7829 	struct bpf_verifier_log *log;
7830 	int i, len, ret = -EINVAL;
7831 	bool is_priv;
7832 
7833 	/* no program is valid */
7834 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
7835 		return -EINVAL;
7836 
7837 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
7838 	 * allocate/free it every time bpf_check() is called
7839 	 */
7840 	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
7841 	if (!env)
7842 		return -ENOMEM;
7843 	log = &env->log;
7844 
7845 	len = (*prog)->len;
7846 	env->insn_aux_data =
7847 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
7848 	ret = -ENOMEM;
7849 	if (!env->insn_aux_data)
7850 		goto err_free_env;
7851 	for (i = 0; i < len; i++)
7852 		env->insn_aux_data[i].orig_idx = i;
7853 	env->prog = *prog;
7854 	env->ops = bpf_verifier_ops[env->prog->type];
7855 
7856 	/* grab the mutex to protect a few globals used by the verifier */
7857 	mutex_lock(&bpf_verifier_lock);
7858 
7859 	if (attr->log_level || attr->log_buf || attr->log_size) {
7860 		/* user requested verbose verifier output
7861 		 * and supplied buffer to store the verification trace
7862 		 */
7863 		log->level = attr->log_level;
7864 		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
7865 		log->len_total = attr->log_size;
7866 
7867 		ret = -EINVAL;
7868 		/* log attributes have to be sane */
7869 		if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
7870 		    !log->level || !log->ubuf)
7871 			goto err_unlock;
7872 	}
7873 
7874 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
7875 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
7876 		env->strict_alignment = true;
7877 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
7878 		env->strict_alignment = false;
7879 
7880 	is_priv = capable(CAP_SYS_ADMIN);
7881 	env->allow_ptr_leaks = is_priv;
7882 
7883 	ret = replace_map_fd_with_map_ptr(env);
7884 	if (ret < 0)
7885 		goto skip_full_check;
7886 
7887 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
7888 		ret = bpf_prog_offload_verifier_prep(env->prog);
7889 		if (ret)
7890 			goto skip_full_check;
7891 	}
7892 
7893 	env->explored_states = kcalloc(env->prog->len,
7894 				       sizeof(struct bpf_verifier_state_list *),
7895 				       GFP_USER);
7896 	ret = -ENOMEM;
7897 	if (!env->explored_states)
7898 		goto skip_full_check;
7899 
7900 	ret = check_subprogs(env);
7901 	if (ret < 0)
7902 		goto skip_full_check;
7903 
7904 	ret = check_btf_info(env, attr, uattr);
7905 	if (ret < 0)
7906 		goto skip_full_check;
7907 
7908 	ret = check_cfg(env);
7909 	if (ret < 0)
7910 		goto skip_full_check;
7911 
7912 	ret = do_check(env);
7913 	if (env->cur_state) {
7914 		free_verifier_state(env->cur_state, true);
7915 		env->cur_state = NULL;
7916 	}
7917 
7918 	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
7919 		ret = bpf_prog_offload_finalize(env);
7920 
7921 skip_full_check:
7922 	while (!pop_stack(env, NULL, NULL));
7923 	free_states(env);
7924 
7925 	if (ret == 0)
7926 		ret = check_max_stack_depth(env);
7927 
7928 	/* instruction rewrites happen after this point */
7929 	if (is_priv) {
7930 		if (ret == 0)
7931 			opt_hard_wire_dead_code_branches(env);
7932 		if (ret == 0)
7933 			ret = opt_remove_dead_code(env);
7934 		if (ret == 0)
7935 			ret = opt_remove_nops(env);
7936 	} else {
7937 		if (ret == 0)
7938 			sanitize_dead_code(env);
7939 	}
7940 
7941 	if (ret == 0)
7942 		/* program is valid, convert *(u32*)(ctx + off) accesses */
7943 		ret = convert_ctx_accesses(env);
7944 
7945 	if (ret == 0)
7946 		ret = fixup_bpf_calls(env);
7947 
7948 	if (ret == 0)
7949 		ret = fixup_call_args(env);
7950 
7951 	if (log->level && bpf_verifier_log_full(log))
7952 		ret = -ENOSPC;
7953 	if (log->level && !log->ubuf) {
7954 		ret = -EFAULT;
7955 		goto err_release_maps;
7956 	}
7957 
7958 	if (ret == 0 && env->used_map_cnt) {
7959 		/* if program passed verifier, update used_maps in bpf_prog_info */
7960 		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
7961 							  sizeof(env->used_maps[0]),
7962 							  GFP_KERNEL);
7963 
7964 		if (!env->prog->aux->used_maps) {
7965 			ret = -ENOMEM;
7966 			goto err_release_maps;
7967 		}
7968 
7969 		memcpy(env->prog->aux->used_maps, env->used_maps,
7970 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
7971 		env->prog->aux->used_map_cnt = env->used_map_cnt;
7972 
7973 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
7974 		 * bpf_ld_imm64 instructions
7975 		 */
7976 		convert_pseudo_ld_imm64(env);
7977 	}
7978 
7979 	if (ret == 0)
7980 		adjust_btf_func(env);
7981 
7982 err_release_maps:
7983 	if (!env->prog->aux->used_maps)
7984 		/* if we didn't copy map pointers into bpf_prog_info, release
7985 		 * them now. Otherwise free_used_maps() will release them.
7986 		 */
7987 		release_maps(env);
7988 	*prog = env->prog;
7989 err_unlock:
7990 	mutex_unlock(&bpf_verifier_lock);
7991 	vfree(env->insn_aux_data);
7992 err_free_env:
7993 	kfree(env);
7994 	return ret;
7995 }
7996