xref: /openbmc/linux/kernel/bpf/verifier.c (revision e33bbe69149b802c0c77bfb822685772f85388ca)
1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2  * Copyright (c) 2016 Facebook
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  */
13 #include <linux/kernel.h>
14 #include <linux/types.h>
15 #include <linux/slab.h>
16 #include <linux/bpf.h>
17 #include <linux/bpf_verifier.h>
18 #include <linux/filter.h>
19 #include <net/netlink.h>
20 #include <linux/file.h>
21 #include <linux/vmalloc.h>
22 #include <linux/stringify.h>
23 #include <linux/bsearch.h>
24 #include <linux/sort.h>
25 
26 #include "disasm.h"
27 
28 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
29 #define BPF_PROG_TYPE(_id, _name) \
30 	[_id] = & _name ## _verifier_ops,
31 #define BPF_MAP_TYPE(_id, _ops)
32 #include <linux/bpf_types.h>
33 #undef BPF_PROG_TYPE
34 #undef BPF_MAP_TYPE
35 };
36 
37 /* bpf_check() is a static code analyzer that walks eBPF program
38  * instruction by instruction and updates register/stack state.
39  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
40  *
41  * The first pass is depth-first-search to check that the program is a DAG.
42  * It rejects the following programs:
43  * - larger than BPF_MAXINSNS insns
44  * - if loop is present (detected via back-edge)
45  * - unreachable insns exist (shouldn't be a forest. program = one function)
46  * - out of bounds or malformed jumps
47  * The second pass is all possible path descent from the 1st insn.
48  * Since it's analyzing all pathes through the program, the length of the
49  * analysis is limited to 64k insn, which may be hit even if total number of
50  * insn is less then 4K, but there are too many branches that change stack/regs.
51  * Number of 'branches to be analyzed' is limited to 1k
52  *
53  * On entry to each instruction, each register has a type, and the instruction
54  * changes the types of the registers depending on instruction semantics.
55  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
56  * copied to R1.
57  *
58  * All registers are 64-bit.
59  * R0 - return register
60  * R1-R5 argument passing registers
61  * R6-R9 callee saved registers
62  * R10 - frame pointer read-only
63  *
64  * At the start of BPF program the register R1 contains a pointer to bpf_context
65  * and has type PTR_TO_CTX.
66  *
67  * Verifier tracks arithmetic operations on pointers in case:
68  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
69  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
70  * 1st insn copies R10 (which has FRAME_PTR) type into R1
71  * and 2nd arithmetic instruction is pattern matched to recognize
72  * that it wants to construct a pointer to some element within stack.
73  * So after 2nd insn, the register R1 has type PTR_TO_STACK
74  * (and -20 constant is saved for further stack bounds checking).
75  * Meaning that this reg is a pointer to stack plus known immediate constant.
76  *
77  * Most of the time the registers have SCALAR_VALUE type, which
78  * means the register has some value, but it's not a valid pointer.
79  * (like pointer plus pointer becomes SCALAR_VALUE type)
80  *
81  * When verifier sees load or store instructions the type of base register
82  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
83  * types recognized by check_mem_access() function.
84  *
85  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
86  * and the range of [ptr, ptr + map's value_size) is accessible.
87  *
88  * registers used to pass values to function calls are checked against
89  * function argument constraints.
90  *
91  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
92  * It means that the register type passed to this function must be
93  * PTR_TO_STACK and it will be used inside the function as
94  * 'pointer to map element key'
95  *
96  * For example the argument constraints for bpf_map_lookup_elem():
97  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
98  *   .arg1_type = ARG_CONST_MAP_PTR,
99  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
100  *
101  * ret_type says that this function returns 'pointer to map elem value or null'
102  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
103  * 2nd argument should be a pointer to stack, which will be used inside
104  * the helper function as a pointer to map element key.
105  *
106  * On the kernel side the helper function looks like:
107  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
108  * {
109  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
110  *    void *key = (void *) (unsigned long) r2;
111  *    void *value;
112  *
113  *    here kernel can access 'key' and 'map' pointers safely, knowing that
114  *    [key, key + map->key_size) bytes are valid and were initialized on
115  *    the stack of eBPF program.
116  * }
117  *
118  * Corresponding eBPF program may look like:
119  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
120  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
121  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
122  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
123  * here verifier looks at prototype of map_lookup_elem() and sees:
124  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
125  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
126  *
127  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
128  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
129  * and were initialized prior to this call.
130  * If it's ok, then verifier allows this BPF_CALL insn and looks at
131  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
132  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
133  * returns ether pointer to map value or NULL.
134  *
135  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
136  * insn, the register holding that pointer in the true branch changes state to
137  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
138  * branch. See check_cond_jmp_op().
139  *
140  * After the call R0 is set to return type of the function and registers R1-R5
141  * are set to NOT_INIT to indicate that they are no longer readable.
142  */
143 
144 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
145 struct bpf_verifier_stack_elem {
146 	/* verifer state is 'st'
147 	 * before processing instruction 'insn_idx'
148 	 * and after processing instruction 'prev_insn_idx'
149 	 */
150 	struct bpf_verifier_state st;
151 	int insn_idx;
152 	int prev_insn_idx;
153 	struct bpf_verifier_stack_elem *next;
154 };
155 
156 #define BPF_COMPLEXITY_LIMIT_INSNS	131072
157 #define BPF_COMPLEXITY_LIMIT_STACK	1024
158 
159 #define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
160 
161 struct bpf_call_arg_meta {
162 	struct bpf_map *map_ptr;
163 	bool raw_mode;
164 	bool pkt_access;
165 	int regno;
166 	int access_size;
167 };
168 
169 static DEFINE_MUTEX(bpf_verifier_lock);
170 
171 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
172 		       va_list args)
173 {
174 	unsigned int n;
175 
176 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
177 
178 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
179 		  "verifier log line truncated - local buffer too short\n");
180 
181 	n = min(log->len_total - log->len_used - 1, n);
182 	log->kbuf[n] = '\0';
183 
184 	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
185 		log->len_used += n;
186 	else
187 		log->ubuf = NULL;
188 }
189 
190 /* log_level controls verbosity level of eBPF verifier.
191  * bpf_verifier_log_write() is used to dump the verification trace to the log,
192  * so the user can figure out what's wrong with the program
193  */
194 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
195 					   const char *fmt, ...)
196 {
197 	va_list args;
198 
199 	if (!bpf_verifier_log_needed(&env->log))
200 		return;
201 
202 	va_start(args, fmt);
203 	bpf_verifier_vlog(&env->log, fmt, args);
204 	va_end(args);
205 }
206 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
207 
208 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
209 {
210 	struct bpf_verifier_env *env = private_data;
211 	va_list args;
212 
213 	if (!bpf_verifier_log_needed(&env->log))
214 		return;
215 
216 	va_start(args, fmt);
217 	bpf_verifier_vlog(&env->log, fmt, args);
218 	va_end(args);
219 }
220 
221 static bool type_is_pkt_pointer(enum bpf_reg_type type)
222 {
223 	return type == PTR_TO_PACKET ||
224 	       type == PTR_TO_PACKET_META;
225 }
226 
227 /* string representation of 'enum bpf_reg_type' */
228 static const char * const reg_type_str[] = {
229 	[NOT_INIT]		= "?",
230 	[SCALAR_VALUE]		= "inv",
231 	[PTR_TO_CTX]		= "ctx",
232 	[CONST_PTR_TO_MAP]	= "map_ptr",
233 	[PTR_TO_MAP_VALUE]	= "map_value",
234 	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
235 	[PTR_TO_STACK]		= "fp",
236 	[PTR_TO_PACKET]		= "pkt",
237 	[PTR_TO_PACKET_META]	= "pkt_meta",
238 	[PTR_TO_PACKET_END]	= "pkt_end",
239 };
240 
241 static void print_liveness(struct bpf_verifier_env *env,
242 			   enum bpf_reg_liveness live)
243 {
244 	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
245 	    verbose(env, "_");
246 	if (live & REG_LIVE_READ)
247 		verbose(env, "r");
248 	if (live & REG_LIVE_WRITTEN)
249 		verbose(env, "w");
250 }
251 
252 static struct bpf_func_state *func(struct bpf_verifier_env *env,
253 				   const struct bpf_reg_state *reg)
254 {
255 	struct bpf_verifier_state *cur = env->cur_state;
256 
257 	return cur->frame[reg->frameno];
258 }
259 
260 static void print_verifier_state(struct bpf_verifier_env *env,
261 				 const struct bpf_func_state *state)
262 {
263 	const struct bpf_reg_state *reg;
264 	enum bpf_reg_type t;
265 	int i;
266 
267 	if (state->frameno)
268 		verbose(env, " frame%d:", state->frameno);
269 	for (i = 0; i < MAX_BPF_REG; i++) {
270 		reg = &state->regs[i];
271 		t = reg->type;
272 		if (t == NOT_INIT)
273 			continue;
274 		verbose(env, " R%d", i);
275 		print_liveness(env, reg->live);
276 		verbose(env, "=%s", reg_type_str[t]);
277 		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
278 		    tnum_is_const(reg->var_off)) {
279 			/* reg->off should be 0 for SCALAR_VALUE */
280 			verbose(env, "%lld", reg->var_off.value + reg->off);
281 			if (t == PTR_TO_STACK)
282 				verbose(env, ",call_%d", func(env, reg)->callsite);
283 		} else {
284 			verbose(env, "(id=%d", reg->id);
285 			if (t != SCALAR_VALUE)
286 				verbose(env, ",off=%d", reg->off);
287 			if (type_is_pkt_pointer(t))
288 				verbose(env, ",r=%d", reg->range);
289 			else if (t == CONST_PTR_TO_MAP ||
290 				 t == PTR_TO_MAP_VALUE ||
291 				 t == PTR_TO_MAP_VALUE_OR_NULL)
292 				verbose(env, ",ks=%d,vs=%d",
293 					reg->map_ptr->key_size,
294 					reg->map_ptr->value_size);
295 			if (tnum_is_const(reg->var_off)) {
296 				/* Typically an immediate SCALAR_VALUE, but
297 				 * could be a pointer whose offset is too big
298 				 * for reg->off
299 				 */
300 				verbose(env, ",imm=%llx", reg->var_off.value);
301 			} else {
302 				if (reg->smin_value != reg->umin_value &&
303 				    reg->smin_value != S64_MIN)
304 					verbose(env, ",smin_value=%lld",
305 						(long long)reg->smin_value);
306 				if (reg->smax_value != reg->umax_value &&
307 				    reg->smax_value != S64_MAX)
308 					verbose(env, ",smax_value=%lld",
309 						(long long)reg->smax_value);
310 				if (reg->umin_value != 0)
311 					verbose(env, ",umin_value=%llu",
312 						(unsigned long long)reg->umin_value);
313 				if (reg->umax_value != U64_MAX)
314 					verbose(env, ",umax_value=%llu",
315 						(unsigned long long)reg->umax_value);
316 				if (!tnum_is_unknown(reg->var_off)) {
317 					char tn_buf[48];
318 
319 					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
320 					verbose(env, ",var_off=%s", tn_buf);
321 				}
322 			}
323 			verbose(env, ")");
324 		}
325 	}
326 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
327 		if (state->stack[i].slot_type[0] == STACK_SPILL) {
328 			verbose(env, " fp%d",
329 				(-i - 1) * BPF_REG_SIZE);
330 			print_liveness(env, state->stack[i].spilled_ptr.live);
331 			verbose(env, "=%s",
332 				reg_type_str[state->stack[i].spilled_ptr.type]);
333 		}
334 		if (state->stack[i].slot_type[0] == STACK_ZERO)
335 			verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
336 	}
337 	verbose(env, "\n");
338 }
339 
340 static int copy_stack_state(struct bpf_func_state *dst,
341 			    const struct bpf_func_state *src)
342 {
343 	if (!src->stack)
344 		return 0;
345 	if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
346 		/* internal bug, make state invalid to reject the program */
347 		memset(dst, 0, sizeof(*dst));
348 		return -EFAULT;
349 	}
350 	memcpy(dst->stack, src->stack,
351 	       sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
352 	return 0;
353 }
354 
355 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
356  * make it consume minimal amount of memory. check_stack_write() access from
357  * the program calls into realloc_func_state() to grow the stack size.
358  * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
359  * which this function copies over. It points to previous bpf_verifier_state
360  * which is never reallocated
361  */
362 static int realloc_func_state(struct bpf_func_state *state, int size,
363 			      bool copy_old)
364 {
365 	u32 old_size = state->allocated_stack;
366 	struct bpf_stack_state *new_stack;
367 	int slot = size / BPF_REG_SIZE;
368 
369 	if (size <= old_size || !size) {
370 		if (copy_old)
371 			return 0;
372 		state->allocated_stack = slot * BPF_REG_SIZE;
373 		if (!size && old_size) {
374 			kfree(state->stack);
375 			state->stack = NULL;
376 		}
377 		return 0;
378 	}
379 	new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
380 				  GFP_KERNEL);
381 	if (!new_stack)
382 		return -ENOMEM;
383 	if (copy_old) {
384 		if (state->stack)
385 			memcpy(new_stack, state->stack,
386 			       sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
387 		memset(new_stack + old_size / BPF_REG_SIZE, 0,
388 		       sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
389 	}
390 	state->allocated_stack = slot * BPF_REG_SIZE;
391 	kfree(state->stack);
392 	state->stack = new_stack;
393 	return 0;
394 }
395 
396 static void free_func_state(struct bpf_func_state *state)
397 {
398 	if (!state)
399 		return;
400 	kfree(state->stack);
401 	kfree(state);
402 }
403 
404 static void free_verifier_state(struct bpf_verifier_state *state,
405 				bool free_self)
406 {
407 	int i;
408 
409 	for (i = 0; i <= state->curframe; i++) {
410 		free_func_state(state->frame[i]);
411 		state->frame[i] = NULL;
412 	}
413 	if (free_self)
414 		kfree(state);
415 }
416 
417 /* copy verifier state from src to dst growing dst stack space
418  * when necessary to accommodate larger src stack
419  */
420 static int copy_func_state(struct bpf_func_state *dst,
421 			   const struct bpf_func_state *src)
422 {
423 	int err;
424 
425 	err = realloc_func_state(dst, src->allocated_stack, false);
426 	if (err)
427 		return err;
428 	memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
429 	return copy_stack_state(dst, src);
430 }
431 
432 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
433 			       const struct bpf_verifier_state *src)
434 {
435 	struct bpf_func_state *dst;
436 	int i, err;
437 
438 	/* if dst has more stack frames then src frame, free them */
439 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
440 		free_func_state(dst_state->frame[i]);
441 		dst_state->frame[i] = NULL;
442 	}
443 	dst_state->curframe = src->curframe;
444 	dst_state->parent = src->parent;
445 	for (i = 0; i <= src->curframe; i++) {
446 		dst = dst_state->frame[i];
447 		if (!dst) {
448 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
449 			if (!dst)
450 				return -ENOMEM;
451 			dst_state->frame[i] = dst;
452 		}
453 		err = copy_func_state(dst, src->frame[i]);
454 		if (err)
455 			return err;
456 	}
457 	return 0;
458 }
459 
460 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
461 		     int *insn_idx)
462 {
463 	struct bpf_verifier_state *cur = env->cur_state;
464 	struct bpf_verifier_stack_elem *elem, *head = env->head;
465 	int err;
466 
467 	if (env->head == NULL)
468 		return -ENOENT;
469 
470 	if (cur) {
471 		err = copy_verifier_state(cur, &head->st);
472 		if (err)
473 			return err;
474 	}
475 	if (insn_idx)
476 		*insn_idx = head->insn_idx;
477 	if (prev_insn_idx)
478 		*prev_insn_idx = head->prev_insn_idx;
479 	elem = head->next;
480 	free_verifier_state(&head->st, false);
481 	kfree(head);
482 	env->head = elem;
483 	env->stack_size--;
484 	return 0;
485 }
486 
487 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
488 					     int insn_idx, int prev_insn_idx)
489 {
490 	struct bpf_verifier_state *cur = env->cur_state;
491 	struct bpf_verifier_stack_elem *elem;
492 	int err;
493 
494 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
495 	if (!elem)
496 		goto err;
497 
498 	elem->insn_idx = insn_idx;
499 	elem->prev_insn_idx = prev_insn_idx;
500 	elem->next = env->head;
501 	env->head = elem;
502 	env->stack_size++;
503 	err = copy_verifier_state(&elem->st, cur);
504 	if (err)
505 		goto err;
506 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
507 		verbose(env, "BPF program is too complex\n");
508 		goto err;
509 	}
510 	return &elem->st;
511 err:
512 	free_verifier_state(env->cur_state, true);
513 	env->cur_state = NULL;
514 	/* pop all elements and return */
515 	while (!pop_stack(env, NULL, NULL));
516 	return NULL;
517 }
518 
519 #define CALLER_SAVED_REGS 6
520 static const int caller_saved[CALLER_SAVED_REGS] = {
521 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
522 };
523 
524 static void __mark_reg_not_init(struct bpf_reg_state *reg);
525 
526 /* Mark the unknown part of a register (variable offset or scalar value) as
527  * known to have the value @imm.
528  */
529 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
530 {
531 	reg->id = 0;
532 	reg->var_off = tnum_const(imm);
533 	reg->smin_value = (s64)imm;
534 	reg->smax_value = (s64)imm;
535 	reg->umin_value = imm;
536 	reg->umax_value = imm;
537 }
538 
539 /* Mark the 'variable offset' part of a register as zero.  This should be
540  * used only on registers holding a pointer type.
541  */
542 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
543 {
544 	__mark_reg_known(reg, 0);
545 }
546 
547 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
548 {
549 	__mark_reg_known(reg, 0);
550 	reg->off = 0;
551 	reg->type = SCALAR_VALUE;
552 }
553 
554 static void mark_reg_known_zero(struct bpf_verifier_env *env,
555 				struct bpf_reg_state *regs, u32 regno)
556 {
557 	if (WARN_ON(regno >= MAX_BPF_REG)) {
558 		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
559 		/* Something bad happened, let's kill all regs */
560 		for (regno = 0; regno < MAX_BPF_REG; regno++)
561 			__mark_reg_not_init(regs + regno);
562 		return;
563 	}
564 	__mark_reg_known_zero(regs + regno);
565 }
566 
567 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
568 {
569 	return type_is_pkt_pointer(reg->type);
570 }
571 
572 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
573 {
574 	return reg_is_pkt_pointer(reg) ||
575 	       reg->type == PTR_TO_PACKET_END;
576 }
577 
578 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
579 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
580 				    enum bpf_reg_type which)
581 {
582 	/* The register can already have a range from prior markings.
583 	 * This is fine as long as it hasn't been advanced from its
584 	 * origin.
585 	 */
586 	return reg->type == which &&
587 	       reg->id == 0 &&
588 	       reg->off == 0 &&
589 	       tnum_equals_const(reg->var_off, 0);
590 }
591 
592 /* Attempts to improve min/max values based on var_off information */
593 static void __update_reg_bounds(struct bpf_reg_state *reg)
594 {
595 	/* min signed is max(sign bit) | min(other bits) */
596 	reg->smin_value = max_t(s64, reg->smin_value,
597 				reg->var_off.value | (reg->var_off.mask & S64_MIN));
598 	/* max signed is min(sign bit) | max(other bits) */
599 	reg->smax_value = min_t(s64, reg->smax_value,
600 				reg->var_off.value | (reg->var_off.mask & S64_MAX));
601 	reg->umin_value = max(reg->umin_value, reg->var_off.value);
602 	reg->umax_value = min(reg->umax_value,
603 			      reg->var_off.value | reg->var_off.mask);
604 }
605 
606 /* Uses signed min/max values to inform unsigned, and vice-versa */
607 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
608 {
609 	/* Learn sign from signed bounds.
610 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
611 	 * are the same, so combine.  This works even in the negative case, e.g.
612 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
613 	 */
614 	if (reg->smin_value >= 0 || reg->smax_value < 0) {
615 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
616 							  reg->umin_value);
617 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
618 							  reg->umax_value);
619 		return;
620 	}
621 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
622 	 * boundary, so we must be careful.
623 	 */
624 	if ((s64)reg->umax_value >= 0) {
625 		/* Positive.  We can't learn anything from the smin, but smax
626 		 * is positive, hence safe.
627 		 */
628 		reg->smin_value = reg->umin_value;
629 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
630 							  reg->umax_value);
631 	} else if ((s64)reg->umin_value < 0) {
632 		/* Negative.  We can't learn anything from the smax, but smin
633 		 * is negative, hence safe.
634 		 */
635 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
636 							  reg->umin_value);
637 		reg->smax_value = reg->umax_value;
638 	}
639 }
640 
641 /* Attempts to improve var_off based on unsigned min/max information */
642 static void __reg_bound_offset(struct bpf_reg_state *reg)
643 {
644 	reg->var_off = tnum_intersect(reg->var_off,
645 				      tnum_range(reg->umin_value,
646 						 reg->umax_value));
647 }
648 
649 /* Reset the min/max bounds of a register */
650 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
651 {
652 	reg->smin_value = S64_MIN;
653 	reg->smax_value = S64_MAX;
654 	reg->umin_value = 0;
655 	reg->umax_value = U64_MAX;
656 }
657 
658 /* Mark a register as having a completely unknown (scalar) value. */
659 static void __mark_reg_unknown(struct bpf_reg_state *reg)
660 {
661 	reg->type = SCALAR_VALUE;
662 	reg->id = 0;
663 	reg->off = 0;
664 	reg->var_off = tnum_unknown;
665 	reg->frameno = 0;
666 	__mark_reg_unbounded(reg);
667 }
668 
669 static void mark_reg_unknown(struct bpf_verifier_env *env,
670 			     struct bpf_reg_state *regs, u32 regno)
671 {
672 	if (WARN_ON(regno >= MAX_BPF_REG)) {
673 		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
674 		/* Something bad happened, let's kill all regs except FP */
675 		for (regno = 0; regno < BPF_REG_FP; regno++)
676 			__mark_reg_not_init(regs + regno);
677 		return;
678 	}
679 	__mark_reg_unknown(regs + regno);
680 }
681 
682 static void __mark_reg_not_init(struct bpf_reg_state *reg)
683 {
684 	__mark_reg_unknown(reg);
685 	reg->type = NOT_INIT;
686 }
687 
688 static void mark_reg_not_init(struct bpf_verifier_env *env,
689 			      struct bpf_reg_state *regs, u32 regno)
690 {
691 	if (WARN_ON(regno >= MAX_BPF_REG)) {
692 		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
693 		/* Something bad happened, let's kill all regs except FP */
694 		for (regno = 0; regno < BPF_REG_FP; regno++)
695 			__mark_reg_not_init(regs + regno);
696 		return;
697 	}
698 	__mark_reg_not_init(regs + regno);
699 }
700 
701 static void init_reg_state(struct bpf_verifier_env *env,
702 			   struct bpf_func_state *state)
703 {
704 	struct bpf_reg_state *regs = state->regs;
705 	int i;
706 
707 	for (i = 0; i < MAX_BPF_REG; i++) {
708 		mark_reg_not_init(env, regs, i);
709 		regs[i].live = REG_LIVE_NONE;
710 	}
711 
712 	/* frame pointer */
713 	regs[BPF_REG_FP].type = PTR_TO_STACK;
714 	mark_reg_known_zero(env, regs, BPF_REG_FP);
715 	regs[BPF_REG_FP].frameno = state->frameno;
716 
717 	/* 1st arg to a function */
718 	regs[BPF_REG_1].type = PTR_TO_CTX;
719 	mark_reg_known_zero(env, regs, BPF_REG_1);
720 }
721 
722 #define BPF_MAIN_FUNC (-1)
723 static void init_func_state(struct bpf_verifier_env *env,
724 			    struct bpf_func_state *state,
725 			    int callsite, int frameno, int subprogno)
726 {
727 	state->callsite = callsite;
728 	state->frameno = frameno;
729 	state->subprogno = subprogno;
730 	init_reg_state(env, state);
731 }
732 
733 enum reg_arg_type {
734 	SRC_OP,		/* register is used as source operand */
735 	DST_OP,		/* register is used as destination operand */
736 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
737 };
738 
739 static int cmp_subprogs(const void *a, const void *b)
740 {
741 	return *(int *)a - *(int *)b;
742 }
743 
744 static int find_subprog(struct bpf_verifier_env *env, int off)
745 {
746 	u32 *p;
747 
748 	p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
749 		    sizeof(env->subprog_starts[0]), cmp_subprogs);
750 	if (!p)
751 		return -ENOENT;
752 	return p - env->subprog_starts;
753 
754 }
755 
756 static int add_subprog(struct bpf_verifier_env *env, int off)
757 {
758 	int insn_cnt = env->prog->len;
759 	int ret;
760 
761 	if (off >= insn_cnt || off < 0) {
762 		verbose(env, "call to invalid destination\n");
763 		return -EINVAL;
764 	}
765 	ret = find_subprog(env, off);
766 	if (ret >= 0)
767 		return 0;
768 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
769 		verbose(env, "too many subprograms\n");
770 		return -E2BIG;
771 	}
772 	env->subprog_starts[env->subprog_cnt++] = off;
773 	sort(env->subprog_starts, env->subprog_cnt,
774 	     sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
775 	return 0;
776 }
777 
778 static int check_subprogs(struct bpf_verifier_env *env)
779 {
780 	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
781 	struct bpf_insn *insn = env->prog->insnsi;
782 	int insn_cnt = env->prog->len;
783 
784 	/* determine subprog starts. The end is one before the next starts */
785 	for (i = 0; i < insn_cnt; i++) {
786 		if (insn[i].code != (BPF_JMP | BPF_CALL))
787 			continue;
788 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
789 			continue;
790 		if (!env->allow_ptr_leaks) {
791 			verbose(env, "function calls to other bpf functions are allowed for root only\n");
792 			return -EPERM;
793 		}
794 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
795 			verbose(env, "function calls in offloaded programs are not supported yet\n");
796 			return -EINVAL;
797 		}
798 		ret = add_subprog(env, i + insn[i].imm + 1);
799 		if (ret < 0)
800 			return ret;
801 	}
802 
803 	if (env->log.level > 1)
804 		for (i = 0; i < env->subprog_cnt; i++)
805 			verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
806 
807 	/* now check that all jumps are within the same subprog */
808 	subprog_start = 0;
809 	if (env->subprog_cnt == cur_subprog)
810 		subprog_end = insn_cnt;
811 	else
812 		subprog_end = env->subprog_starts[cur_subprog++];
813 	for (i = 0; i < insn_cnt; i++) {
814 		u8 code = insn[i].code;
815 
816 		if (BPF_CLASS(code) != BPF_JMP)
817 			goto next;
818 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
819 			goto next;
820 		off = i + insn[i].off + 1;
821 		if (off < subprog_start || off >= subprog_end) {
822 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
823 			return -EINVAL;
824 		}
825 next:
826 		if (i == subprog_end - 1) {
827 			/* to avoid fall-through from one subprog into another
828 			 * the last insn of the subprog should be either exit
829 			 * or unconditional jump back
830 			 */
831 			if (code != (BPF_JMP | BPF_EXIT) &&
832 			    code != (BPF_JMP | BPF_JA)) {
833 				verbose(env, "last insn is not an exit or jmp\n");
834 				return -EINVAL;
835 			}
836 			subprog_start = subprog_end;
837 			if (env->subprog_cnt == cur_subprog)
838 				subprog_end = insn_cnt;
839 			else
840 				subprog_end = env->subprog_starts[cur_subprog++];
841 		}
842 	}
843 	return 0;
844 }
845 
846 static
847 struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
848 				       const struct bpf_verifier_state *state,
849 				       struct bpf_verifier_state *parent,
850 				       u32 regno)
851 {
852 	struct bpf_verifier_state *tmp = NULL;
853 
854 	/* 'parent' could be a state of caller and
855 	 * 'state' could be a state of callee. In such case
856 	 * parent->curframe < state->curframe
857 	 * and it's ok for r1 - r5 registers
858 	 *
859 	 * 'parent' could be a callee's state after it bpf_exit-ed.
860 	 * In such case parent->curframe > state->curframe
861 	 * and it's ok for r0 only
862 	 */
863 	if (parent->curframe == state->curframe ||
864 	    (parent->curframe < state->curframe &&
865 	     regno >= BPF_REG_1 && regno <= BPF_REG_5) ||
866 	    (parent->curframe > state->curframe &&
867 	       regno == BPF_REG_0))
868 		return parent;
869 
870 	if (parent->curframe > state->curframe &&
871 	    regno >= BPF_REG_6) {
872 		/* for callee saved regs we have to skip the whole chain
873 		 * of states that belong to callee and mark as LIVE_READ
874 		 * the registers before the call
875 		 */
876 		tmp = parent;
877 		while (tmp && tmp->curframe != state->curframe) {
878 			tmp = tmp->parent;
879 		}
880 		if (!tmp)
881 			goto bug;
882 		parent = tmp;
883 	} else {
884 		goto bug;
885 	}
886 	return parent;
887 bug:
888 	verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
889 	verbose(env, "regno %d parent frame %d current frame %d\n",
890 		regno, parent->curframe, state->curframe);
891 	return NULL;
892 }
893 
894 static int mark_reg_read(struct bpf_verifier_env *env,
895 			 const struct bpf_verifier_state *state,
896 			 struct bpf_verifier_state *parent,
897 			 u32 regno)
898 {
899 	bool writes = parent == state->parent; /* Observe write marks */
900 
901 	if (regno == BPF_REG_FP)
902 		/* We don't need to worry about FP liveness because it's read-only */
903 		return 0;
904 
905 	while (parent) {
906 		/* if read wasn't screened by an earlier write ... */
907 		if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN)
908 			break;
909 		parent = skip_callee(env, state, parent, regno);
910 		if (!parent)
911 			return -EFAULT;
912 		/* ... then we depend on parent's value */
913 		parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ;
914 		state = parent;
915 		parent = state->parent;
916 		writes = true;
917 	}
918 	return 0;
919 }
920 
921 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
922 			 enum reg_arg_type t)
923 {
924 	struct bpf_verifier_state *vstate = env->cur_state;
925 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
926 	struct bpf_reg_state *regs = state->regs;
927 
928 	if (regno >= MAX_BPF_REG) {
929 		verbose(env, "R%d is invalid\n", regno);
930 		return -EINVAL;
931 	}
932 
933 	if (t == SRC_OP) {
934 		/* check whether register used as source operand can be read */
935 		if (regs[regno].type == NOT_INIT) {
936 			verbose(env, "R%d !read_ok\n", regno);
937 			return -EACCES;
938 		}
939 		return mark_reg_read(env, vstate, vstate->parent, regno);
940 	} else {
941 		/* check whether register used as dest operand can be written to */
942 		if (regno == BPF_REG_FP) {
943 			verbose(env, "frame pointer is read only\n");
944 			return -EACCES;
945 		}
946 		regs[regno].live |= REG_LIVE_WRITTEN;
947 		if (t == DST_OP)
948 			mark_reg_unknown(env, regs, regno);
949 	}
950 	return 0;
951 }
952 
953 static bool is_spillable_regtype(enum bpf_reg_type type)
954 {
955 	switch (type) {
956 	case PTR_TO_MAP_VALUE:
957 	case PTR_TO_MAP_VALUE_OR_NULL:
958 	case PTR_TO_STACK:
959 	case PTR_TO_CTX:
960 	case PTR_TO_PACKET:
961 	case PTR_TO_PACKET_META:
962 	case PTR_TO_PACKET_END:
963 	case CONST_PTR_TO_MAP:
964 		return true;
965 	default:
966 		return false;
967 	}
968 }
969 
970 /* Does this register contain a constant zero? */
971 static bool register_is_null(struct bpf_reg_state *reg)
972 {
973 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
974 }
975 
976 /* check_stack_read/write functions track spill/fill of registers,
977  * stack boundary and alignment are checked in check_mem_access()
978  */
979 static int check_stack_write(struct bpf_verifier_env *env,
980 			     struct bpf_func_state *state, /* func where register points to */
981 			     int off, int size, int value_regno)
982 {
983 	struct bpf_func_state *cur; /* state of the current function */
984 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
985 	enum bpf_reg_type type;
986 
987 	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
988 				 true);
989 	if (err)
990 		return err;
991 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
992 	 * so it's aligned access and [off, off + size) are within stack limits
993 	 */
994 	if (!env->allow_ptr_leaks &&
995 	    state->stack[spi].slot_type[0] == STACK_SPILL &&
996 	    size != BPF_REG_SIZE) {
997 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
998 		return -EACCES;
999 	}
1000 
1001 	cur = env->cur_state->frame[env->cur_state->curframe];
1002 	if (value_regno >= 0 &&
1003 	    is_spillable_regtype((type = cur->regs[value_regno].type))) {
1004 
1005 		/* register containing pointer is being spilled into stack */
1006 		if (size != BPF_REG_SIZE) {
1007 			verbose(env, "invalid size of register spill\n");
1008 			return -EACCES;
1009 		}
1010 
1011 		if (state != cur && type == PTR_TO_STACK) {
1012 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1013 			return -EINVAL;
1014 		}
1015 
1016 		/* save register state */
1017 		state->stack[spi].spilled_ptr = cur->regs[value_regno];
1018 		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1019 
1020 		for (i = 0; i < BPF_REG_SIZE; i++)
1021 			state->stack[spi].slot_type[i] = STACK_SPILL;
1022 	} else {
1023 		u8 type = STACK_MISC;
1024 
1025 		/* regular write of data into stack */
1026 		state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
1027 
1028 		/* only mark the slot as written if all 8 bytes were written
1029 		 * otherwise read propagation may incorrectly stop too soon
1030 		 * when stack slots are partially written.
1031 		 * This heuristic means that read propagation will be
1032 		 * conservative, since it will add reg_live_read marks
1033 		 * to stack slots all the way to first state when programs
1034 		 * writes+reads less than 8 bytes
1035 		 */
1036 		if (size == BPF_REG_SIZE)
1037 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1038 
1039 		/* when we zero initialize stack slots mark them as such */
1040 		if (value_regno >= 0 &&
1041 		    register_is_null(&cur->regs[value_regno]))
1042 			type = STACK_ZERO;
1043 
1044 		for (i = 0; i < size; i++)
1045 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
1046 				type;
1047 	}
1048 	return 0;
1049 }
1050 
1051 /* registers of every function are unique and mark_reg_read() propagates
1052  * the liveness in the following cases:
1053  * - from callee into caller for R1 - R5 that were used as arguments
1054  * - from caller into callee for R0 that used as result of the call
1055  * - from caller to the same caller skipping states of the callee for R6 - R9,
1056  *   since R6 - R9 are callee saved by implicit function prologue and
1057  *   caller's R6 != callee's R6, so when we propagate liveness up to
1058  *   parent states we need to skip callee states for R6 - R9.
1059  *
1060  * stack slot marking is different, since stacks of caller and callee are
1061  * accessible in both (since caller can pass a pointer to caller's stack to
1062  * callee which can pass it to another function), hence mark_stack_slot_read()
1063  * has to propagate the stack liveness to all parent states at given frame number.
1064  * Consider code:
1065  * f1() {
1066  *   ptr = fp - 8;
1067  *   *ptr = ctx;
1068  *   call f2 {
1069  *      .. = *ptr;
1070  *   }
1071  *   .. = *ptr;
1072  * }
1073  * First *ptr is reading from f1's stack and mark_stack_slot_read() has
1074  * to mark liveness at the f1's frame and not f2's frame.
1075  * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has
1076  * to propagate liveness to f2 states at f1's frame level and further into
1077  * f1 states at f1's frame level until write into that stack slot
1078  */
1079 static void mark_stack_slot_read(struct bpf_verifier_env *env,
1080 				 const struct bpf_verifier_state *state,
1081 				 struct bpf_verifier_state *parent,
1082 				 int slot, int frameno)
1083 {
1084 	bool writes = parent == state->parent; /* Observe write marks */
1085 
1086 	while (parent) {
1087 		if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE)
1088 			/* since LIVE_WRITTEN mark is only done for full 8-byte
1089 			 * write the read marks are conservative and parent
1090 			 * state may not even have the stack allocated. In such case
1091 			 * end the propagation, since the loop reached beginning
1092 			 * of the function
1093 			 */
1094 			break;
1095 		/* if read wasn't screened by an earlier write ... */
1096 		if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
1097 			break;
1098 		/* ... then we depend on parent's value */
1099 		parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
1100 		state = parent;
1101 		parent = state->parent;
1102 		writes = true;
1103 	}
1104 }
1105 
1106 static int check_stack_read(struct bpf_verifier_env *env,
1107 			    struct bpf_func_state *reg_state /* func where register points to */,
1108 			    int off, int size, int value_regno)
1109 {
1110 	struct bpf_verifier_state *vstate = env->cur_state;
1111 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
1112 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1113 	u8 *stype;
1114 
1115 	if (reg_state->allocated_stack <= slot) {
1116 		verbose(env, "invalid read from stack off %d+0 size %d\n",
1117 			off, size);
1118 		return -EACCES;
1119 	}
1120 	stype = reg_state->stack[spi].slot_type;
1121 
1122 	if (stype[0] == STACK_SPILL) {
1123 		if (size != BPF_REG_SIZE) {
1124 			verbose(env, "invalid size of register spill\n");
1125 			return -EACCES;
1126 		}
1127 		for (i = 1; i < BPF_REG_SIZE; i++) {
1128 			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
1129 				verbose(env, "corrupted spill memory\n");
1130 				return -EACCES;
1131 			}
1132 		}
1133 
1134 		if (value_regno >= 0) {
1135 			/* restore register state from stack */
1136 			state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
1137 			/* mark reg as written since spilled pointer state likely
1138 			 * has its liveness marks cleared by is_state_visited()
1139 			 * which resets stack/reg liveness for state transitions
1140 			 */
1141 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1142 		}
1143 		mark_stack_slot_read(env, vstate, vstate->parent, spi,
1144 				     reg_state->frameno);
1145 		return 0;
1146 	} else {
1147 		int zeros = 0;
1148 
1149 		for (i = 0; i < size; i++) {
1150 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
1151 				continue;
1152 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
1153 				zeros++;
1154 				continue;
1155 			}
1156 			verbose(env, "invalid read from stack off %d+%d size %d\n",
1157 				off, i, size);
1158 			return -EACCES;
1159 		}
1160 		mark_stack_slot_read(env, vstate, vstate->parent, spi,
1161 				     reg_state->frameno);
1162 		if (value_regno >= 0) {
1163 			if (zeros == size) {
1164 				/* any size read into register is zero extended,
1165 				 * so the whole register == const_zero
1166 				 */
1167 				__mark_reg_const_zero(&state->regs[value_regno]);
1168 			} else {
1169 				/* have read misc data from the stack */
1170 				mark_reg_unknown(env, state->regs, value_regno);
1171 			}
1172 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1173 		}
1174 		return 0;
1175 	}
1176 }
1177 
1178 /* check read/write into map element returned by bpf_map_lookup_elem() */
1179 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
1180 			      int size, bool zero_size_allowed)
1181 {
1182 	struct bpf_reg_state *regs = cur_regs(env);
1183 	struct bpf_map *map = regs[regno].map_ptr;
1184 
1185 	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1186 	    off + size > map->value_size) {
1187 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
1188 			map->value_size, off, size);
1189 		return -EACCES;
1190 	}
1191 	return 0;
1192 }
1193 
1194 /* check read/write into a map element with possible variable offset */
1195 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
1196 			    int off, int size, bool zero_size_allowed)
1197 {
1198 	struct bpf_verifier_state *vstate = env->cur_state;
1199 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
1200 	struct bpf_reg_state *reg = &state->regs[regno];
1201 	int err;
1202 
1203 	/* We may have adjusted the register to this map value, so we
1204 	 * need to try adding each of min_value and max_value to off
1205 	 * to make sure our theoretical access will be safe.
1206 	 */
1207 	if (env->log.level)
1208 		print_verifier_state(env, state);
1209 	/* The minimum value is only important with signed
1210 	 * comparisons where we can't assume the floor of a
1211 	 * value is 0.  If we are using signed variables for our
1212 	 * index'es we need to make sure that whatever we use
1213 	 * will have a set floor within our range.
1214 	 */
1215 	if (reg->smin_value < 0) {
1216 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1217 			regno);
1218 		return -EACCES;
1219 	}
1220 	err = __check_map_access(env, regno, reg->smin_value + off, size,
1221 				 zero_size_allowed);
1222 	if (err) {
1223 		verbose(env, "R%d min value is outside of the array range\n",
1224 			regno);
1225 		return err;
1226 	}
1227 
1228 	/* If we haven't set a max value then we need to bail since we can't be
1229 	 * sure we won't do bad things.
1230 	 * If reg->umax_value + off could overflow, treat that as unbounded too.
1231 	 */
1232 	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
1233 		verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
1234 			regno);
1235 		return -EACCES;
1236 	}
1237 	err = __check_map_access(env, regno, reg->umax_value + off, size,
1238 				 zero_size_allowed);
1239 	if (err)
1240 		verbose(env, "R%d max value is outside of the array range\n",
1241 			regno);
1242 	return err;
1243 }
1244 
1245 #define MAX_PACKET_OFF 0xffff
1246 
1247 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
1248 				       const struct bpf_call_arg_meta *meta,
1249 				       enum bpf_access_type t)
1250 {
1251 	switch (env->prog->type) {
1252 	case BPF_PROG_TYPE_LWT_IN:
1253 	case BPF_PROG_TYPE_LWT_OUT:
1254 		/* dst_input() and dst_output() can't write for now */
1255 		if (t == BPF_WRITE)
1256 			return false;
1257 		/* fallthrough */
1258 	case BPF_PROG_TYPE_SCHED_CLS:
1259 	case BPF_PROG_TYPE_SCHED_ACT:
1260 	case BPF_PROG_TYPE_XDP:
1261 	case BPF_PROG_TYPE_LWT_XMIT:
1262 	case BPF_PROG_TYPE_SK_SKB:
1263 	case BPF_PROG_TYPE_SK_MSG:
1264 		if (meta)
1265 			return meta->pkt_access;
1266 
1267 		env->seen_direct_write = true;
1268 		return true;
1269 	default:
1270 		return false;
1271 	}
1272 }
1273 
1274 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
1275 				 int off, int size, bool zero_size_allowed)
1276 {
1277 	struct bpf_reg_state *regs = cur_regs(env);
1278 	struct bpf_reg_state *reg = &regs[regno];
1279 
1280 	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1281 	    (u64)off + size > reg->range) {
1282 		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
1283 			off, size, regno, reg->id, reg->off, reg->range);
1284 		return -EACCES;
1285 	}
1286 	return 0;
1287 }
1288 
1289 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
1290 			       int size, bool zero_size_allowed)
1291 {
1292 	struct bpf_reg_state *regs = cur_regs(env);
1293 	struct bpf_reg_state *reg = &regs[regno];
1294 	int err;
1295 
1296 	/* We may have added a variable offset to the packet pointer; but any
1297 	 * reg->range we have comes after that.  We are only checking the fixed
1298 	 * offset.
1299 	 */
1300 
1301 	/* We don't allow negative numbers, because we aren't tracking enough
1302 	 * detail to prove they're safe.
1303 	 */
1304 	if (reg->smin_value < 0) {
1305 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1306 			regno);
1307 		return -EACCES;
1308 	}
1309 	err = __check_packet_access(env, regno, off, size, zero_size_allowed);
1310 	if (err) {
1311 		verbose(env, "R%d offset is outside of the packet\n", regno);
1312 		return err;
1313 	}
1314 	return err;
1315 }
1316 
1317 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
1318 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
1319 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
1320 {
1321 	struct bpf_insn_access_aux info = {
1322 		.reg_type = *reg_type,
1323 	};
1324 
1325 	if (env->ops->is_valid_access &&
1326 	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
1327 		/* A non zero info.ctx_field_size indicates that this field is a
1328 		 * candidate for later verifier transformation to load the whole
1329 		 * field and then apply a mask when accessed with a narrower
1330 		 * access than actual ctx access size. A zero info.ctx_field_size
1331 		 * will only allow for whole field access and rejects any other
1332 		 * type of narrower access.
1333 		 */
1334 		*reg_type = info.reg_type;
1335 
1336 		env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
1337 		/* remember the offset of last byte accessed in ctx */
1338 		if (env->prog->aux->max_ctx_offset < off + size)
1339 			env->prog->aux->max_ctx_offset = off + size;
1340 		return 0;
1341 	}
1342 
1343 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
1344 	return -EACCES;
1345 }
1346 
1347 static bool __is_pointer_value(bool allow_ptr_leaks,
1348 			       const struct bpf_reg_state *reg)
1349 {
1350 	if (allow_ptr_leaks)
1351 		return false;
1352 
1353 	return reg->type != SCALAR_VALUE;
1354 }
1355 
1356 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1357 {
1358 	return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
1359 }
1360 
1361 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1362 {
1363 	const struct bpf_reg_state *reg = cur_regs(env) + regno;
1364 
1365 	return reg->type == PTR_TO_CTX;
1366 }
1367 
1368 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
1369 {
1370 	const struct bpf_reg_state *reg = cur_regs(env) + regno;
1371 
1372 	return type_is_pkt_pointer(reg->type);
1373 }
1374 
1375 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1376 				   const struct bpf_reg_state *reg,
1377 				   int off, int size, bool strict)
1378 {
1379 	struct tnum reg_off;
1380 	int ip_align;
1381 
1382 	/* Byte size accesses are always allowed. */
1383 	if (!strict || size == 1)
1384 		return 0;
1385 
1386 	/* For platforms that do not have a Kconfig enabling
1387 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1388 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
1389 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1390 	 * to this code only in strict mode where we want to emulate
1391 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
1392 	 * unconditional IP align value of '2'.
1393 	 */
1394 	ip_align = 2;
1395 
1396 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1397 	if (!tnum_is_aligned(reg_off, size)) {
1398 		char tn_buf[48];
1399 
1400 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1401 		verbose(env,
1402 			"misaligned packet access off %d+%s+%d+%d size %d\n",
1403 			ip_align, tn_buf, reg->off, off, size);
1404 		return -EACCES;
1405 	}
1406 
1407 	return 0;
1408 }
1409 
1410 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1411 				       const struct bpf_reg_state *reg,
1412 				       const char *pointer_desc,
1413 				       int off, int size, bool strict)
1414 {
1415 	struct tnum reg_off;
1416 
1417 	/* Byte size accesses are always allowed. */
1418 	if (!strict || size == 1)
1419 		return 0;
1420 
1421 	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1422 	if (!tnum_is_aligned(reg_off, size)) {
1423 		char tn_buf[48];
1424 
1425 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1426 		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
1427 			pointer_desc, tn_buf, reg->off, off, size);
1428 		return -EACCES;
1429 	}
1430 
1431 	return 0;
1432 }
1433 
1434 static int check_ptr_alignment(struct bpf_verifier_env *env,
1435 			       const struct bpf_reg_state *reg, int off,
1436 			       int size, bool strict_alignment_once)
1437 {
1438 	bool strict = env->strict_alignment || strict_alignment_once;
1439 	const char *pointer_desc = "";
1440 
1441 	switch (reg->type) {
1442 	case PTR_TO_PACKET:
1443 	case PTR_TO_PACKET_META:
1444 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
1445 		 * right in front, treat it the very same way.
1446 		 */
1447 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
1448 	case PTR_TO_MAP_VALUE:
1449 		pointer_desc = "value ";
1450 		break;
1451 	case PTR_TO_CTX:
1452 		pointer_desc = "context ";
1453 		break;
1454 	case PTR_TO_STACK:
1455 		pointer_desc = "stack ";
1456 		/* The stack spill tracking logic in check_stack_write()
1457 		 * and check_stack_read() relies on stack accesses being
1458 		 * aligned.
1459 		 */
1460 		strict = true;
1461 		break;
1462 	default:
1463 		break;
1464 	}
1465 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1466 					   strict);
1467 }
1468 
1469 static int update_stack_depth(struct bpf_verifier_env *env,
1470 			      const struct bpf_func_state *func,
1471 			      int off)
1472 {
1473 	u16 stack = env->subprog_stack_depth[func->subprogno];
1474 
1475 	if (stack >= -off)
1476 		return 0;
1477 
1478 	/* update known max for given subprogram */
1479 	env->subprog_stack_depth[func->subprogno] = -off;
1480 	return 0;
1481 }
1482 
1483 /* starting from main bpf function walk all instructions of the function
1484  * and recursively walk all callees that given function can call.
1485  * Ignore jump and exit insns.
1486  * Since recursion is prevented by check_cfg() this algorithm
1487  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
1488  */
1489 static int check_max_stack_depth(struct bpf_verifier_env *env)
1490 {
1491 	int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
1492 	struct bpf_insn *insn = env->prog->insnsi;
1493 	int insn_cnt = env->prog->len;
1494 	int ret_insn[MAX_CALL_FRAMES];
1495 	int ret_prog[MAX_CALL_FRAMES];
1496 
1497 process_func:
1498 	/* round up to 32-bytes, since this is granularity
1499 	 * of interpreter stack size
1500 	 */
1501 	depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
1502 	if (depth > MAX_BPF_STACK) {
1503 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
1504 			frame + 1, depth);
1505 		return -EACCES;
1506 	}
1507 continue_func:
1508 	if (env->subprog_cnt == subprog)
1509 		subprog_end = insn_cnt;
1510 	else
1511 		subprog_end = env->subprog_starts[subprog];
1512 	for (; i < subprog_end; i++) {
1513 		if (insn[i].code != (BPF_JMP | BPF_CALL))
1514 			continue;
1515 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
1516 			continue;
1517 		/* remember insn and function to return to */
1518 		ret_insn[frame] = i + 1;
1519 		ret_prog[frame] = subprog;
1520 
1521 		/* find the callee */
1522 		i = i + insn[i].imm + 1;
1523 		subprog = find_subprog(env, i);
1524 		if (subprog < 0) {
1525 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1526 				  i);
1527 			return -EFAULT;
1528 		}
1529 		subprog++;
1530 		frame++;
1531 		if (frame >= MAX_CALL_FRAMES) {
1532 			WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
1533 			return -EFAULT;
1534 		}
1535 		goto process_func;
1536 	}
1537 	/* end of for() loop means the last insn of the 'subprog'
1538 	 * was reached. Doesn't matter whether it was JA or EXIT
1539 	 */
1540 	if (frame == 0)
1541 		return 0;
1542 	depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
1543 	frame--;
1544 	i = ret_insn[frame];
1545 	subprog = ret_prog[frame];
1546 	goto continue_func;
1547 }
1548 
1549 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1550 static int get_callee_stack_depth(struct bpf_verifier_env *env,
1551 				  const struct bpf_insn *insn, int idx)
1552 {
1553 	int start = idx + insn->imm + 1, subprog;
1554 
1555 	subprog = find_subprog(env, start);
1556 	if (subprog < 0) {
1557 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1558 			  start);
1559 		return -EFAULT;
1560 	}
1561 	subprog++;
1562 	return env->subprog_stack_depth[subprog];
1563 }
1564 #endif
1565 
1566 /* truncate register to smaller size (in bytes)
1567  * must be called with size < BPF_REG_SIZE
1568  */
1569 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1570 {
1571 	u64 mask;
1572 
1573 	/* clear high bits in bit representation */
1574 	reg->var_off = tnum_cast(reg->var_off, size);
1575 
1576 	/* fix arithmetic bounds */
1577 	mask = ((u64)1 << (size * 8)) - 1;
1578 	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1579 		reg->umin_value &= mask;
1580 		reg->umax_value &= mask;
1581 	} else {
1582 		reg->umin_value = 0;
1583 		reg->umax_value = mask;
1584 	}
1585 	reg->smin_value = reg->umin_value;
1586 	reg->smax_value = reg->umax_value;
1587 }
1588 
1589 /* check whether memory at (regno + off) is accessible for t = (read | write)
1590  * if t==write, value_regno is a register which value is stored into memory
1591  * if t==read, value_regno is a register which will receive the value from memory
1592  * if t==write && value_regno==-1, some unknown value is stored into memory
1593  * if t==read && value_regno==-1, don't care what we read from memory
1594  */
1595 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
1596 			    int off, int bpf_size, enum bpf_access_type t,
1597 			    int value_regno, bool strict_alignment_once)
1598 {
1599 	struct bpf_reg_state *regs = cur_regs(env);
1600 	struct bpf_reg_state *reg = regs + regno;
1601 	struct bpf_func_state *state;
1602 	int size, err = 0;
1603 
1604 	size = bpf_size_to_bytes(bpf_size);
1605 	if (size < 0)
1606 		return size;
1607 
1608 	/* alignment checks will add in reg->off themselves */
1609 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
1610 	if (err)
1611 		return err;
1612 
1613 	/* for access checks, reg->off is just part of off */
1614 	off += reg->off;
1615 
1616 	if (reg->type == PTR_TO_MAP_VALUE) {
1617 		if (t == BPF_WRITE && value_regno >= 0 &&
1618 		    is_pointer_value(env, value_regno)) {
1619 			verbose(env, "R%d leaks addr into map\n", value_regno);
1620 			return -EACCES;
1621 		}
1622 
1623 		err = check_map_access(env, regno, off, size, false);
1624 		if (!err && t == BPF_READ && value_regno >= 0)
1625 			mark_reg_unknown(env, regs, value_regno);
1626 
1627 	} else if (reg->type == PTR_TO_CTX) {
1628 		enum bpf_reg_type reg_type = SCALAR_VALUE;
1629 
1630 		if (t == BPF_WRITE && value_regno >= 0 &&
1631 		    is_pointer_value(env, value_regno)) {
1632 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
1633 			return -EACCES;
1634 		}
1635 		/* ctx accesses must be at a fixed offset, so that we can
1636 		 * determine what type of data were returned.
1637 		 */
1638 		if (reg->off) {
1639 			verbose(env,
1640 				"dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
1641 				regno, reg->off, off - reg->off);
1642 			return -EACCES;
1643 		}
1644 		if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1645 			char tn_buf[48];
1646 
1647 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1648 			verbose(env,
1649 				"variable ctx access var_off=%s off=%d size=%d",
1650 				tn_buf, off, size);
1651 			return -EACCES;
1652 		}
1653 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
1654 		if (!err && t == BPF_READ && value_regno >= 0) {
1655 			/* ctx access returns either a scalar, or a
1656 			 * PTR_TO_PACKET[_META,_END]. In the latter
1657 			 * case, we know the offset is zero.
1658 			 */
1659 			if (reg_type == SCALAR_VALUE)
1660 				mark_reg_unknown(env, regs, value_regno);
1661 			else
1662 				mark_reg_known_zero(env, regs,
1663 						    value_regno);
1664 			regs[value_regno].id = 0;
1665 			regs[value_regno].off = 0;
1666 			regs[value_regno].range = 0;
1667 			regs[value_regno].type = reg_type;
1668 		}
1669 
1670 	} else if (reg->type == PTR_TO_STACK) {
1671 		/* stack accesses must be at a fixed offset, so that we can
1672 		 * determine what type of data were returned.
1673 		 * See check_stack_read().
1674 		 */
1675 		if (!tnum_is_const(reg->var_off)) {
1676 			char tn_buf[48];
1677 
1678 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1679 			verbose(env, "variable stack access var_off=%s off=%d size=%d",
1680 				tn_buf, off, size);
1681 			return -EACCES;
1682 		}
1683 		off += reg->var_off.value;
1684 		if (off >= 0 || off < -MAX_BPF_STACK) {
1685 			verbose(env, "invalid stack off=%d size=%d\n", off,
1686 				size);
1687 			return -EACCES;
1688 		}
1689 
1690 		state = func(env, reg);
1691 		err = update_stack_depth(env, state, off);
1692 		if (err)
1693 			return err;
1694 
1695 		if (t == BPF_WRITE)
1696 			err = check_stack_write(env, state, off, size,
1697 						value_regno);
1698 		else
1699 			err = check_stack_read(env, state, off, size,
1700 					       value_regno);
1701 	} else if (reg_is_pkt_pointer(reg)) {
1702 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
1703 			verbose(env, "cannot write into packet\n");
1704 			return -EACCES;
1705 		}
1706 		if (t == BPF_WRITE && value_regno >= 0 &&
1707 		    is_pointer_value(env, value_regno)) {
1708 			verbose(env, "R%d leaks addr into packet\n",
1709 				value_regno);
1710 			return -EACCES;
1711 		}
1712 		err = check_packet_access(env, regno, off, size, false);
1713 		if (!err && t == BPF_READ && value_regno >= 0)
1714 			mark_reg_unknown(env, regs, value_regno);
1715 	} else {
1716 		verbose(env, "R%d invalid mem access '%s'\n", regno,
1717 			reg_type_str[reg->type]);
1718 		return -EACCES;
1719 	}
1720 
1721 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
1722 	    regs[value_regno].type == SCALAR_VALUE) {
1723 		/* b/h/w load zero-extends, mark upper bits as known 0 */
1724 		coerce_reg_to_size(&regs[value_regno], size);
1725 	}
1726 	return err;
1727 }
1728 
1729 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
1730 {
1731 	int err;
1732 
1733 	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
1734 	    insn->imm != 0) {
1735 		verbose(env, "BPF_XADD uses reserved fields\n");
1736 		return -EINVAL;
1737 	}
1738 
1739 	/* check src1 operand */
1740 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
1741 	if (err)
1742 		return err;
1743 
1744 	/* check src2 operand */
1745 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
1746 	if (err)
1747 		return err;
1748 
1749 	if (is_pointer_value(env, insn->src_reg)) {
1750 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
1751 		return -EACCES;
1752 	}
1753 
1754 	if (is_ctx_reg(env, insn->dst_reg) ||
1755 	    is_pkt_reg(env, insn->dst_reg)) {
1756 		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
1757 			insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
1758 			"context" : "packet");
1759 		return -EACCES;
1760 	}
1761 
1762 	/* check whether atomic_add can read the memory */
1763 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
1764 			       BPF_SIZE(insn->code), BPF_READ, -1, true);
1765 	if (err)
1766 		return err;
1767 
1768 	/* check whether atomic_add can write into the same memory */
1769 	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
1770 				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
1771 }
1772 
1773 /* when register 'regno' is passed into function that will read 'access_size'
1774  * bytes from that pointer, make sure that it's within stack boundary
1775  * and all elements of stack are initialized.
1776  * Unlike most pointer bounds-checking functions, this one doesn't take an
1777  * 'off' argument, so it has to add in reg->off itself.
1778  */
1779 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1780 				int access_size, bool zero_size_allowed,
1781 				struct bpf_call_arg_meta *meta)
1782 {
1783 	struct bpf_reg_state *reg = cur_regs(env) + regno;
1784 	struct bpf_func_state *state = func(env, reg);
1785 	int off, i, slot, spi;
1786 
1787 	if (reg->type != PTR_TO_STACK) {
1788 		/* Allow zero-byte read from NULL, regardless of pointer type */
1789 		if (zero_size_allowed && access_size == 0 &&
1790 		    register_is_null(reg))
1791 			return 0;
1792 
1793 		verbose(env, "R%d type=%s expected=%s\n", regno,
1794 			reg_type_str[reg->type],
1795 			reg_type_str[PTR_TO_STACK]);
1796 		return -EACCES;
1797 	}
1798 
1799 	/* Only allow fixed-offset stack reads */
1800 	if (!tnum_is_const(reg->var_off)) {
1801 		char tn_buf[48];
1802 
1803 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1804 		verbose(env, "invalid variable stack read R%d var_off=%s\n",
1805 			regno, tn_buf);
1806 		return -EACCES;
1807 	}
1808 	off = reg->off + reg->var_off.value;
1809 	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
1810 	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
1811 		verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
1812 			regno, off, access_size);
1813 		return -EACCES;
1814 	}
1815 
1816 	if (meta && meta->raw_mode) {
1817 		meta->access_size = access_size;
1818 		meta->regno = regno;
1819 		return 0;
1820 	}
1821 
1822 	for (i = 0; i < access_size; i++) {
1823 		u8 *stype;
1824 
1825 		slot = -(off + i) - 1;
1826 		spi = slot / BPF_REG_SIZE;
1827 		if (state->allocated_stack <= slot)
1828 			goto err;
1829 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
1830 		if (*stype == STACK_MISC)
1831 			goto mark;
1832 		if (*stype == STACK_ZERO) {
1833 			/* helper can write anything into the stack */
1834 			*stype = STACK_MISC;
1835 			goto mark;
1836 		}
1837 err:
1838 		verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
1839 			off, i, access_size);
1840 		return -EACCES;
1841 mark:
1842 		/* reading any byte out of 8-byte 'spill_slot' will cause
1843 		 * the whole slot to be marked as 'read'
1844 		 */
1845 		mark_stack_slot_read(env, env->cur_state, env->cur_state->parent,
1846 				     spi, state->frameno);
1847 	}
1848 	return update_stack_depth(env, state, off);
1849 }
1850 
1851 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
1852 				   int access_size, bool zero_size_allowed,
1853 				   struct bpf_call_arg_meta *meta)
1854 {
1855 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1856 
1857 	switch (reg->type) {
1858 	case PTR_TO_PACKET:
1859 	case PTR_TO_PACKET_META:
1860 		return check_packet_access(env, regno, reg->off, access_size,
1861 					   zero_size_allowed);
1862 	case PTR_TO_MAP_VALUE:
1863 		return check_map_access(env, regno, reg->off, access_size,
1864 					zero_size_allowed);
1865 	default: /* scalar_value|ptr_to_stack or invalid ptr */
1866 		return check_stack_boundary(env, regno, access_size,
1867 					    zero_size_allowed, meta);
1868 	}
1869 }
1870 
1871 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
1872 {
1873 	return type == ARG_PTR_TO_MEM ||
1874 	       type == ARG_PTR_TO_MEM_OR_NULL ||
1875 	       type == ARG_PTR_TO_UNINIT_MEM;
1876 }
1877 
1878 static bool arg_type_is_mem_size(enum bpf_arg_type type)
1879 {
1880 	return type == ARG_CONST_SIZE ||
1881 	       type == ARG_CONST_SIZE_OR_ZERO;
1882 }
1883 
1884 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
1885 			  enum bpf_arg_type arg_type,
1886 			  struct bpf_call_arg_meta *meta)
1887 {
1888 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1889 	enum bpf_reg_type expected_type, type = reg->type;
1890 	int err = 0;
1891 
1892 	if (arg_type == ARG_DONTCARE)
1893 		return 0;
1894 
1895 	err = check_reg_arg(env, regno, SRC_OP);
1896 	if (err)
1897 		return err;
1898 
1899 	if (arg_type == ARG_ANYTHING) {
1900 		if (is_pointer_value(env, regno)) {
1901 			verbose(env, "R%d leaks addr into helper function\n",
1902 				regno);
1903 			return -EACCES;
1904 		}
1905 		return 0;
1906 	}
1907 
1908 	if (type_is_pkt_pointer(type) &&
1909 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
1910 		verbose(env, "helper access to the packet is not allowed\n");
1911 		return -EACCES;
1912 	}
1913 
1914 	if (arg_type == ARG_PTR_TO_MAP_KEY ||
1915 	    arg_type == ARG_PTR_TO_MAP_VALUE) {
1916 		expected_type = PTR_TO_STACK;
1917 		if (!type_is_pkt_pointer(type) &&
1918 		    type != expected_type)
1919 			goto err_type;
1920 	} else if (arg_type == ARG_CONST_SIZE ||
1921 		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
1922 		expected_type = SCALAR_VALUE;
1923 		if (type != expected_type)
1924 			goto err_type;
1925 	} else if (arg_type == ARG_CONST_MAP_PTR) {
1926 		expected_type = CONST_PTR_TO_MAP;
1927 		if (type != expected_type)
1928 			goto err_type;
1929 	} else if (arg_type == ARG_PTR_TO_CTX) {
1930 		expected_type = PTR_TO_CTX;
1931 		if (type != expected_type)
1932 			goto err_type;
1933 	} else if (arg_type_is_mem_ptr(arg_type)) {
1934 		expected_type = PTR_TO_STACK;
1935 		/* One exception here. In case function allows for NULL to be
1936 		 * passed in as argument, it's a SCALAR_VALUE type. Final test
1937 		 * happens during stack boundary checking.
1938 		 */
1939 		if (register_is_null(reg) &&
1940 		    arg_type == ARG_PTR_TO_MEM_OR_NULL)
1941 			/* final test in check_stack_boundary() */;
1942 		else if (!type_is_pkt_pointer(type) &&
1943 			 type != PTR_TO_MAP_VALUE &&
1944 			 type != expected_type)
1945 			goto err_type;
1946 		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
1947 	} else {
1948 		verbose(env, "unsupported arg_type %d\n", arg_type);
1949 		return -EFAULT;
1950 	}
1951 
1952 	if (arg_type == ARG_CONST_MAP_PTR) {
1953 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
1954 		meta->map_ptr = reg->map_ptr;
1955 	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
1956 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
1957 		 * check that [key, key + map->key_size) are within
1958 		 * stack limits and initialized
1959 		 */
1960 		if (!meta->map_ptr) {
1961 			/* in function declaration map_ptr must come before
1962 			 * map_key, so that it's verified and known before
1963 			 * we have to check map_key here. Otherwise it means
1964 			 * that kernel subsystem misconfigured verifier
1965 			 */
1966 			verbose(env, "invalid map_ptr to access map->key\n");
1967 			return -EACCES;
1968 		}
1969 		if (type_is_pkt_pointer(type))
1970 			err = check_packet_access(env, regno, reg->off,
1971 						  meta->map_ptr->key_size,
1972 						  false);
1973 		else
1974 			err = check_stack_boundary(env, regno,
1975 						   meta->map_ptr->key_size,
1976 						   false, NULL);
1977 	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
1978 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
1979 		 * check [value, value + map->value_size) validity
1980 		 */
1981 		if (!meta->map_ptr) {
1982 			/* kernel subsystem misconfigured verifier */
1983 			verbose(env, "invalid map_ptr to access map->value\n");
1984 			return -EACCES;
1985 		}
1986 		if (type_is_pkt_pointer(type))
1987 			err = check_packet_access(env, regno, reg->off,
1988 						  meta->map_ptr->value_size,
1989 						  false);
1990 		else
1991 			err = check_stack_boundary(env, regno,
1992 						   meta->map_ptr->value_size,
1993 						   false, NULL);
1994 	} else if (arg_type_is_mem_size(arg_type)) {
1995 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
1996 
1997 		/* The register is SCALAR_VALUE; the access check
1998 		 * happens using its boundaries.
1999 		 */
2000 		if (!tnum_is_const(reg->var_off))
2001 			/* For unprivileged variable accesses, disable raw
2002 			 * mode so that the program is required to
2003 			 * initialize all the memory that the helper could
2004 			 * just partially fill up.
2005 			 */
2006 			meta = NULL;
2007 
2008 		if (reg->smin_value < 0) {
2009 			verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
2010 				regno);
2011 			return -EACCES;
2012 		}
2013 
2014 		if (reg->umin_value == 0) {
2015 			err = check_helper_mem_access(env, regno - 1, 0,
2016 						      zero_size_allowed,
2017 						      meta);
2018 			if (err)
2019 				return err;
2020 		}
2021 
2022 		if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
2023 			verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
2024 				regno);
2025 			return -EACCES;
2026 		}
2027 		err = check_helper_mem_access(env, regno - 1,
2028 					      reg->umax_value,
2029 					      zero_size_allowed, meta);
2030 	}
2031 
2032 	return err;
2033 err_type:
2034 	verbose(env, "R%d type=%s expected=%s\n", regno,
2035 		reg_type_str[type], reg_type_str[expected_type]);
2036 	return -EACCES;
2037 }
2038 
2039 static int check_map_func_compatibility(struct bpf_verifier_env *env,
2040 					struct bpf_map *map, int func_id)
2041 {
2042 	if (!map)
2043 		return 0;
2044 
2045 	/* We need a two way check, first is from map perspective ... */
2046 	switch (map->map_type) {
2047 	case BPF_MAP_TYPE_PROG_ARRAY:
2048 		if (func_id != BPF_FUNC_tail_call)
2049 			goto error;
2050 		break;
2051 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
2052 		if (func_id != BPF_FUNC_perf_event_read &&
2053 		    func_id != BPF_FUNC_perf_event_output &&
2054 		    func_id != BPF_FUNC_perf_event_read_value)
2055 			goto error;
2056 		break;
2057 	case BPF_MAP_TYPE_STACK_TRACE:
2058 		if (func_id != BPF_FUNC_get_stackid)
2059 			goto error;
2060 		break;
2061 	case BPF_MAP_TYPE_CGROUP_ARRAY:
2062 		if (func_id != BPF_FUNC_skb_under_cgroup &&
2063 		    func_id != BPF_FUNC_current_task_under_cgroup)
2064 			goto error;
2065 		break;
2066 	/* devmap returns a pointer to a live net_device ifindex that we cannot
2067 	 * allow to be modified from bpf side. So do not allow lookup elements
2068 	 * for now.
2069 	 */
2070 	case BPF_MAP_TYPE_DEVMAP:
2071 		if (func_id != BPF_FUNC_redirect_map)
2072 			goto error;
2073 		break;
2074 	/* Restrict bpf side of cpumap, open when use-cases appear */
2075 	case BPF_MAP_TYPE_CPUMAP:
2076 		if (func_id != BPF_FUNC_redirect_map)
2077 			goto error;
2078 		break;
2079 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
2080 	case BPF_MAP_TYPE_HASH_OF_MAPS:
2081 		if (func_id != BPF_FUNC_map_lookup_elem)
2082 			goto error;
2083 		break;
2084 	case BPF_MAP_TYPE_SOCKMAP:
2085 		if (func_id != BPF_FUNC_sk_redirect_map &&
2086 		    func_id != BPF_FUNC_sock_map_update &&
2087 		    func_id != BPF_FUNC_map_delete_elem &&
2088 		    func_id != BPF_FUNC_msg_redirect_map)
2089 			goto error;
2090 		break;
2091 	default:
2092 		break;
2093 	}
2094 
2095 	/* ... and second from the function itself. */
2096 	switch (func_id) {
2097 	case BPF_FUNC_tail_call:
2098 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
2099 			goto error;
2100 		if (env->subprog_cnt) {
2101 			verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
2102 			return -EINVAL;
2103 		}
2104 		break;
2105 	case BPF_FUNC_perf_event_read:
2106 	case BPF_FUNC_perf_event_output:
2107 	case BPF_FUNC_perf_event_read_value:
2108 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
2109 			goto error;
2110 		break;
2111 	case BPF_FUNC_get_stackid:
2112 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
2113 			goto error;
2114 		break;
2115 	case BPF_FUNC_current_task_under_cgroup:
2116 	case BPF_FUNC_skb_under_cgroup:
2117 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
2118 			goto error;
2119 		break;
2120 	case BPF_FUNC_redirect_map:
2121 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
2122 		    map->map_type != BPF_MAP_TYPE_CPUMAP)
2123 			goto error;
2124 		break;
2125 	case BPF_FUNC_sk_redirect_map:
2126 	case BPF_FUNC_msg_redirect_map:
2127 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2128 			goto error;
2129 		break;
2130 	case BPF_FUNC_sock_map_update:
2131 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2132 			goto error;
2133 		break;
2134 	default:
2135 		break;
2136 	}
2137 
2138 	return 0;
2139 error:
2140 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
2141 		map->map_type, func_id_name(func_id), func_id);
2142 	return -EINVAL;
2143 }
2144 
2145 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
2146 {
2147 	int count = 0;
2148 
2149 	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
2150 		count++;
2151 	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
2152 		count++;
2153 	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
2154 		count++;
2155 	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
2156 		count++;
2157 	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
2158 		count++;
2159 
2160 	/* We only support one arg being in raw mode at the moment,
2161 	 * which is sufficient for the helper functions we have
2162 	 * right now.
2163 	 */
2164 	return count <= 1;
2165 }
2166 
2167 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
2168 				    enum bpf_arg_type arg_next)
2169 {
2170 	return (arg_type_is_mem_ptr(arg_curr) &&
2171 	        !arg_type_is_mem_size(arg_next)) ||
2172 	       (!arg_type_is_mem_ptr(arg_curr) &&
2173 		arg_type_is_mem_size(arg_next));
2174 }
2175 
2176 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
2177 {
2178 	/* bpf_xxx(..., buf, len) call will access 'len'
2179 	 * bytes from memory 'buf'. Both arg types need
2180 	 * to be paired, so make sure there's no buggy
2181 	 * helper function specification.
2182 	 */
2183 	if (arg_type_is_mem_size(fn->arg1_type) ||
2184 	    arg_type_is_mem_ptr(fn->arg5_type)  ||
2185 	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
2186 	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
2187 	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
2188 	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
2189 		return false;
2190 
2191 	return true;
2192 }
2193 
2194 static int check_func_proto(const struct bpf_func_proto *fn)
2195 {
2196 	return check_raw_mode_ok(fn) &&
2197 	       check_arg_pair_ok(fn) ? 0 : -EINVAL;
2198 }
2199 
2200 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2201  * are now invalid, so turn them into unknown SCALAR_VALUE.
2202  */
2203 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2204 				     struct bpf_func_state *state)
2205 {
2206 	struct bpf_reg_state *regs = state->regs, *reg;
2207 	int i;
2208 
2209 	for (i = 0; i < MAX_BPF_REG; i++)
2210 		if (reg_is_pkt_pointer_any(&regs[i]))
2211 			mark_reg_unknown(env, regs, i);
2212 
2213 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
2214 		if (state->stack[i].slot_type[0] != STACK_SPILL)
2215 			continue;
2216 		reg = &state->stack[i].spilled_ptr;
2217 		if (reg_is_pkt_pointer_any(reg))
2218 			__mark_reg_unknown(reg);
2219 	}
2220 }
2221 
2222 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2223 {
2224 	struct bpf_verifier_state *vstate = env->cur_state;
2225 	int i;
2226 
2227 	for (i = 0; i <= vstate->curframe; i++)
2228 		__clear_all_pkt_pointers(env, vstate->frame[i]);
2229 }
2230 
2231 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2232 			   int *insn_idx)
2233 {
2234 	struct bpf_verifier_state *state = env->cur_state;
2235 	struct bpf_func_state *caller, *callee;
2236 	int i, subprog, target_insn;
2237 
2238 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
2239 		verbose(env, "the call stack of %d frames is too deep\n",
2240 			state->curframe + 2);
2241 		return -E2BIG;
2242 	}
2243 
2244 	target_insn = *insn_idx + insn->imm;
2245 	subprog = find_subprog(env, target_insn + 1);
2246 	if (subprog < 0) {
2247 		verbose(env, "verifier bug. No program starts at insn %d\n",
2248 			target_insn + 1);
2249 		return -EFAULT;
2250 	}
2251 
2252 	caller = state->frame[state->curframe];
2253 	if (state->frame[state->curframe + 1]) {
2254 		verbose(env, "verifier bug. Frame %d already allocated\n",
2255 			state->curframe + 1);
2256 		return -EFAULT;
2257 	}
2258 
2259 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2260 	if (!callee)
2261 		return -ENOMEM;
2262 	state->frame[state->curframe + 1] = callee;
2263 
2264 	/* callee cannot access r0, r6 - r9 for reading and has to write
2265 	 * into its own stack before reading from it.
2266 	 * callee can read/write into caller's stack
2267 	 */
2268 	init_func_state(env, callee,
2269 			/* remember the callsite, it will be used by bpf_exit */
2270 			*insn_idx /* callsite */,
2271 			state->curframe + 1 /* frameno within this callchain */,
2272 			subprog + 1 /* subprog number within this prog */);
2273 
2274 	/* copy r1 - r5 args that callee can access */
2275 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2276 		callee->regs[i] = caller->regs[i];
2277 
2278 	/* after the call regsiters r0 - r5 were scratched */
2279 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
2280 		mark_reg_not_init(env, caller->regs, caller_saved[i]);
2281 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2282 	}
2283 
2284 	/* only increment it after check_reg_arg() finished */
2285 	state->curframe++;
2286 
2287 	/* and go analyze first insn of the callee */
2288 	*insn_idx = target_insn;
2289 
2290 	if (env->log.level) {
2291 		verbose(env, "caller:\n");
2292 		print_verifier_state(env, caller);
2293 		verbose(env, "callee:\n");
2294 		print_verifier_state(env, callee);
2295 	}
2296 	return 0;
2297 }
2298 
2299 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2300 {
2301 	struct bpf_verifier_state *state = env->cur_state;
2302 	struct bpf_func_state *caller, *callee;
2303 	struct bpf_reg_state *r0;
2304 
2305 	callee = state->frame[state->curframe];
2306 	r0 = &callee->regs[BPF_REG_0];
2307 	if (r0->type == PTR_TO_STACK) {
2308 		/* technically it's ok to return caller's stack pointer
2309 		 * (or caller's caller's pointer) back to the caller,
2310 		 * since these pointers are valid. Only current stack
2311 		 * pointer will be invalid as soon as function exits,
2312 		 * but let's be conservative
2313 		 */
2314 		verbose(env, "cannot return stack pointer to the caller\n");
2315 		return -EINVAL;
2316 	}
2317 
2318 	state->curframe--;
2319 	caller = state->frame[state->curframe];
2320 	/* return to the caller whatever r0 had in the callee */
2321 	caller->regs[BPF_REG_0] = *r0;
2322 
2323 	*insn_idx = callee->callsite + 1;
2324 	if (env->log.level) {
2325 		verbose(env, "returning from callee:\n");
2326 		print_verifier_state(env, callee);
2327 		verbose(env, "to caller at %d:\n", *insn_idx);
2328 		print_verifier_state(env, caller);
2329 	}
2330 	/* clear everything in the callee */
2331 	free_func_state(callee);
2332 	state->frame[state->curframe + 1] = NULL;
2333 	return 0;
2334 }
2335 
2336 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
2337 {
2338 	const struct bpf_func_proto *fn = NULL;
2339 	struct bpf_reg_state *regs;
2340 	struct bpf_call_arg_meta meta;
2341 	bool changes_data;
2342 	int i, err;
2343 
2344 	/* find function prototype */
2345 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
2346 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
2347 			func_id);
2348 		return -EINVAL;
2349 	}
2350 
2351 	if (env->ops->get_func_proto)
2352 		fn = env->ops->get_func_proto(func_id, env->prog);
2353 	if (!fn) {
2354 		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
2355 			func_id);
2356 		return -EINVAL;
2357 	}
2358 
2359 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
2360 	if (!env->prog->gpl_compatible && fn->gpl_only) {
2361 		verbose(env, "cannot call GPL only function from proprietary program\n");
2362 		return -EINVAL;
2363 	}
2364 
2365 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
2366 	changes_data = bpf_helper_changes_pkt_data(fn->func);
2367 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
2368 		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
2369 			func_id_name(func_id), func_id);
2370 		return -EINVAL;
2371 	}
2372 
2373 	memset(&meta, 0, sizeof(meta));
2374 	meta.pkt_access = fn->pkt_access;
2375 
2376 	err = check_func_proto(fn);
2377 	if (err) {
2378 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
2379 			func_id_name(func_id), func_id);
2380 		return err;
2381 	}
2382 
2383 	/* check args */
2384 	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
2385 	if (err)
2386 		return err;
2387 	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
2388 	if (err)
2389 		return err;
2390 	if (func_id == BPF_FUNC_tail_call) {
2391 		if (meta.map_ptr == NULL) {
2392 			verbose(env, "verifier bug\n");
2393 			return -EINVAL;
2394 		}
2395 		env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
2396 	}
2397 	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
2398 	if (err)
2399 		return err;
2400 	err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
2401 	if (err)
2402 		return err;
2403 	err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
2404 	if (err)
2405 		return err;
2406 
2407 	/* Mark slots with STACK_MISC in case of raw mode, stack offset
2408 	 * is inferred from register state.
2409 	 */
2410 	for (i = 0; i < meta.access_size; i++) {
2411 		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
2412 				       BPF_WRITE, -1, false);
2413 		if (err)
2414 			return err;
2415 	}
2416 
2417 	regs = cur_regs(env);
2418 	/* reset caller saved regs */
2419 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
2420 		mark_reg_not_init(env, regs, caller_saved[i]);
2421 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2422 	}
2423 
2424 	/* update return register (already marked as written above) */
2425 	if (fn->ret_type == RET_INTEGER) {
2426 		/* sets type to SCALAR_VALUE */
2427 		mark_reg_unknown(env, regs, BPF_REG_0);
2428 	} else if (fn->ret_type == RET_VOID) {
2429 		regs[BPF_REG_0].type = NOT_INIT;
2430 	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
2431 		struct bpf_insn_aux_data *insn_aux;
2432 
2433 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
2434 		/* There is no offset yet applied, variable or fixed */
2435 		mark_reg_known_zero(env, regs, BPF_REG_0);
2436 		regs[BPF_REG_0].off = 0;
2437 		/* remember map_ptr, so that check_map_access()
2438 		 * can check 'value_size' boundary of memory access
2439 		 * to map element returned from bpf_map_lookup_elem()
2440 		 */
2441 		if (meta.map_ptr == NULL) {
2442 			verbose(env,
2443 				"kernel subsystem misconfigured verifier\n");
2444 			return -EINVAL;
2445 		}
2446 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
2447 		regs[BPF_REG_0].id = ++env->id_gen;
2448 		insn_aux = &env->insn_aux_data[insn_idx];
2449 		if (!insn_aux->map_ptr)
2450 			insn_aux->map_ptr = meta.map_ptr;
2451 		else if (insn_aux->map_ptr != meta.map_ptr)
2452 			insn_aux->map_ptr = BPF_MAP_PTR_POISON;
2453 	} else {
2454 		verbose(env, "unknown return type %d of func %s#%d\n",
2455 			fn->ret_type, func_id_name(func_id), func_id);
2456 		return -EINVAL;
2457 	}
2458 
2459 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
2460 	if (err)
2461 		return err;
2462 
2463 	if (changes_data)
2464 		clear_all_pkt_pointers(env);
2465 	return 0;
2466 }
2467 
2468 static bool signed_add_overflows(s64 a, s64 b)
2469 {
2470 	/* Do the add in u64, where overflow is well-defined */
2471 	s64 res = (s64)((u64)a + (u64)b);
2472 
2473 	if (b < 0)
2474 		return res > a;
2475 	return res < a;
2476 }
2477 
2478 static bool signed_sub_overflows(s64 a, s64 b)
2479 {
2480 	/* Do the sub in u64, where overflow is well-defined */
2481 	s64 res = (s64)((u64)a - (u64)b);
2482 
2483 	if (b < 0)
2484 		return res < a;
2485 	return res > a;
2486 }
2487 
2488 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
2489 				  const struct bpf_reg_state *reg,
2490 				  enum bpf_reg_type type)
2491 {
2492 	bool known = tnum_is_const(reg->var_off);
2493 	s64 val = reg->var_off.value;
2494 	s64 smin = reg->smin_value;
2495 
2496 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
2497 		verbose(env, "math between %s pointer and %lld is not allowed\n",
2498 			reg_type_str[type], val);
2499 		return false;
2500 	}
2501 
2502 	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
2503 		verbose(env, "%s pointer offset %d is not allowed\n",
2504 			reg_type_str[type], reg->off);
2505 		return false;
2506 	}
2507 
2508 	if (smin == S64_MIN) {
2509 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
2510 			reg_type_str[type]);
2511 		return false;
2512 	}
2513 
2514 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
2515 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
2516 			smin, reg_type_str[type]);
2517 		return false;
2518 	}
2519 
2520 	return true;
2521 }
2522 
2523 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
2524  * Caller should also handle BPF_MOV case separately.
2525  * If we return -EACCES, caller may want to try again treating pointer as a
2526  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
2527  */
2528 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2529 				   struct bpf_insn *insn,
2530 				   const struct bpf_reg_state *ptr_reg,
2531 				   const struct bpf_reg_state *off_reg)
2532 {
2533 	struct bpf_verifier_state *vstate = env->cur_state;
2534 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2535 	struct bpf_reg_state *regs = state->regs, *dst_reg;
2536 	bool known = tnum_is_const(off_reg->var_off);
2537 	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
2538 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
2539 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
2540 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
2541 	u8 opcode = BPF_OP(insn->code);
2542 	u32 dst = insn->dst_reg;
2543 
2544 	dst_reg = &regs[dst];
2545 
2546 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
2547 	    smin_val > smax_val || umin_val > umax_val) {
2548 		/* Taint dst register if offset had invalid bounds derived from
2549 		 * e.g. dead branches.
2550 		 */
2551 		__mark_reg_unknown(dst_reg);
2552 		return 0;
2553 	}
2554 
2555 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
2556 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
2557 		verbose(env,
2558 			"R%d 32-bit pointer arithmetic prohibited\n",
2559 			dst);
2560 		return -EACCES;
2561 	}
2562 
2563 	if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
2564 		verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
2565 			dst);
2566 		return -EACCES;
2567 	}
2568 	if (ptr_reg->type == CONST_PTR_TO_MAP) {
2569 		verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
2570 			dst);
2571 		return -EACCES;
2572 	}
2573 	if (ptr_reg->type == PTR_TO_PACKET_END) {
2574 		verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
2575 			dst);
2576 		return -EACCES;
2577 	}
2578 
2579 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
2580 	 * The id may be overwritten later if we create a new variable offset.
2581 	 */
2582 	dst_reg->type = ptr_reg->type;
2583 	dst_reg->id = ptr_reg->id;
2584 
2585 	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
2586 	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
2587 		return -EINVAL;
2588 
2589 	switch (opcode) {
2590 	case BPF_ADD:
2591 		/* We can take a fixed offset as long as it doesn't overflow
2592 		 * the s32 'off' field
2593 		 */
2594 		if (known && (ptr_reg->off + smin_val ==
2595 			      (s64)(s32)(ptr_reg->off + smin_val))) {
2596 			/* pointer += K.  Accumulate it into fixed offset */
2597 			dst_reg->smin_value = smin_ptr;
2598 			dst_reg->smax_value = smax_ptr;
2599 			dst_reg->umin_value = umin_ptr;
2600 			dst_reg->umax_value = umax_ptr;
2601 			dst_reg->var_off = ptr_reg->var_off;
2602 			dst_reg->off = ptr_reg->off + smin_val;
2603 			dst_reg->range = ptr_reg->range;
2604 			break;
2605 		}
2606 		/* A new variable offset is created.  Note that off_reg->off
2607 		 * == 0, since it's a scalar.
2608 		 * dst_reg gets the pointer type and since some positive
2609 		 * integer value was added to the pointer, give it a new 'id'
2610 		 * if it's a PTR_TO_PACKET.
2611 		 * this creates a new 'base' pointer, off_reg (variable) gets
2612 		 * added into the variable offset, and we copy the fixed offset
2613 		 * from ptr_reg.
2614 		 */
2615 		if (signed_add_overflows(smin_ptr, smin_val) ||
2616 		    signed_add_overflows(smax_ptr, smax_val)) {
2617 			dst_reg->smin_value = S64_MIN;
2618 			dst_reg->smax_value = S64_MAX;
2619 		} else {
2620 			dst_reg->smin_value = smin_ptr + smin_val;
2621 			dst_reg->smax_value = smax_ptr + smax_val;
2622 		}
2623 		if (umin_ptr + umin_val < umin_ptr ||
2624 		    umax_ptr + umax_val < umax_ptr) {
2625 			dst_reg->umin_value = 0;
2626 			dst_reg->umax_value = U64_MAX;
2627 		} else {
2628 			dst_reg->umin_value = umin_ptr + umin_val;
2629 			dst_reg->umax_value = umax_ptr + umax_val;
2630 		}
2631 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
2632 		dst_reg->off = ptr_reg->off;
2633 		if (reg_is_pkt_pointer(ptr_reg)) {
2634 			dst_reg->id = ++env->id_gen;
2635 			/* something was added to pkt_ptr, set range to zero */
2636 			dst_reg->range = 0;
2637 		}
2638 		break;
2639 	case BPF_SUB:
2640 		if (dst_reg == off_reg) {
2641 			/* scalar -= pointer.  Creates an unknown scalar */
2642 			verbose(env, "R%d tried to subtract pointer from scalar\n",
2643 				dst);
2644 			return -EACCES;
2645 		}
2646 		/* We don't allow subtraction from FP, because (according to
2647 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
2648 		 * be able to deal with it.
2649 		 */
2650 		if (ptr_reg->type == PTR_TO_STACK) {
2651 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
2652 				dst);
2653 			return -EACCES;
2654 		}
2655 		if (known && (ptr_reg->off - smin_val ==
2656 			      (s64)(s32)(ptr_reg->off - smin_val))) {
2657 			/* pointer -= K.  Subtract it from fixed offset */
2658 			dst_reg->smin_value = smin_ptr;
2659 			dst_reg->smax_value = smax_ptr;
2660 			dst_reg->umin_value = umin_ptr;
2661 			dst_reg->umax_value = umax_ptr;
2662 			dst_reg->var_off = ptr_reg->var_off;
2663 			dst_reg->id = ptr_reg->id;
2664 			dst_reg->off = ptr_reg->off - smin_val;
2665 			dst_reg->range = ptr_reg->range;
2666 			break;
2667 		}
2668 		/* A new variable offset is created.  If the subtrahend is known
2669 		 * nonnegative, then any reg->range we had before is still good.
2670 		 */
2671 		if (signed_sub_overflows(smin_ptr, smax_val) ||
2672 		    signed_sub_overflows(smax_ptr, smin_val)) {
2673 			/* Overflow possible, we know nothing */
2674 			dst_reg->smin_value = S64_MIN;
2675 			dst_reg->smax_value = S64_MAX;
2676 		} else {
2677 			dst_reg->smin_value = smin_ptr - smax_val;
2678 			dst_reg->smax_value = smax_ptr - smin_val;
2679 		}
2680 		if (umin_ptr < umax_val) {
2681 			/* Overflow possible, we know nothing */
2682 			dst_reg->umin_value = 0;
2683 			dst_reg->umax_value = U64_MAX;
2684 		} else {
2685 			/* Cannot overflow (as long as bounds are consistent) */
2686 			dst_reg->umin_value = umin_ptr - umax_val;
2687 			dst_reg->umax_value = umax_ptr - umin_val;
2688 		}
2689 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
2690 		dst_reg->off = ptr_reg->off;
2691 		if (reg_is_pkt_pointer(ptr_reg)) {
2692 			dst_reg->id = ++env->id_gen;
2693 			/* something was added to pkt_ptr, set range to zero */
2694 			if (smin_val < 0)
2695 				dst_reg->range = 0;
2696 		}
2697 		break;
2698 	case BPF_AND:
2699 	case BPF_OR:
2700 	case BPF_XOR:
2701 		/* bitwise ops on pointers are troublesome, prohibit. */
2702 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
2703 			dst, bpf_alu_string[opcode >> 4]);
2704 		return -EACCES;
2705 	default:
2706 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
2707 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
2708 			dst, bpf_alu_string[opcode >> 4]);
2709 		return -EACCES;
2710 	}
2711 
2712 	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
2713 		return -EINVAL;
2714 
2715 	__update_reg_bounds(dst_reg);
2716 	__reg_deduce_bounds(dst_reg);
2717 	__reg_bound_offset(dst_reg);
2718 	return 0;
2719 }
2720 
2721 /* WARNING: This function does calculations on 64-bit values, but the actual
2722  * execution may occur on 32-bit values. Therefore, things like bitshifts
2723  * need extra checks in the 32-bit case.
2724  */
2725 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2726 				      struct bpf_insn *insn,
2727 				      struct bpf_reg_state *dst_reg,
2728 				      struct bpf_reg_state src_reg)
2729 {
2730 	struct bpf_reg_state *regs = cur_regs(env);
2731 	u8 opcode = BPF_OP(insn->code);
2732 	bool src_known, dst_known;
2733 	s64 smin_val, smax_val;
2734 	u64 umin_val, umax_val;
2735 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
2736 
2737 	smin_val = src_reg.smin_value;
2738 	smax_val = src_reg.smax_value;
2739 	umin_val = src_reg.umin_value;
2740 	umax_val = src_reg.umax_value;
2741 	src_known = tnum_is_const(src_reg.var_off);
2742 	dst_known = tnum_is_const(dst_reg->var_off);
2743 
2744 	if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
2745 	    smin_val > smax_val || umin_val > umax_val) {
2746 		/* Taint dst register if offset had invalid bounds derived from
2747 		 * e.g. dead branches.
2748 		 */
2749 		__mark_reg_unknown(dst_reg);
2750 		return 0;
2751 	}
2752 
2753 	if (!src_known &&
2754 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
2755 		__mark_reg_unknown(dst_reg);
2756 		return 0;
2757 	}
2758 
2759 	switch (opcode) {
2760 	case BPF_ADD:
2761 		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
2762 		    signed_add_overflows(dst_reg->smax_value, smax_val)) {
2763 			dst_reg->smin_value = S64_MIN;
2764 			dst_reg->smax_value = S64_MAX;
2765 		} else {
2766 			dst_reg->smin_value += smin_val;
2767 			dst_reg->smax_value += smax_val;
2768 		}
2769 		if (dst_reg->umin_value + umin_val < umin_val ||
2770 		    dst_reg->umax_value + umax_val < umax_val) {
2771 			dst_reg->umin_value = 0;
2772 			dst_reg->umax_value = U64_MAX;
2773 		} else {
2774 			dst_reg->umin_value += umin_val;
2775 			dst_reg->umax_value += umax_val;
2776 		}
2777 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
2778 		break;
2779 	case BPF_SUB:
2780 		if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
2781 		    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
2782 			/* Overflow possible, we know nothing */
2783 			dst_reg->smin_value = S64_MIN;
2784 			dst_reg->smax_value = S64_MAX;
2785 		} else {
2786 			dst_reg->smin_value -= smax_val;
2787 			dst_reg->smax_value -= smin_val;
2788 		}
2789 		if (dst_reg->umin_value < umax_val) {
2790 			/* Overflow possible, we know nothing */
2791 			dst_reg->umin_value = 0;
2792 			dst_reg->umax_value = U64_MAX;
2793 		} else {
2794 			/* Cannot overflow (as long as bounds are consistent) */
2795 			dst_reg->umin_value -= umax_val;
2796 			dst_reg->umax_value -= umin_val;
2797 		}
2798 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
2799 		break;
2800 	case BPF_MUL:
2801 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
2802 		if (smin_val < 0 || dst_reg->smin_value < 0) {
2803 			/* Ain't nobody got time to multiply that sign */
2804 			__mark_reg_unbounded(dst_reg);
2805 			__update_reg_bounds(dst_reg);
2806 			break;
2807 		}
2808 		/* Both values are positive, so we can work with unsigned and
2809 		 * copy the result to signed (unless it exceeds S64_MAX).
2810 		 */
2811 		if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
2812 			/* Potential overflow, we know nothing */
2813 			__mark_reg_unbounded(dst_reg);
2814 			/* (except what we can learn from the var_off) */
2815 			__update_reg_bounds(dst_reg);
2816 			break;
2817 		}
2818 		dst_reg->umin_value *= umin_val;
2819 		dst_reg->umax_value *= umax_val;
2820 		if (dst_reg->umax_value > S64_MAX) {
2821 			/* Overflow possible, we know nothing */
2822 			dst_reg->smin_value = S64_MIN;
2823 			dst_reg->smax_value = S64_MAX;
2824 		} else {
2825 			dst_reg->smin_value = dst_reg->umin_value;
2826 			dst_reg->smax_value = dst_reg->umax_value;
2827 		}
2828 		break;
2829 	case BPF_AND:
2830 		if (src_known && dst_known) {
2831 			__mark_reg_known(dst_reg, dst_reg->var_off.value &
2832 						  src_reg.var_off.value);
2833 			break;
2834 		}
2835 		/* We get our minimum from the var_off, since that's inherently
2836 		 * bitwise.  Our maximum is the minimum of the operands' maxima.
2837 		 */
2838 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
2839 		dst_reg->umin_value = dst_reg->var_off.value;
2840 		dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
2841 		if (dst_reg->smin_value < 0 || smin_val < 0) {
2842 			/* Lose signed bounds when ANDing negative numbers,
2843 			 * ain't nobody got time for that.
2844 			 */
2845 			dst_reg->smin_value = S64_MIN;
2846 			dst_reg->smax_value = S64_MAX;
2847 		} else {
2848 			/* ANDing two positives gives a positive, so safe to
2849 			 * cast result into s64.
2850 			 */
2851 			dst_reg->smin_value = dst_reg->umin_value;
2852 			dst_reg->smax_value = dst_reg->umax_value;
2853 		}
2854 		/* We may learn something more from the var_off */
2855 		__update_reg_bounds(dst_reg);
2856 		break;
2857 	case BPF_OR:
2858 		if (src_known && dst_known) {
2859 			__mark_reg_known(dst_reg, dst_reg->var_off.value |
2860 						  src_reg.var_off.value);
2861 			break;
2862 		}
2863 		/* We get our maximum from the var_off, and our minimum is the
2864 		 * maximum of the operands' minima
2865 		 */
2866 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
2867 		dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
2868 		dst_reg->umax_value = dst_reg->var_off.value |
2869 				      dst_reg->var_off.mask;
2870 		if (dst_reg->smin_value < 0 || smin_val < 0) {
2871 			/* Lose signed bounds when ORing negative numbers,
2872 			 * ain't nobody got time for that.
2873 			 */
2874 			dst_reg->smin_value = S64_MIN;
2875 			dst_reg->smax_value = S64_MAX;
2876 		} else {
2877 			/* ORing two positives gives a positive, so safe to
2878 			 * cast result into s64.
2879 			 */
2880 			dst_reg->smin_value = dst_reg->umin_value;
2881 			dst_reg->smax_value = dst_reg->umax_value;
2882 		}
2883 		/* We may learn something more from the var_off */
2884 		__update_reg_bounds(dst_reg);
2885 		break;
2886 	case BPF_LSH:
2887 		if (umax_val >= insn_bitness) {
2888 			/* Shifts greater than 31 or 63 are undefined.
2889 			 * This includes shifts by a negative number.
2890 			 */
2891 			mark_reg_unknown(env, regs, insn->dst_reg);
2892 			break;
2893 		}
2894 		/* We lose all sign bit information (except what we can pick
2895 		 * up from var_off)
2896 		 */
2897 		dst_reg->smin_value = S64_MIN;
2898 		dst_reg->smax_value = S64_MAX;
2899 		/* If we might shift our top bit out, then we know nothing */
2900 		if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
2901 			dst_reg->umin_value = 0;
2902 			dst_reg->umax_value = U64_MAX;
2903 		} else {
2904 			dst_reg->umin_value <<= umin_val;
2905 			dst_reg->umax_value <<= umax_val;
2906 		}
2907 		if (src_known)
2908 			dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
2909 		else
2910 			dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
2911 		/* We may learn something more from the var_off */
2912 		__update_reg_bounds(dst_reg);
2913 		break;
2914 	case BPF_RSH:
2915 		if (umax_val >= insn_bitness) {
2916 			/* Shifts greater than 31 or 63 are undefined.
2917 			 * This includes shifts by a negative number.
2918 			 */
2919 			mark_reg_unknown(env, regs, insn->dst_reg);
2920 			break;
2921 		}
2922 		/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
2923 		 * be negative, then either:
2924 		 * 1) src_reg might be zero, so the sign bit of the result is
2925 		 *    unknown, so we lose our signed bounds
2926 		 * 2) it's known negative, thus the unsigned bounds capture the
2927 		 *    signed bounds
2928 		 * 3) the signed bounds cross zero, so they tell us nothing
2929 		 *    about the result
2930 		 * If the value in dst_reg is known nonnegative, then again the
2931 		 * unsigned bounts capture the signed bounds.
2932 		 * Thus, in all cases it suffices to blow away our signed bounds
2933 		 * and rely on inferring new ones from the unsigned bounds and
2934 		 * var_off of the result.
2935 		 */
2936 		dst_reg->smin_value = S64_MIN;
2937 		dst_reg->smax_value = S64_MAX;
2938 		if (src_known)
2939 			dst_reg->var_off = tnum_rshift(dst_reg->var_off,
2940 						       umin_val);
2941 		else
2942 			dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
2943 		dst_reg->umin_value >>= umax_val;
2944 		dst_reg->umax_value >>= umin_val;
2945 		/* We may learn something more from the var_off */
2946 		__update_reg_bounds(dst_reg);
2947 		break;
2948 	default:
2949 		mark_reg_unknown(env, regs, insn->dst_reg);
2950 		break;
2951 	}
2952 
2953 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
2954 		/* 32-bit ALU ops are (32,32)->32 */
2955 		coerce_reg_to_size(dst_reg, 4);
2956 		coerce_reg_to_size(&src_reg, 4);
2957 	}
2958 
2959 	__reg_deduce_bounds(dst_reg);
2960 	__reg_bound_offset(dst_reg);
2961 	return 0;
2962 }
2963 
2964 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
2965  * and var_off.
2966  */
2967 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2968 				   struct bpf_insn *insn)
2969 {
2970 	struct bpf_verifier_state *vstate = env->cur_state;
2971 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2972 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
2973 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2974 	u8 opcode = BPF_OP(insn->code);
2975 
2976 	dst_reg = &regs[insn->dst_reg];
2977 	src_reg = NULL;
2978 	if (dst_reg->type != SCALAR_VALUE)
2979 		ptr_reg = dst_reg;
2980 	if (BPF_SRC(insn->code) == BPF_X) {
2981 		src_reg = &regs[insn->src_reg];
2982 		if (src_reg->type != SCALAR_VALUE) {
2983 			if (dst_reg->type != SCALAR_VALUE) {
2984 				/* Combining two pointers by any ALU op yields
2985 				 * an arbitrary scalar. Disallow all math except
2986 				 * pointer subtraction
2987 				 */
2988 				if (opcode == BPF_SUB){
2989 					mark_reg_unknown(env, regs, insn->dst_reg);
2990 					return 0;
2991 				}
2992 				verbose(env, "R%d pointer %s pointer prohibited\n",
2993 					insn->dst_reg,
2994 					bpf_alu_string[opcode >> 4]);
2995 				return -EACCES;
2996 			} else {
2997 				/* scalar += pointer
2998 				 * This is legal, but we have to reverse our
2999 				 * src/dest handling in computing the range
3000 				 */
3001 				return adjust_ptr_min_max_vals(env, insn,
3002 							       src_reg, dst_reg);
3003 			}
3004 		} else if (ptr_reg) {
3005 			/* pointer += scalar */
3006 			return adjust_ptr_min_max_vals(env, insn,
3007 						       dst_reg, src_reg);
3008 		}
3009 	} else {
3010 		/* Pretend the src is a reg with a known value, since we only
3011 		 * need to be able to read from this state.
3012 		 */
3013 		off_reg.type = SCALAR_VALUE;
3014 		__mark_reg_known(&off_reg, insn->imm);
3015 		src_reg = &off_reg;
3016 		if (ptr_reg) /* pointer += K */
3017 			return adjust_ptr_min_max_vals(env, insn,
3018 						       ptr_reg, src_reg);
3019 	}
3020 
3021 	/* Got here implies adding two SCALAR_VALUEs */
3022 	if (WARN_ON_ONCE(ptr_reg)) {
3023 		print_verifier_state(env, state);
3024 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
3025 		return -EINVAL;
3026 	}
3027 	if (WARN_ON(!src_reg)) {
3028 		print_verifier_state(env, state);
3029 		verbose(env, "verifier internal error: no src_reg\n");
3030 		return -EINVAL;
3031 	}
3032 	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
3033 }
3034 
3035 /* check validity of 32-bit and 64-bit arithmetic operations */
3036 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
3037 {
3038 	struct bpf_reg_state *regs = cur_regs(env);
3039 	u8 opcode = BPF_OP(insn->code);
3040 	int err;
3041 
3042 	if (opcode == BPF_END || opcode == BPF_NEG) {
3043 		if (opcode == BPF_NEG) {
3044 			if (BPF_SRC(insn->code) != 0 ||
3045 			    insn->src_reg != BPF_REG_0 ||
3046 			    insn->off != 0 || insn->imm != 0) {
3047 				verbose(env, "BPF_NEG uses reserved fields\n");
3048 				return -EINVAL;
3049 			}
3050 		} else {
3051 			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
3052 			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
3053 			    BPF_CLASS(insn->code) == BPF_ALU64) {
3054 				verbose(env, "BPF_END uses reserved fields\n");
3055 				return -EINVAL;
3056 			}
3057 		}
3058 
3059 		/* check src operand */
3060 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3061 		if (err)
3062 			return err;
3063 
3064 		if (is_pointer_value(env, insn->dst_reg)) {
3065 			verbose(env, "R%d pointer arithmetic prohibited\n",
3066 				insn->dst_reg);
3067 			return -EACCES;
3068 		}
3069 
3070 		/* check dest operand */
3071 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
3072 		if (err)
3073 			return err;
3074 
3075 	} else if (opcode == BPF_MOV) {
3076 
3077 		if (BPF_SRC(insn->code) == BPF_X) {
3078 			if (insn->imm != 0 || insn->off != 0) {
3079 				verbose(env, "BPF_MOV uses reserved fields\n");
3080 				return -EINVAL;
3081 			}
3082 
3083 			/* check src operand */
3084 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
3085 			if (err)
3086 				return err;
3087 		} else {
3088 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
3089 				verbose(env, "BPF_MOV uses reserved fields\n");
3090 				return -EINVAL;
3091 			}
3092 		}
3093 
3094 		/* check dest operand */
3095 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
3096 		if (err)
3097 			return err;
3098 
3099 		if (BPF_SRC(insn->code) == BPF_X) {
3100 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
3101 				/* case: R1 = R2
3102 				 * copy register state to dest reg
3103 				 */
3104 				regs[insn->dst_reg] = regs[insn->src_reg];
3105 				regs[insn->dst_reg].live |= REG_LIVE_WRITTEN;
3106 			} else {
3107 				/* R1 = (u32) R2 */
3108 				if (is_pointer_value(env, insn->src_reg)) {
3109 					verbose(env,
3110 						"R%d partial copy of pointer\n",
3111 						insn->src_reg);
3112 					return -EACCES;
3113 				}
3114 				mark_reg_unknown(env, regs, insn->dst_reg);
3115 				coerce_reg_to_size(&regs[insn->dst_reg], 4);
3116 			}
3117 		} else {
3118 			/* case: R = imm
3119 			 * remember the value we stored into this reg
3120 			 */
3121 			regs[insn->dst_reg].type = SCALAR_VALUE;
3122 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
3123 				__mark_reg_known(regs + insn->dst_reg,
3124 						 insn->imm);
3125 			} else {
3126 				__mark_reg_known(regs + insn->dst_reg,
3127 						 (u32)insn->imm);
3128 			}
3129 		}
3130 
3131 	} else if (opcode > BPF_END) {
3132 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
3133 		return -EINVAL;
3134 
3135 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
3136 
3137 		if (BPF_SRC(insn->code) == BPF_X) {
3138 			if (insn->imm != 0 || insn->off != 0) {
3139 				verbose(env, "BPF_ALU uses reserved fields\n");
3140 				return -EINVAL;
3141 			}
3142 			/* check src1 operand */
3143 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
3144 			if (err)
3145 				return err;
3146 		} else {
3147 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
3148 				verbose(env, "BPF_ALU uses reserved fields\n");
3149 				return -EINVAL;
3150 			}
3151 		}
3152 
3153 		/* check src2 operand */
3154 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3155 		if (err)
3156 			return err;
3157 
3158 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
3159 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
3160 			verbose(env, "div by zero\n");
3161 			return -EINVAL;
3162 		}
3163 
3164 		if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
3165 			verbose(env, "BPF_ARSH not supported for 32 bit ALU\n");
3166 			return -EINVAL;
3167 		}
3168 
3169 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
3170 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
3171 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
3172 
3173 			if (insn->imm < 0 || insn->imm >= size) {
3174 				verbose(env, "invalid shift %d\n", insn->imm);
3175 				return -EINVAL;
3176 			}
3177 		}
3178 
3179 		/* check dest operand */
3180 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
3181 		if (err)
3182 			return err;
3183 
3184 		return adjust_reg_min_max_vals(env, insn);
3185 	}
3186 
3187 	return 0;
3188 }
3189 
3190 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
3191 				   struct bpf_reg_state *dst_reg,
3192 				   enum bpf_reg_type type,
3193 				   bool range_right_open)
3194 {
3195 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3196 	struct bpf_reg_state *regs = state->regs, *reg;
3197 	u16 new_range;
3198 	int i, j;
3199 
3200 	if (dst_reg->off < 0 ||
3201 	    (dst_reg->off == 0 && range_right_open))
3202 		/* This doesn't give us any range */
3203 		return;
3204 
3205 	if (dst_reg->umax_value > MAX_PACKET_OFF ||
3206 	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
3207 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
3208 		 * than pkt_end, but that's because it's also less than pkt.
3209 		 */
3210 		return;
3211 
3212 	new_range = dst_reg->off;
3213 	if (range_right_open)
3214 		new_range--;
3215 
3216 	/* Examples for register markings:
3217 	 *
3218 	 * pkt_data in dst register:
3219 	 *
3220 	 *   r2 = r3;
3221 	 *   r2 += 8;
3222 	 *   if (r2 > pkt_end) goto <handle exception>
3223 	 *   <access okay>
3224 	 *
3225 	 *   r2 = r3;
3226 	 *   r2 += 8;
3227 	 *   if (r2 < pkt_end) goto <access okay>
3228 	 *   <handle exception>
3229 	 *
3230 	 *   Where:
3231 	 *     r2 == dst_reg, pkt_end == src_reg
3232 	 *     r2=pkt(id=n,off=8,r=0)
3233 	 *     r3=pkt(id=n,off=0,r=0)
3234 	 *
3235 	 * pkt_data in src register:
3236 	 *
3237 	 *   r2 = r3;
3238 	 *   r2 += 8;
3239 	 *   if (pkt_end >= r2) goto <access okay>
3240 	 *   <handle exception>
3241 	 *
3242 	 *   r2 = r3;
3243 	 *   r2 += 8;
3244 	 *   if (pkt_end <= r2) goto <handle exception>
3245 	 *   <access okay>
3246 	 *
3247 	 *   Where:
3248 	 *     pkt_end == dst_reg, r2 == src_reg
3249 	 *     r2=pkt(id=n,off=8,r=0)
3250 	 *     r3=pkt(id=n,off=0,r=0)
3251 	 *
3252 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
3253 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
3254 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
3255 	 * the check.
3256 	 */
3257 
3258 	/* If our ids match, then we must have the same max_value.  And we
3259 	 * don't care about the other reg's fixed offset, since if it's too big
3260 	 * the range won't allow anything.
3261 	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
3262 	 */
3263 	for (i = 0; i < MAX_BPF_REG; i++)
3264 		if (regs[i].type == type && regs[i].id == dst_reg->id)
3265 			/* keep the maximum range already checked */
3266 			regs[i].range = max(regs[i].range, new_range);
3267 
3268 	for (j = 0; j <= vstate->curframe; j++) {
3269 		state = vstate->frame[j];
3270 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
3271 			if (state->stack[i].slot_type[0] != STACK_SPILL)
3272 				continue;
3273 			reg = &state->stack[i].spilled_ptr;
3274 			if (reg->type == type && reg->id == dst_reg->id)
3275 				reg->range = max(reg->range, new_range);
3276 		}
3277 	}
3278 }
3279 
3280 /* Adjusts the register min/max values in the case that the dst_reg is the
3281  * variable register that we are working on, and src_reg is a constant or we're
3282  * simply doing a BPF_K check.
3283  * In JEQ/JNE cases we also adjust the var_off values.
3284  */
3285 static void reg_set_min_max(struct bpf_reg_state *true_reg,
3286 			    struct bpf_reg_state *false_reg, u64 val,
3287 			    u8 opcode)
3288 {
3289 	/* If the dst_reg is a pointer, we can't learn anything about its
3290 	 * variable offset from the compare (unless src_reg were a pointer into
3291 	 * the same object, but we don't bother with that.
3292 	 * Since false_reg and true_reg have the same type by construction, we
3293 	 * only need to check one of them for pointerness.
3294 	 */
3295 	if (__is_pointer_value(false, false_reg))
3296 		return;
3297 
3298 	switch (opcode) {
3299 	case BPF_JEQ:
3300 		/* If this is false then we know nothing Jon Snow, but if it is
3301 		 * true then we know for sure.
3302 		 */
3303 		__mark_reg_known(true_reg, val);
3304 		break;
3305 	case BPF_JNE:
3306 		/* If this is true we know nothing Jon Snow, but if it is false
3307 		 * we know the value for sure;
3308 		 */
3309 		__mark_reg_known(false_reg, val);
3310 		break;
3311 	case BPF_JGT:
3312 		false_reg->umax_value = min(false_reg->umax_value, val);
3313 		true_reg->umin_value = max(true_reg->umin_value, val + 1);
3314 		break;
3315 	case BPF_JSGT:
3316 		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3317 		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
3318 		break;
3319 	case BPF_JLT:
3320 		false_reg->umin_value = max(false_reg->umin_value, val);
3321 		true_reg->umax_value = min(true_reg->umax_value, val - 1);
3322 		break;
3323 	case BPF_JSLT:
3324 		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
3325 		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3326 		break;
3327 	case BPF_JGE:
3328 		false_reg->umax_value = min(false_reg->umax_value, val - 1);
3329 		true_reg->umin_value = max(true_reg->umin_value, val);
3330 		break;
3331 	case BPF_JSGE:
3332 		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3333 		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
3334 		break;
3335 	case BPF_JLE:
3336 		false_reg->umin_value = max(false_reg->umin_value, val + 1);
3337 		true_reg->umax_value = min(true_reg->umax_value, val);
3338 		break;
3339 	case BPF_JSLE:
3340 		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
3341 		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3342 		break;
3343 	default:
3344 		break;
3345 	}
3346 
3347 	__reg_deduce_bounds(false_reg);
3348 	__reg_deduce_bounds(true_reg);
3349 	/* We might have learned some bits from the bounds. */
3350 	__reg_bound_offset(false_reg);
3351 	__reg_bound_offset(true_reg);
3352 	/* Intersecting with the old var_off might have improved our bounds
3353 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3354 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
3355 	 */
3356 	__update_reg_bounds(false_reg);
3357 	__update_reg_bounds(true_reg);
3358 }
3359 
3360 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
3361  * the variable reg.
3362  */
3363 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
3364 				struct bpf_reg_state *false_reg, u64 val,
3365 				u8 opcode)
3366 {
3367 	if (__is_pointer_value(false, false_reg))
3368 		return;
3369 
3370 	switch (opcode) {
3371 	case BPF_JEQ:
3372 		/* If this is false then we know nothing Jon Snow, but if it is
3373 		 * true then we know for sure.
3374 		 */
3375 		__mark_reg_known(true_reg, val);
3376 		break;
3377 	case BPF_JNE:
3378 		/* If this is true we know nothing Jon Snow, but if it is false
3379 		 * we know the value for sure;
3380 		 */
3381 		__mark_reg_known(false_reg, val);
3382 		break;
3383 	case BPF_JGT:
3384 		true_reg->umax_value = min(true_reg->umax_value, val - 1);
3385 		false_reg->umin_value = max(false_reg->umin_value, val);
3386 		break;
3387 	case BPF_JSGT:
3388 		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3389 		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
3390 		break;
3391 	case BPF_JLT:
3392 		true_reg->umin_value = max(true_reg->umin_value, val + 1);
3393 		false_reg->umax_value = min(false_reg->umax_value, val);
3394 		break;
3395 	case BPF_JSLT:
3396 		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
3397 		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3398 		break;
3399 	case BPF_JGE:
3400 		true_reg->umax_value = min(true_reg->umax_value, val);
3401 		false_reg->umin_value = max(false_reg->umin_value, val + 1);
3402 		break;
3403 	case BPF_JSGE:
3404 		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3405 		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
3406 		break;
3407 	case BPF_JLE:
3408 		true_reg->umin_value = max(true_reg->umin_value, val);
3409 		false_reg->umax_value = min(false_reg->umax_value, val - 1);
3410 		break;
3411 	case BPF_JSLE:
3412 		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
3413 		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3414 		break;
3415 	default:
3416 		break;
3417 	}
3418 
3419 	__reg_deduce_bounds(false_reg);
3420 	__reg_deduce_bounds(true_reg);
3421 	/* We might have learned some bits from the bounds. */
3422 	__reg_bound_offset(false_reg);
3423 	__reg_bound_offset(true_reg);
3424 	/* Intersecting with the old var_off might have improved our bounds
3425 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3426 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
3427 	 */
3428 	__update_reg_bounds(false_reg);
3429 	__update_reg_bounds(true_reg);
3430 }
3431 
3432 /* Regs are known to be equal, so intersect their min/max/var_off */
3433 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
3434 				  struct bpf_reg_state *dst_reg)
3435 {
3436 	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
3437 							dst_reg->umin_value);
3438 	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
3439 							dst_reg->umax_value);
3440 	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
3441 							dst_reg->smin_value);
3442 	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
3443 							dst_reg->smax_value);
3444 	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
3445 							     dst_reg->var_off);
3446 	/* We might have learned new bounds from the var_off. */
3447 	__update_reg_bounds(src_reg);
3448 	__update_reg_bounds(dst_reg);
3449 	/* We might have learned something about the sign bit. */
3450 	__reg_deduce_bounds(src_reg);
3451 	__reg_deduce_bounds(dst_reg);
3452 	/* We might have learned some bits from the bounds. */
3453 	__reg_bound_offset(src_reg);
3454 	__reg_bound_offset(dst_reg);
3455 	/* Intersecting with the old var_off might have improved our bounds
3456 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3457 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
3458 	 */
3459 	__update_reg_bounds(src_reg);
3460 	__update_reg_bounds(dst_reg);
3461 }
3462 
3463 static void reg_combine_min_max(struct bpf_reg_state *true_src,
3464 				struct bpf_reg_state *true_dst,
3465 				struct bpf_reg_state *false_src,
3466 				struct bpf_reg_state *false_dst,
3467 				u8 opcode)
3468 {
3469 	switch (opcode) {
3470 	case BPF_JEQ:
3471 		__reg_combine_min_max(true_src, true_dst);
3472 		break;
3473 	case BPF_JNE:
3474 		__reg_combine_min_max(false_src, false_dst);
3475 		break;
3476 	}
3477 }
3478 
3479 static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
3480 			 bool is_null)
3481 {
3482 	struct bpf_reg_state *reg = &regs[regno];
3483 
3484 	if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
3485 		/* Old offset (both fixed and variable parts) should
3486 		 * have been known-zero, because we don't allow pointer
3487 		 * arithmetic on pointers that might be NULL.
3488 		 */
3489 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
3490 				 !tnum_equals_const(reg->var_off, 0) ||
3491 				 reg->off)) {
3492 			__mark_reg_known_zero(reg);
3493 			reg->off = 0;
3494 		}
3495 		if (is_null) {
3496 			reg->type = SCALAR_VALUE;
3497 		} else if (reg->map_ptr->inner_map_meta) {
3498 			reg->type = CONST_PTR_TO_MAP;
3499 			reg->map_ptr = reg->map_ptr->inner_map_meta;
3500 		} else {
3501 			reg->type = PTR_TO_MAP_VALUE;
3502 		}
3503 		/* We don't need id from this point onwards anymore, thus we
3504 		 * should better reset it, so that state pruning has chances
3505 		 * to take effect.
3506 		 */
3507 		reg->id = 0;
3508 	}
3509 }
3510 
3511 /* The logic is similar to find_good_pkt_pointers(), both could eventually
3512  * be folded together at some point.
3513  */
3514 static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
3515 			  bool is_null)
3516 {
3517 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3518 	struct bpf_reg_state *regs = state->regs;
3519 	u32 id = regs[regno].id;
3520 	int i, j;
3521 
3522 	for (i = 0; i < MAX_BPF_REG; i++)
3523 		mark_map_reg(regs, i, id, is_null);
3524 
3525 	for (j = 0; j <= vstate->curframe; j++) {
3526 		state = vstate->frame[j];
3527 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
3528 			if (state->stack[i].slot_type[0] != STACK_SPILL)
3529 				continue;
3530 			mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
3531 		}
3532 	}
3533 }
3534 
3535 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
3536 				   struct bpf_reg_state *dst_reg,
3537 				   struct bpf_reg_state *src_reg,
3538 				   struct bpf_verifier_state *this_branch,
3539 				   struct bpf_verifier_state *other_branch)
3540 {
3541 	if (BPF_SRC(insn->code) != BPF_X)
3542 		return false;
3543 
3544 	switch (BPF_OP(insn->code)) {
3545 	case BPF_JGT:
3546 		if ((dst_reg->type == PTR_TO_PACKET &&
3547 		     src_reg->type == PTR_TO_PACKET_END) ||
3548 		    (dst_reg->type == PTR_TO_PACKET_META &&
3549 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3550 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
3551 			find_good_pkt_pointers(this_branch, dst_reg,
3552 					       dst_reg->type, false);
3553 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
3554 			    src_reg->type == PTR_TO_PACKET) ||
3555 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3556 			    src_reg->type == PTR_TO_PACKET_META)) {
3557 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
3558 			find_good_pkt_pointers(other_branch, src_reg,
3559 					       src_reg->type, true);
3560 		} else {
3561 			return false;
3562 		}
3563 		break;
3564 	case BPF_JLT:
3565 		if ((dst_reg->type == PTR_TO_PACKET &&
3566 		     src_reg->type == PTR_TO_PACKET_END) ||
3567 		    (dst_reg->type == PTR_TO_PACKET_META &&
3568 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3569 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
3570 			find_good_pkt_pointers(other_branch, dst_reg,
3571 					       dst_reg->type, true);
3572 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
3573 			    src_reg->type == PTR_TO_PACKET) ||
3574 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3575 			    src_reg->type == PTR_TO_PACKET_META)) {
3576 			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
3577 			find_good_pkt_pointers(this_branch, src_reg,
3578 					       src_reg->type, false);
3579 		} else {
3580 			return false;
3581 		}
3582 		break;
3583 	case BPF_JGE:
3584 		if ((dst_reg->type == PTR_TO_PACKET &&
3585 		     src_reg->type == PTR_TO_PACKET_END) ||
3586 		    (dst_reg->type == PTR_TO_PACKET_META &&
3587 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3588 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
3589 			find_good_pkt_pointers(this_branch, dst_reg,
3590 					       dst_reg->type, true);
3591 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
3592 			    src_reg->type == PTR_TO_PACKET) ||
3593 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3594 			    src_reg->type == PTR_TO_PACKET_META)) {
3595 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
3596 			find_good_pkt_pointers(other_branch, src_reg,
3597 					       src_reg->type, false);
3598 		} else {
3599 			return false;
3600 		}
3601 		break;
3602 	case BPF_JLE:
3603 		if ((dst_reg->type == PTR_TO_PACKET &&
3604 		     src_reg->type == PTR_TO_PACKET_END) ||
3605 		    (dst_reg->type == PTR_TO_PACKET_META &&
3606 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3607 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
3608 			find_good_pkt_pointers(other_branch, dst_reg,
3609 					       dst_reg->type, false);
3610 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
3611 			    src_reg->type == PTR_TO_PACKET) ||
3612 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3613 			    src_reg->type == PTR_TO_PACKET_META)) {
3614 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
3615 			find_good_pkt_pointers(this_branch, src_reg,
3616 					       src_reg->type, true);
3617 		} else {
3618 			return false;
3619 		}
3620 		break;
3621 	default:
3622 		return false;
3623 	}
3624 
3625 	return true;
3626 }
3627 
3628 static int check_cond_jmp_op(struct bpf_verifier_env *env,
3629 			     struct bpf_insn *insn, int *insn_idx)
3630 {
3631 	struct bpf_verifier_state *this_branch = env->cur_state;
3632 	struct bpf_verifier_state *other_branch;
3633 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
3634 	struct bpf_reg_state *dst_reg, *other_branch_regs;
3635 	u8 opcode = BPF_OP(insn->code);
3636 	int err;
3637 
3638 	if (opcode > BPF_JSLE) {
3639 		verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
3640 		return -EINVAL;
3641 	}
3642 
3643 	if (BPF_SRC(insn->code) == BPF_X) {
3644 		if (insn->imm != 0) {
3645 			verbose(env, "BPF_JMP uses reserved fields\n");
3646 			return -EINVAL;
3647 		}
3648 
3649 		/* check src1 operand */
3650 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
3651 		if (err)
3652 			return err;
3653 
3654 		if (is_pointer_value(env, insn->src_reg)) {
3655 			verbose(env, "R%d pointer comparison prohibited\n",
3656 				insn->src_reg);
3657 			return -EACCES;
3658 		}
3659 	} else {
3660 		if (insn->src_reg != BPF_REG_0) {
3661 			verbose(env, "BPF_JMP uses reserved fields\n");
3662 			return -EINVAL;
3663 		}
3664 	}
3665 
3666 	/* check src2 operand */
3667 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3668 	if (err)
3669 		return err;
3670 
3671 	dst_reg = &regs[insn->dst_reg];
3672 
3673 	/* detect if R == 0 where R was initialized to zero earlier */
3674 	if (BPF_SRC(insn->code) == BPF_K &&
3675 	    (opcode == BPF_JEQ || opcode == BPF_JNE) &&
3676 	    dst_reg->type == SCALAR_VALUE &&
3677 	    tnum_is_const(dst_reg->var_off)) {
3678 		if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
3679 		    (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
3680 			/* if (imm == imm) goto pc+off;
3681 			 * only follow the goto, ignore fall-through
3682 			 */
3683 			*insn_idx += insn->off;
3684 			return 0;
3685 		} else {
3686 			/* if (imm != imm) goto pc+off;
3687 			 * only follow fall-through branch, since
3688 			 * that's where the program will go
3689 			 */
3690 			return 0;
3691 		}
3692 	}
3693 
3694 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
3695 	if (!other_branch)
3696 		return -EFAULT;
3697 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
3698 
3699 	/* detect if we are comparing against a constant value so we can adjust
3700 	 * our min/max values for our dst register.
3701 	 * this is only legit if both are scalars (or pointers to the same
3702 	 * object, I suppose, but we don't support that right now), because
3703 	 * otherwise the different base pointers mean the offsets aren't
3704 	 * comparable.
3705 	 */
3706 	if (BPF_SRC(insn->code) == BPF_X) {
3707 		if (dst_reg->type == SCALAR_VALUE &&
3708 		    regs[insn->src_reg].type == SCALAR_VALUE) {
3709 			if (tnum_is_const(regs[insn->src_reg].var_off))
3710 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
3711 						dst_reg, regs[insn->src_reg].var_off.value,
3712 						opcode);
3713 			else if (tnum_is_const(dst_reg->var_off))
3714 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
3715 						    &regs[insn->src_reg],
3716 						    dst_reg->var_off.value, opcode);
3717 			else if (opcode == BPF_JEQ || opcode == BPF_JNE)
3718 				/* Comparing for equality, we can combine knowledge */
3719 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
3720 						    &other_branch_regs[insn->dst_reg],
3721 						    &regs[insn->src_reg],
3722 						    &regs[insn->dst_reg], opcode);
3723 		}
3724 	} else if (dst_reg->type == SCALAR_VALUE) {
3725 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
3726 					dst_reg, insn->imm, opcode);
3727 	}
3728 
3729 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
3730 	if (BPF_SRC(insn->code) == BPF_K &&
3731 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
3732 	    dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
3733 		/* Mark all identical map registers in each branch as either
3734 		 * safe or unknown depending R == 0 or R != 0 conditional.
3735 		 */
3736 		mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
3737 		mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
3738 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
3739 					   this_branch, other_branch) &&
3740 		   is_pointer_value(env, insn->dst_reg)) {
3741 		verbose(env, "R%d pointer comparison prohibited\n",
3742 			insn->dst_reg);
3743 		return -EACCES;
3744 	}
3745 	if (env->log.level)
3746 		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
3747 	return 0;
3748 }
3749 
3750 /* return the map pointer stored inside BPF_LD_IMM64 instruction */
3751 static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
3752 {
3753 	u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
3754 
3755 	return (struct bpf_map *) (unsigned long) imm64;
3756 }
3757 
3758 /* verify BPF_LD_IMM64 instruction */
3759 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
3760 {
3761 	struct bpf_reg_state *regs = cur_regs(env);
3762 	int err;
3763 
3764 	if (BPF_SIZE(insn->code) != BPF_DW) {
3765 		verbose(env, "invalid BPF_LD_IMM insn\n");
3766 		return -EINVAL;
3767 	}
3768 	if (insn->off != 0) {
3769 		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
3770 		return -EINVAL;
3771 	}
3772 
3773 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
3774 	if (err)
3775 		return err;
3776 
3777 	if (insn->src_reg == 0) {
3778 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
3779 
3780 		regs[insn->dst_reg].type = SCALAR_VALUE;
3781 		__mark_reg_known(&regs[insn->dst_reg], imm);
3782 		return 0;
3783 	}
3784 
3785 	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
3786 	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
3787 
3788 	regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
3789 	regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
3790 	return 0;
3791 }
3792 
3793 static bool may_access_skb(enum bpf_prog_type type)
3794 {
3795 	switch (type) {
3796 	case BPF_PROG_TYPE_SOCKET_FILTER:
3797 	case BPF_PROG_TYPE_SCHED_CLS:
3798 	case BPF_PROG_TYPE_SCHED_ACT:
3799 		return true;
3800 	default:
3801 		return false;
3802 	}
3803 }
3804 
3805 /* verify safety of LD_ABS|LD_IND instructions:
3806  * - they can only appear in the programs where ctx == skb
3807  * - since they are wrappers of function calls, they scratch R1-R5 registers,
3808  *   preserve R6-R9, and store return value into R0
3809  *
3810  * Implicit input:
3811  *   ctx == skb == R6 == CTX
3812  *
3813  * Explicit input:
3814  *   SRC == any register
3815  *   IMM == 32-bit immediate
3816  *
3817  * Output:
3818  *   R0 - 8/16/32-bit skb data converted to cpu endianness
3819  */
3820 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
3821 {
3822 	struct bpf_reg_state *regs = cur_regs(env);
3823 	u8 mode = BPF_MODE(insn->code);
3824 	int i, err;
3825 
3826 	if (!may_access_skb(env->prog->type)) {
3827 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
3828 		return -EINVAL;
3829 	}
3830 
3831 	if (env->subprog_cnt) {
3832 		/* when program has LD_ABS insn JITs and interpreter assume
3833 		 * that r1 == ctx == skb which is not the case for callees
3834 		 * that can have arbitrary arguments. It's problematic
3835 		 * for main prog as well since JITs would need to analyze
3836 		 * all functions in order to make proper register save/restore
3837 		 * decisions in the main prog. Hence disallow LD_ABS with calls
3838 		 */
3839 		verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
3840 		return -EINVAL;
3841 	}
3842 
3843 	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
3844 	    BPF_SIZE(insn->code) == BPF_DW ||
3845 	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
3846 		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
3847 		return -EINVAL;
3848 	}
3849 
3850 	/* check whether implicit source operand (register R6) is readable */
3851 	err = check_reg_arg(env, BPF_REG_6, SRC_OP);
3852 	if (err)
3853 		return err;
3854 
3855 	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
3856 		verbose(env,
3857 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
3858 		return -EINVAL;
3859 	}
3860 
3861 	if (mode == BPF_IND) {
3862 		/* check explicit source operand */
3863 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
3864 		if (err)
3865 			return err;
3866 	}
3867 
3868 	/* reset caller saved regs to unreadable */
3869 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
3870 		mark_reg_not_init(env, regs, caller_saved[i]);
3871 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3872 	}
3873 
3874 	/* mark destination R0 register as readable, since it contains
3875 	 * the value fetched from the packet.
3876 	 * Already marked as written above.
3877 	 */
3878 	mark_reg_unknown(env, regs, BPF_REG_0);
3879 	return 0;
3880 }
3881 
3882 static int check_return_code(struct bpf_verifier_env *env)
3883 {
3884 	struct bpf_reg_state *reg;
3885 	struct tnum range = tnum_range(0, 1);
3886 
3887 	switch (env->prog->type) {
3888 	case BPF_PROG_TYPE_CGROUP_SKB:
3889 	case BPF_PROG_TYPE_CGROUP_SOCK:
3890 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3891 	case BPF_PROG_TYPE_SOCK_OPS:
3892 	case BPF_PROG_TYPE_CGROUP_DEVICE:
3893 		break;
3894 	default:
3895 		return 0;
3896 	}
3897 
3898 	reg = cur_regs(env) + BPF_REG_0;
3899 	if (reg->type != SCALAR_VALUE) {
3900 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
3901 			reg_type_str[reg->type]);
3902 		return -EINVAL;
3903 	}
3904 
3905 	if (!tnum_in(range, reg->var_off)) {
3906 		verbose(env, "At program exit the register R0 ");
3907 		if (!tnum_is_unknown(reg->var_off)) {
3908 			char tn_buf[48];
3909 
3910 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3911 			verbose(env, "has value %s", tn_buf);
3912 		} else {
3913 			verbose(env, "has unknown scalar value");
3914 		}
3915 		verbose(env, " should have been 0 or 1\n");
3916 		return -EINVAL;
3917 	}
3918 	return 0;
3919 }
3920 
3921 /* non-recursive DFS pseudo code
3922  * 1  procedure DFS-iterative(G,v):
3923  * 2      label v as discovered
3924  * 3      let S be a stack
3925  * 4      S.push(v)
3926  * 5      while S is not empty
3927  * 6            t <- S.pop()
3928  * 7            if t is what we're looking for:
3929  * 8                return t
3930  * 9            for all edges e in G.adjacentEdges(t) do
3931  * 10               if edge e is already labelled
3932  * 11                   continue with the next edge
3933  * 12               w <- G.adjacentVertex(t,e)
3934  * 13               if vertex w is not discovered and not explored
3935  * 14                   label e as tree-edge
3936  * 15                   label w as discovered
3937  * 16                   S.push(w)
3938  * 17                   continue at 5
3939  * 18               else if vertex w is discovered
3940  * 19                   label e as back-edge
3941  * 20               else
3942  * 21                   // vertex w is explored
3943  * 22                   label e as forward- or cross-edge
3944  * 23           label t as explored
3945  * 24           S.pop()
3946  *
3947  * convention:
3948  * 0x10 - discovered
3949  * 0x11 - discovered and fall-through edge labelled
3950  * 0x12 - discovered and fall-through and branch edges labelled
3951  * 0x20 - explored
3952  */
3953 
3954 enum {
3955 	DISCOVERED = 0x10,
3956 	EXPLORED = 0x20,
3957 	FALLTHROUGH = 1,
3958 	BRANCH = 2,
3959 };
3960 
3961 #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
3962 
3963 static int *insn_stack;	/* stack of insns to process */
3964 static int cur_stack;	/* current stack index */
3965 static int *insn_state;
3966 
3967 /* t, w, e - match pseudo-code above:
3968  * t - index of current instruction
3969  * w - next instruction
3970  * e - edge
3971  */
3972 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
3973 {
3974 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
3975 		return 0;
3976 
3977 	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
3978 		return 0;
3979 
3980 	if (w < 0 || w >= env->prog->len) {
3981 		verbose(env, "jump out of range from insn %d to %d\n", t, w);
3982 		return -EINVAL;
3983 	}
3984 
3985 	if (e == BRANCH)
3986 		/* mark branch target for state pruning */
3987 		env->explored_states[w] = STATE_LIST_MARK;
3988 
3989 	if (insn_state[w] == 0) {
3990 		/* tree-edge */
3991 		insn_state[t] = DISCOVERED | e;
3992 		insn_state[w] = DISCOVERED;
3993 		if (cur_stack >= env->prog->len)
3994 			return -E2BIG;
3995 		insn_stack[cur_stack++] = w;
3996 		return 1;
3997 	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
3998 		verbose(env, "back-edge from insn %d to %d\n", t, w);
3999 		return -EINVAL;
4000 	} else if (insn_state[w] == EXPLORED) {
4001 		/* forward- or cross-edge */
4002 		insn_state[t] = DISCOVERED | e;
4003 	} else {
4004 		verbose(env, "insn state internal bug\n");
4005 		return -EFAULT;
4006 	}
4007 	return 0;
4008 }
4009 
4010 /* non-recursive depth-first-search to detect loops in BPF program
4011  * loop == back-edge in directed graph
4012  */
4013 static int check_cfg(struct bpf_verifier_env *env)
4014 {
4015 	struct bpf_insn *insns = env->prog->insnsi;
4016 	int insn_cnt = env->prog->len;
4017 	int ret = 0;
4018 	int i, t;
4019 
4020 	ret = check_subprogs(env);
4021 	if (ret < 0)
4022 		return ret;
4023 
4024 	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
4025 	if (!insn_state)
4026 		return -ENOMEM;
4027 
4028 	insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
4029 	if (!insn_stack) {
4030 		kfree(insn_state);
4031 		return -ENOMEM;
4032 	}
4033 
4034 	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
4035 	insn_stack[0] = 0; /* 0 is the first instruction */
4036 	cur_stack = 1;
4037 
4038 peek_stack:
4039 	if (cur_stack == 0)
4040 		goto check_state;
4041 	t = insn_stack[cur_stack - 1];
4042 
4043 	if (BPF_CLASS(insns[t].code) == BPF_JMP) {
4044 		u8 opcode = BPF_OP(insns[t].code);
4045 
4046 		if (opcode == BPF_EXIT) {
4047 			goto mark_explored;
4048 		} else if (opcode == BPF_CALL) {
4049 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
4050 			if (ret == 1)
4051 				goto peek_stack;
4052 			else if (ret < 0)
4053 				goto err_free;
4054 			if (t + 1 < insn_cnt)
4055 				env->explored_states[t + 1] = STATE_LIST_MARK;
4056 			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
4057 				env->explored_states[t] = STATE_LIST_MARK;
4058 				ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
4059 				if (ret == 1)
4060 					goto peek_stack;
4061 				else if (ret < 0)
4062 					goto err_free;
4063 			}
4064 		} else if (opcode == BPF_JA) {
4065 			if (BPF_SRC(insns[t].code) != BPF_K) {
4066 				ret = -EINVAL;
4067 				goto err_free;
4068 			}
4069 			/* unconditional jump with single edge */
4070 			ret = push_insn(t, t + insns[t].off + 1,
4071 					FALLTHROUGH, env);
4072 			if (ret == 1)
4073 				goto peek_stack;
4074 			else if (ret < 0)
4075 				goto err_free;
4076 			/* tell verifier to check for equivalent states
4077 			 * after every call and jump
4078 			 */
4079 			if (t + 1 < insn_cnt)
4080 				env->explored_states[t + 1] = STATE_LIST_MARK;
4081 		} else {
4082 			/* conditional jump with two edges */
4083 			env->explored_states[t] = STATE_LIST_MARK;
4084 			ret = push_insn(t, t + 1, FALLTHROUGH, env);
4085 			if (ret == 1)
4086 				goto peek_stack;
4087 			else if (ret < 0)
4088 				goto err_free;
4089 
4090 			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
4091 			if (ret == 1)
4092 				goto peek_stack;
4093 			else if (ret < 0)
4094 				goto err_free;
4095 		}
4096 	} else {
4097 		/* all other non-branch instructions with single
4098 		 * fall-through edge
4099 		 */
4100 		ret = push_insn(t, t + 1, FALLTHROUGH, env);
4101 		if (ret == 1)
4102 			goto peek_stack;
4103 		else if (ret < 0)
4104 			goto err_free;
4105 	}
4106 
4107 mark_explored:
4108 	insn_state[t] = EXPLORED;
4109 	if (cur_stack-- <= 0) {
4110 		verbose(env, "pop stack internal bug\n");
4111 		ret = -EFAULT;
4112 		goto err_free;
4113 	}
4114 	goto peek_stack;
4115 
4116 check_state:
4117 	for (i = 0; i < insn_cnt; i++) {
4118 		if (insn_state[i] != EXPLORED) {
4119 			verbose(env, "unreachable insn %d\n", i);
4120 			ret = -EINVAL;
4121 			goto err_free;
4122 		}
4123 	}
4124 	ret = 0; /* cfg looks good */
4125 
4126 err_free:
4127 	kfree(insn_state);
4128 	kfree(insn_stack);
4129 	return ret;
4130 }
4131 
4132 /* check %cur's range satisfies %old's */
4133 static bool range_within(struct bpf_reg_state *old,
4134 			 struct bpf_reg_state *cur)
4135 {
4136 	return old->umin_value <= cur->umin_value &&
4137 	       old->umax_value >= cur->umax_value &&
4138 	       old->smin_value <= cur->smin_value &&
4139 	       old->smax_value >= cur->smax_value;
4140 }
4141 
4142 /* Maximum number of register states that can exist at once */
4143 #define ID_MAP_SIZE	(MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
4144 struct idpair {
4145 	u32 old;
4146 	u32 cur;
4147 };
4148 
4149 /* If in the old state two registers had the same id, then they need to have
4150  * the same id in the new state as well.  But that id could be different from
4151  * the old state, so we need to track the mapping from old to new ids.
4152  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
4153  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
4154  * regs with a different old id could still have new id 9, we don't care about
4155  * that.
4156  * So we look through our idmap to see if this old id has been seen before.  If
4157  * so, we require the new id to match; otherwise, we add the id pair to the map.
4158  */
4159 static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
4160 {
4161 	unsigned int i;
4162 
4163 	for (i = 0; i < ID_MAP_SIZE; i++) {
4164 		if (!idmap[i].old) {
4165 			/* Reached an empty slot; haven't seen this id before */
4166 			idmap[i].old = old_id;
4167 			idmap[i].cur = cur_id;
4168 			return true;
4169 		}
4170 		if (idmap[i].old == old_id)
4171 			return idmap[i].cur == cur_id;
4172 	}
4173 	/* We ran out of idmap slots, which should be impossible */
4174 	WARN_ON_ONCE(1);
4175 	return false;
4176 }
4177 
4178 /* Returns true if (rold safe implies rcur safe) */
4179 static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
4180 		    struct idpair *idmap)
4181 {
4182 	bool equal;
4183 
4184 	if (!(rold->live & REG_LIVE_READ))
4185 		/* explored state didn't use this */
4186 		return true;
4187 
4188 	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0;
4189 
4190 	if (rold->type == PTR_TO_STACK)
4191 		/* two stack pointers are equal only if they're pointing to
4192 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
4193 		 */
4194 		return equal && rold->frameno == rcur->frameno;
4195 
4196 	if (equal)
4197 		return true;
4198 
4199 	if (rold->type == NOT_INIT)
4200 		/* explored state can't have used this */
4201 		return true;
4202 	if (rcur->type == NOT_INIT)
4203 		return false;
4204 	switch (rold->type) {
4205 	case SCALAR_VALUE:
4206 		if (rcur->type == SCALAR_VALUE) {
4207 			/* new val must satisfy old val knowledge */
4208 			return range_within(rold, rcur) &&
4209 			       tnum_in(rold->var_off, rcur->var_off);
4210 		} else {
4211 			/* We're trying to use a pointer in place of a scalar.
4212 			 * Even if the scalar was unbounded, this could lead to
4213 			 * pointer leaks because scalars are allowed to leak
4214 			 * while pointers are not. We could make this safe in
4215 			 * special cases if root is calling us, but it's
4216 			 * probably not worth the hassle.
4217 			 */
4218 			return false;
4219 		}
4220 	case PTR_TO_MAP_VALUE:
4221 		/* If the new min/max/var_off satisfy the old ones and
4222 		 * everything else matches, we are OK.
4223 		 * We don't care about the 'id' value, because nothing
4224 		 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
4225 		 */
4226 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
4227 		       range_within(rold, rcur) &&
4228 		       tnum_in(rold->var_off, rcur->var_off);
4229 	case PTR_TO_MAP_VALUE_OR_NULL:
4230 		/* a PTR_TO_MAP_VALUE could be safe to use as a
4231 		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
4232 		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
4233 		 * checked, doing so could have affected others with the same
4234 		 * id, and we can't check for that because we lost the id when
4235 		 * we converted to a PTR_TO_MAP_VALUE.
4236 		 */
4237 		if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
4238 			return false;
4239 		if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
4240 			return false;
4241 		/* Check our ids match any regs they're supposed to */
4242 		return check_ids(rold->id, rcur->id, idmap);
4243 	case PTR_TO_PACKET_META:
4244 	case PTR_TO_PACKET:
4245 		if (rcur->type != rold->type)
4246 			return false;
4247 		/* We must have at least as much range as the old ptr
4248 		 * did, so that any accesses which were safe before are
4249 		 * still safe.  This is true even if old range < old off,
4250 		 * since someone could have accessed through (ptr - k), or
4251 		 * even done ptr -= k in a register, to get a safe access.
4252 		 */
4253 		if (rold->range > rcur->range)
4254 			return false;
4255 		/* If the offsets don't match, we can't trust our alignment;
4256 		 * nor can we be sure that we won't fall out of range.
4257 		 */
4258 		if (rold->off != rcur->off)
4259 			return false;
4260 		/* id relations must be preserved */
4261 		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
4262 			return false;
4263 		/* new val must satisfy old val knowledge */
4264 		return range_within(rold, rcur) &&
4265 		       tnum_in(rold->var_off, rcur->var_off);
4266 	case PTR_TO_CTX:
4267 	case CONST_PTR_TO_MAP:
4268 	case PTR_TO_PACKET_END:
4269 		/* Only valid matches are exact, which memcmp() above
4270 		 * would have accepted
4271 		 */
4272 	default:
4273 		/* Don't know what's going on, just say it's not safe */
4274 		return false;
4275 	}
4276 
4277 	/* Shouldn't get here; if we do, say it's not safe */
4278 	WARN_ON_ONCE(1);
4279 	return false;
4280 }
4281 
4282 static bool stacksafe(struct bpf_func_state *old,
4283 		      struct bpf_func_state *cur,
4284 		      struct idpair *idmap)
4285 {
4286 	int i, spi;
4287 
4288 	/* if explored stack has more populated slots than current stack
4289 	 * such stacks are not equivalent
4290 	 */
4291 	if (old->allocated_stack > cur->allocated_stack)
4292 		return false;
4293 
4294 	/* walk slots of the explored stack and ignore any additional
4295 	 * slots in the current stack, since explored(safe) state
4296 	 * didn't use them
4297 	 */
4298 	for (i = 0; i < old->allocated_stack; i++) {
4299 		spi = i / BPF_REG_SIZE;
4300 
4301 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
4302 			/* explored state didn't use this */
4303 			continue;
4304 
4305 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
4306 			continue;
4307 		/* if old state was safe with misc data in the stack
4308 		 * it will be safe with zero-initialized stack.
4309 		 * The opposite is not true
4310 		 */
4311 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
4312 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
4313 			continue;
4314 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
4315 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
4316 			/* Ex: old explored (safe) state has STACK_SPILL in
4317 			 * this stack slot, but current has has STACK_MISC ->
4318 			 * this verifier states are not equivalent,
4319 			 * return false to continue verification of this path
4320 			 */
4321 			return false;
4322 		if (i % BPF_REG_SIZE)
4323 			continue;
4324 		if (old->stack[spi].slot_type[0] != STACK_SPILL)
4325 			continue;
4326 		if (!regsafe(&old->stack[spi].spilled_ptr,
4327 			     &cur->stack[spi].spilled_ptr,
4328 			     idmap))
4329 			/* when explored and current stack slot are both storing
4330 			 * spilled registers, check that stored pointers types
4331 			 * are the same as well.
4332 			 * Ex: explored safe path could have stored
4333 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
4334 			 * but current path has stored:
4335 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
4336 			 * such verifier states are not equivalent.
4337 			 * return false to continue verification of this path
4338 			 */
4339 			return false;
4340 	}
4341 	return true;
4342 }
4343 
4344 /* compare two verifier states
4345  *
4346  * all states stored in state_list are known to be valid, since
4347  * verifier reached 'bpf_exit' instruction through them
4348  *
4349  * this function is called when verifier exploring different branches of
4350  * execution popped from the state stack. If it sees an old state that has
4351  * more strict register state and more strict stack state then this execution
4352  * branch doesn't need to be explored further, since verifier already
4353  * concluded that more strict state leads to valid finish.
4354  *
4355  * Therefore two states are equivalent if register state is more conservative
4356  * and explored stack state is more conservative than the current one.
4357  * Example:
4358  *       explored                   current
4359  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
4360  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
4361  *
4362  * In other words if current stack state (one being explored) has more
4363  * valid slots than old one that already passed validation, it means
4364  * the verifier can stop exploring and conclude that current state is valid too
4365  *
4366  * Similarly with registers. If explored state has register type as invalid
4367  * whereas register type in current state is meaningful, it means that
4368  * the current state will reach 'bpf_exit' instruction safely
4369  */
4370 static bool func_states_equal(struct bpf_func_state *old,
4371 			      struct bpf_func_state *cur)
4372 {
4373 	struct idpair *idmap;
4374 	bool ret = false;
4375 	int i;
4376 
4377 	idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
4378 	/* If we failed to allocate the idmap, just say it's not safe */
4379 	if (!idmap)
4380 		return false;
4381 
4382 	for (i = 0; i < MAX_BPF_REG; i++) {
4383 		if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
4384 			goto out_free;
4385 	}
4386 
4387 	if (!stacksafe(old, cur, idmap))
4388 		goto out_free;
4389 	ret = true;
4390 out_free:
4391 	kfree(idmap);
4392 	return ret;
4393 }
4394 
4395 static bool states_equal(struct bpf_verifier_env *env,
4396 			 struct bpf_verifier_state *old,
4397 			 struct bpf_verifier_state *cur)
4398 {
4399 	int i;
4400 
4401 	if (old->curframe != cur->curframe)
4402 		return false;
4403 
4404 	/* for states to be equal callsites have to be the same
4405 	 * and all frame states need to be equivalent
4406 	 */
4407 	for (i = 0; i <= old->curframe; i++) {
4408 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
4409 			return false;
4410 		if (!func_states_equal(old->frame[i], cur->frame[i]))
4411 			return false;
4412 	}
4413 	return true;
4414 }
4415 
4416 /* A write screens off any subsequent reads; but write marks come from the
4417  * straight-line code between a state and its parent.  When we arrive at an
4418  * equivalent state (jump target or such) we didn't arrive by the straight-line
4419  * code, so read marks in the state must propagate to the parent regardless
4420  * of the state's write marks. That's what 'parent == state->parent' comparison
4421  * in mark_reg_read() and mark_stack_slot_read() is for.
4422  */
4423 static int propagate_liveness(struct bpf_verifier_env *env,
4424 			      const struct bpf_verifier_state *vstate,
4425 			      struct bpf_verifier_state *vparent)
4426 {
4427 	int i, frame, err = 0;
4428 	struct bpf_func_state *state, *parent;
4429 
4430 	if (vparent->curframe != vstate->curframe) {
4431 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
4432 		     vparent->curframe, vstate->curframe);
4433 		return -EFAULT;
4434 	}
4435 	/* Propagate read liveness of registers... */
4436 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
4437 	/* We don't need to worry about FP liveness because it's read-only */
4438 	for (i = 0; i < BPF_REG_FP; i++) {
4439 		if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
4440 			continue;
4441 		if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
4442 			err = mark_reg_read(env, vstate, vparent, i);
4443 			if (err)
4444 				return err;
4445 		}
4446 	}
4447 
4448 	/* ... and stack slots */
4449 	for (frame = 0; frame <= vstate->curframe; frame++) {
4450 		state = vstate->frame[frame];
4451 		parent = vparent->frame[frame];
4452 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
4453 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
4454 			if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
4455 				continue;
4456 			if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
4457 				mark_stack_slot_read(env, vstate, vparent, i, frame);
4458 		}
4459 	}
4460 	return err;
4461 }
4462 
4463 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
4464 {
4465 	struct bpf_verifier_state_list *new_sl;
4466 	struct bpf_verifier_state_list *sl;
4467 	struct bpf_verifier_state *cur = env->cur_state;
4468 	int i, j, err;
4469 
4470 	sl = env->explored_states[insn_idx];
4471 	if (!sl)
4472 		/* this 'insn_idx' instruction wasn't marked, so we will not
4473 		 * be doing state search here
4474 		 */
4475 		return 0;
4476 
4477 	while (sl != STATE_LIST_MARK) {
4478 		if (states_equal(env, &sl->state, cur)) {
4479 			/* reached equivalent register/stack state,
4480 			 * prune the search.
4481 			 * Registers read by the continuation are read by us.
4482 			 * If we have any write marks in env->cur_state, they
4483 			 * will prevent corresponding reads in the continuation
4484 			 * from reaching our parent (an explored_state).  Our
4485 			 * own state will get the read marks recorded, but
4486 			 * they'll be immediately forgotten as we're pruning
4487 			 * this state and will pop a new one.
4488 			 */
4489 			err = propagate_liveness(env, &sl->state, cur);
4490 			if (err)
4491 				return err;
4492 			return 1;
4493 		}
4494 		sl = sl->next;
4495 	}
4496 
4497 	/* there were no equivalent states, remember current one.
4498 	 * technically the current state is not proven to be safe yet,
4499 	 * but it will either reach outer most bpf_exit (which means it's safe)
4500 	 * or it will be rejected. Since there are no loops, we won't be
4501 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
4502 	 * again on the way to bpf_exit
4503 	 */
4504 	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
4505 	if (!new_sl)
4506 		return -ENOMEM;
4507 
4508 	/* add new state to the head of linked list */
4509 	err = copy_verifier_state(&new_sl->state, cur);
4510 	if (err) {
4511 		free_verifier_state(&new_sl->state, false);
4512 		kfree(new_sl);
4513 		return err;
4514 	}
4515 	new_sl->next = env->explored_states[insn_idx];
4516 	env->explored_states[insn_idx] = new_sl;
4517 	/* connect new state to parentage chain */
4518 	cur->parent = &new_sl->state;
4519 	/* clear write marks in current state: the writes we did are not writes
4520 	 * our child did, so they don't screen off its reads from us.
4521 	 * (There are no read marks in current state, because reads always mark
4522 	 * their parent and current state never has children yet.  Only
4523 	 * explored_states can get read marks.)
4524 	 */
4525 	for (i = 0; i < BPF_REG_FP; i++)
4526 		cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
4527 
4528 	/* all stack frames are accessible from callee, clear them all */
4529 	for (j = 0; j <= cur->curframe; j++) {
4530 		struct bpf_func_state *frame = cur->frame[j];
4531 
4532 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++)
4533 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
4534 	}
4535 	return 0;
4536 }
4537 
4538 static int do_check(struct bpf_verifier_env *env)
4539 {
4540 	struct bpf_verifier_state *state;
4541 	struct bpf_insn *insns = env->prog->insnsi;
4542 	struct bpf_reg_state *regs;
4543 	int insn_cnt = env->prog->len, i;
4544 	int insn_idx, prev_insn_idx = 0;
4545 	int insn_processed = 0;
4546 	bool do_print_state = false;
4547 
4548 	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
4549 	if (!state)
4550 		return -ENOMEM;
4551 	state->curframe = 0;
4552 	state->parent = NULL;
4553 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
4554 	if (!state->frame[0]) {
4555 		kfree(state);
4556 		return -ENOMEM;
4557 	}
4558 	env->cur_state = state;
4559 	init_func_state(env, state->frame[0],
4560 			BPF_MAIN_FUNC /* callsite */,
4561 			0 /* frameno */,
4562 			0 /* subprogno, zero == main subprog */);
4563 	insn_idx = 0;
4564 	for (;;) {
4565 		struct bpf_insn *insn;
4566 		u8 class;
4567 		int err;
4568 
4569 		if (insn_idx >= insn_cnt) {
4570 			verbose(env, "invalid insn idx %d insn_cnt %d\n",
4571 				insn_idx, insn_cnt);
4572 			return -EFAULT;
4573 		}
4574 
4575 		insn = &insns[insn_idx];
4576 		class = BPF_CLASS(insn->code);
4577 
4578 		if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
4579 			verbose(env,
4580 				"BPF program is too large. Processed %d insn\n",
4581 				insn_processed);
4582 			return -E2BIG;
4583 		}
4584 
4585 		err = is_state_visited(env, insn_idx);
4586 		if (err < 0)
4587 			return err;
4588 		if (err == 1) {
4589 			/* found equivalent state, can prune the search */
4590 			if (env->log.level) {
4591 				if (do_print_state)
4592 					verbose(env, "\nfrom %d to %d: safe\n",
4593 						prev_insn_idx, insn_idx);
4594 				else
4595 					verbose(env, "%d: safe\n", insn_idx);
4596 			}
4597 			goto process_bpf_exit;
4598 		}
4599 
4600 		if (need_resched())
4601 			cond_resched();
4602 
4603 		if (env->log.level > 1 || (env->log.level && do_print_state)) {
4604 			if (env->log.level > 1)
4605 				verbose(env, "%d:", insn_idx);
4606 			else
4607 				verbose(env, "\nfrom %d to %d:",
4608 					prev_insn_idx, insn_idx);
4609 			print_verifier_state(env, state->frame[state->curframe]);
4610 			do_print_state = false;
4611 		}
4612 
4613 		if (env->log.level) {
4614 			const struct bpf_insn_cbs cbs = {
4615 				.cb_print	= verbose,
4616 				.private_data	= env,
4617 			};
4618 
4619 			verbose(env, "%d: ", insn_idx);
4620 			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
4621 		}
4622 
4623 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
4624 			err = bpf_prog_offload_verify_insn(env, insn_idx,
4625 							   prev_insn_idx);
4626 			if (err)
4627 				return err;
4628 		}
4629 
4630 		regs = cur_regs(env);
4631 		env->insn_aux_data[insn_idx].seen = true;
4632 		if (class == BPF_ALU || class == BPF_ALU64) {
4633 			err = check_alu_op(env, insn);
4634 			if (err)
4635 				return err;
4636 
4637 		} else if (class == BPF_LDX) {
4638 			enum bpf_reg_type *prev_src_type, src_reg_type;
4639 
4640 			/* check for reserved fields is already done */
4641 
4642 			/* check src operand */
4643 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
4644 			if (err)
4645 				return err;
4646 
4647 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
4648 			if (err)
4649 				return err;
4650 
4651 			src_reg_type = regs[insn->src_reg].type;
4652 
4653 			/* check that memory (src_reg + off) is readable,
4654 			 * the state of dst_reg will be updated by this func
4655 			 */
4656 			err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
4657 					       BPF_SIZE(insn->code), BPF_READ,
4658 					       insn->dst_reg, false);
4659 			if (err)
4660 				return err;
4661 
4662 			prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
4663 
4664 			if (*prev_src_type == NOT_INIT) {
4665 				/* saw a valid insn
4666 				 * dst_reg = *(u32 *)(src_reg + off)
4667 				 * save type to validate intersecting paths
4668 				 */
4669 				*prev_src_type = src_reg_type;
4670 
4671 			} else if (src_reg_type != *prev_src_type &&
4672 				   (src_reg_type == PTR_TO_CTX ||
4673 				    *prev_src_type == PTR_TO_CTX)) {
4674 				/* ABuser program is trying to use the same insn
4675 				 * dst_reg = *(u32*) (src_reg + off)
4676 				 * with different pointer types:
4677 				 * src_reg == ctx in one branch and
4678 				 * src_reg == stack|map in some other branch.
4679 				 * Reject it.
4680 				 */
4681 				verbose(env, "same insn cannot be used with different pointers\n");
4682 				return -EINVAL;
4683 			}
4684 
4685 		} else if (class == BPF_STX) {
4686 			enum bpf_reg_type *prev_dst_type, dst_reg_type;
4687 
4688 			if (BPF_MODE(insn->code) == BPF_XADD) {
4689 				err = check_xadd(env, insn_idx, insn);
4690 				if (err)
4691 					return err;
4692 				insn_idx++;
4693 				continue;
4694 			}
4695 
4696 			/* check src1 operand */
4697 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
4698 			if (err)
4699 				return err;
4700 			/* check src2 operand */
4701 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4702 			if (err)
4703 				return err;
4704 
4705 			dst_reg_type = regs[insn->dst_reg].type;
4706 
4707 			/* check that memory (dst_reg + off) is writeable */
4708 			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4709 					       BPF_SIZE(insn->code), BPF_WRITE,
4710 					       insn->src_reg, false);
4711 			if (err)
4712 				return err;
4713 
4714 			prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
4715 
4716 			if (*prev_dst_type == NOT_INIT) {
4717 				*prev_dst_type = dst_reg_type;
4718 			} else if (dst_reg_type != *prev_dst_type &&
4719 				   (dst_reg_type == PTR_TO_CTX ||
4720 				    *prev_dst_type == PTR_TO_CTX)) {
4721 				verbose(env, "same insn cannot be used with different pointers\n");
4722 				return -EINVAL;
4723 			}
4724 
4725 		} else if (class == BPF_ST) {
4726 			if (BPF_MODE(insn->code) != BPF_MEM ||
4727 			    insn->src_reg != BPF_REG_0) {
4728 				verbose(env, "BPF_ST uses reserved fields\n");
4729 				return -EINVAL;
4730 			}
4731 			/* check src operand */
4732 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4733 			if (err)
4734 				return err;
4735 
4736 			if (is_ctx_reg(env, insn->dst_reg)) {
4737 				verbose(env, "BPF_ST stores into R%d context is not allowed\n",
4738 					insn->dst_reg);
4739 				return -EACCES;
4740 			}
4741 
4742 			/* check that memory (dst_reg + off) is writeable */
4743 			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4744 					       BPF_SIZE(insn->code), BPF_WRITE,
4745 					       -1, false);
4746 			if (err)
4747 				return err;
4748 
4749 		} else if (class == BPF_JMP) {
4750 			u8 opcode = BPF_OP(insn->code);
4751 
4752 			if (opcode == BPF_CALL) {
4753 				if (BPF_SRC(insn->code) != BPF_K ||
4754 				    insn->off != 0 ||
4755 				    (insn->src_reg != BPF_REG_0 &&
4756 				     insn->src_reg != BPF_PSEUDO_CALL) ||
4757 				    insn->dst_reg != BPF_REG_0) {
4758 					verbose(env, "BPF_CALL uses reserved fields\n");
4759 					return -EINVAL;
4760 				}
4761 
4762 				if (insn->src_reg == BPF_PSEUDO_CALL)
4763 					err = check_func_call(env, insn, &insn_idx);
4764 				else
4765 					err = check_helper_call(env, insn->imm, insn_idx);
4766 				if (err)
4767 					return err;
4768 
4769 			} else if (opcode == BPF_JA) {
4770 				if (BPF_SRC(insn->code) != BPF_K ||
4771 				    insn->imm != 0 ||
4772 				    insn->src_reg != BPF_REG_0 ||
4773 				    insn->dst_reg != BPF_REG_0) {
4774 					verbose(env, "BPF_JA uses reserved fields\n");
4775 					return -EINVAL;
4776 				}
4777 
4778 				insn_idx += insn->off + 1;
4779 				continue;
4780 
4781 			} else if (opcode == BPF_EXIT) {
4782 				if (BPF_SRC(insn->code) != BPF_K ||
4783 				    insn->imm != 0 ||
4784 				    insn->src_reg != BPF_REG_0 ||
4785 				    insn->dst_reg != BPF_REG_0) {
4786 					verbose(env, "BPF_EXIT uses reserved fields\n");
4787 					return -EINVAL;
4788 				}
4789 
4790 				if (state->curframe) {
4791 					/* exit from nested function */
4792 					prev_insn_idx = insn_idx;
4793 					err = prepare_func_exit(env, &insn_idx);
4794 					if (err)
4795 						return err;
4796 					do_print_state = true;
4797 					continue;
4798 				}
4799 
4800 				/* eBPF calling convetion is such that R0 is used
4801 				 * to return the value from eBPF program.
4802 				 * Make sure that it's readable at this time
4803 				 * of bpf_exit, which means that program wrote
4804 				 * something into it earlier
4805 				 */
4806 				err = check_reg_arg(env, BPF_REG_0, SRC_OP);
4807 				if (err)
4808 					return err;
4809 
4810 				if (is_pointer_value(env, BPF_REG_0)) {
4811 					verbose(env, "R0 leaks addr as return value\n");
4812 					return -EACCES;
4813 				}
4814 
4815 				err = check_return_code(env);
4816 				if (err)
4817 					return err;
4818 process_bpf_exit:
4819 				err = pop_stack(env, &prev_insn_idx, &insn_idx);
4820 				if (err < 0) {
4821 					if (err != -ENOENT)
4822 						return err;
4823 					break;
4824 				} else {
4825 					do_print_state = true;
4826 					continue;
4827 				}
4828 			} else {
4829 				err = check_cond_jmp_op(env, insn, &insn_idx);
4830 				if (err)
4831 					return err;
4832 			}
4833 		} else if (class == BPF_LD) {
4834 			u8 mode = BPF_MODE(insn->code);
4835 
4836 			if (mode == BPF_ABS || mode == BPF_IND) {
4837 				err = check_ld_abs(env, insn);
4838 				if (err)
4839 					return err;
4840 
4841 			} else if (mode == BPF_IMM) {
4842 				err = check_ld_imm(env, insn);
4843 				if (err)
4844 					return err;
4845 
4846 				insn_idx++;
4847 				env->insn_aux_data[insn_idx].seen = true;
4848 			} else {
4849 				verbose(env, "invalid BPF_LD mode\n");
4850 				return -EINVAL;
4851 			}
4852 		} else {
4853 			verbose(env, "unknown insn class %d\n", class);
4854 			return -EINVAL;
4855 		}
4856 
4857 		insn_idx++;
4858 	}
4859 
4860 	verbose(env, "processed %d insns (limit %d), stack depth ",
4861 		insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
4862 	for (i = 0; i < env->subprog_cnt + 1; i++) {
4863 		u32 depth = env->subprog_stack_depth[i];
4864 
4865 		verbose(env, "%d", depth);
4866 		if (i + 1 < env->subprog_cnt + 1)
4867 			verbose(env, "+");
4868 	}
4869 	verbose(env, "\n");
4870 	env->prog->aux->stack_depth = env->subprog_stack_depth[0];
4871 	return 0;
4872 }
4873 
4874 static int check_map_prealloc(struct bpf_map *map)
4875 {
4876 	return (map->map_type != BPF_MAP_TYPE_HASH &&
4877 		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
4878 		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
4879 		!(map->map_flags & BPF_F_NO_PREALLOC);
4880 }
4881 
4882 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
4883 					struct bpf_map *map,
4884 					struct bpf_prog *prog)
4885 
4886 {
4887 	/* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
4888 	 * preallocated hash maps, since doing memory allocation
4889 	 * in overflow_handler can crash depending on where nmi got
4890 	 * triggered.
4891 	 */
4892 	if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
4893 		if (!check_map_prealloc(map)) {
4894 			verbose(env, "perf_event programs can only use preallocated hash map\n");
4895 			return -EINVAL;
4896 		}
4897 		if (map->inner_map_meta &&
4898 		    !check_map_prealloc(map->inner_map_meta)) {
4899 			verbose(env, "perf_event programs can only use preallocated inner hash map\n");
4900 			return -EINVAL;
4901 		}
4902 	}
4903 
4904 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
4905 	    !bpf_offload_dev_match(prog, map)) {
4906 		verbose(env, "offload device mismatch between prog and map\n");
4907 		return -EINVAL;
4908 	}
4909 
4910 	return 0;
4911 }
4912 
4913 /* look for pseudo eBPF instructions that access map FDs and
4914  * replace them with actual map pointers
4915  */
4916 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
4917 {
4918 	struct bpf_insn *insn = env->prog->insnsi;
4919 	int insn_cnt = env->prog->len;
4920 	int i, j, err;
4921 
4922 	err = bpf_prog_calc_tag(env->prog);
4923 	if (err)
4924 		return err;
4925 
4926 	for (i = 0; i < insn_cnt; i++, insn++) {
4927 		if (BPF_CLASS(insn->code) == BPF_LDX &&
4928 		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
4929 			verbose(env, "BPF_LDX uses reserved fields\n");
4930 			return -EINVAL;
4931 		}
4932 
4933 		if (BPF_CLASS(insn->code) == BPF_STX &&
4934 		    ((BPF_MODE(insn->code) != BPF_MEM &&
4935 		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
4936 			verbose(env, "BPF_STX uses reserved fields\n");
4937 			return -EINVAL;
4938 		}
4939 
4940 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
4941 			struct bpf_map *map;
4942 			struct fd f;
4943 
4944 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
4945 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
4946 			    insn[1].off != 0) {
4947 				verbose(env, "invalid bpf_ld_imm64 insn\n");
4948 				return -EINVAL;
4949 			}
4950 
4951 			if (insn->src_reg == 0)
4952 				/* valid generic load 64-bit imm */
4953 				goto next_insn;
4954 
4955 			if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
4956 				verbose(env,
4957 					"unrecognized bpf_ld_imm64 insn\n");
4958 				return -EINVAL;
4959 			}
4960 
4961 			f = fdget(insn->imm);
4962 			map = __bpf_map_get(f);
4963 			if (IS_ERR(map)) {
4964 				verbose(env, "fd %d is not pointing to valid bpf_map\n",
4965 					insn->imm);
4966 				return PTR_ERR(map);
4967 			}
4968 
4969 			err = check_map_prog_compatibility(env, map, env->prog);
4970 			if (err) {
4971 				fdput(f);
4972 				return err;
4973 			}
4974 
4975 			/* store map pointer inside BPF_LD_IMM64 instruction */
4976 			insn[0].imm = (u32) (unsigned long) map;
4977 			insn[1].imm = ((u64) (unsigned long) map) >> 32;
4978 
4979 			/* check whether we recorded this map already */
4980 			for (j = 0; j < env->used_map_cnt; j++)
4981 				if (env->used_maps[j] == map) {
4982 					fdput(f);
4983 					goto next_insn;
4984 				}
4985 
4986 			if (env->used_map_cnt >= MAX_USED_MAPS) {
4987 				fdput(f);
4988 				return -E2BIG;
4989 			}
4990 
4991 			/* hold the map. If the program is rejected by verifier,
4992 			 * the map will be released by release_maps() or it
4993 			 * will be used by the valid program until it's unloaded
4994 			 * and all maps are released in free_bpf_prog_info()
4995 			 */
4996 			map = bpf_map_inc(map, false);
4997 			if (IS_ERR(map)) {
4998 				fdput(f);
4999 				return PTR_ERR(map);
5000 			}
5001 			env->used_maps[env->used_map_cnt++] = map;
5002 
5003 			fdput(f);
5004 next_insn:
5005 			insn++;
5006 			i++;
5007 			continue;
5008 		}
5009 
5010 		/* Basic sanity check before we invest more work here. */
5011 		if (!bpf_opcode_in_insntable(insn->code)) {
5012 			verbose(env, "unknown opcode %02x\n", insn->code);
5013 			return -EINVAL;
5014 		}
5015 	}
5016 
5017 	/* now all pseudo BPF_LD_IMM64 instructions load valid
5018 	 * 'struct bpf_map *' into a register instead of user map_fd.
5019 	 * These pointers will be used later by verifier to validate map access.
5020 	 */
5021 	return 0;
5022 }
5023 
5024 /* drop refcnt of maps used by the rejected program */
5025 static void release_maps(struct bpf_verifier_env *env)
5026 {
5027 	int i;
5028 
5029 	for (i = 0; i < env->used_map_cnt; i++)
5030 		bpf_map_put(env->used_maps[i]);
5031 }
5032 
5033 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
5034 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
5035 {
5036 	struct bpf_insn *insn = env->prog->insnsi;
5037 	int insn_cnt = env->prog->len;
5038 	int i;
5039 
5040 	for (i = 0; i < insn_cnt; i++, insn++)
5041 		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
5042 			insn->src_reg = 0;
5043 }
5044 
5045 /* single env->prog->insni[off] instruction was replaced with the range
5046  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
5047  * [0, off) and [off, end) to new locations, so the patched range stays zero
5048  */
5049 static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
5050 				u32 off, u32 cnt)
5051 {
5052 	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
5053 	int i;
5054 
5055 	if (cnt == 1)
5056 		return 0;
5057 	new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
5058 	if (!new_data)
5059 		return -ENOMEM;
5060 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
5061 	memcpy(new_data + off + cnt - 1, old_data + off,
5062 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
5063 	for (i = off; i < off + cnt - 1; i++)
5064 		new_data[i].seen = true;
5065 	env->insn_aux_data = new_data;
5066 	vfree(old_data);
5067 	return 0;
5068 }
5069 
5070 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
5071 {
5072 	int i;
5073 
5074 	if (len == 1)
5075 		return;
5076 	for (i = 0; i < env->subprog_cnt; i++) {
5077 		if (env->subprog_starts[i] < off)
5078 			continue;
5079 		env->subprog_starts[i] += len - 1;
5080 	}
5081 }
5082 
5083 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
5084 					    const struct bpf_insn *patch, u32 len)
5085 {
5086 	struct bpf_prog *new_prog;
5087 
5088 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
5089 	if (!new_prog)
5090 		return NULL;
5091 	if (adjust_insn_aux_data(env, new_prog->len, off, len))
5092 		return NULL;
5093 	adjust_subprog_starts(env, off, len);
5094 	return new_prog;
5095 }
5096 
5097 /* The verifier does more data flow analysis than llvm and will not
5098  * explore branches that are dead at run time. Malicious programs can
5099  * have dead code too. Therefore replace all dead at-run-time code
5100  * with 'ja -1'.
5101  *
5102  * Just nops are not optimal, e.g. if they would sit at the end of the
5103  * program and through another bug we would manage to jump there, then
5104  * we'd execute beyond program memory otherwise. Returning exception
5105  * code also wouldn't work since we can have subprogs where the dead
5106  * code could be located.
5107  */
5108 static void sanitize_dead_code(struct bpf_verifier_env *env)
5109 {
5110 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
5111 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
5112 	struct bpf_insn *insn = env->prog->insnsi;
5113 	const int insn_cnt = env->prog->len;
5114 	int i;
5115 
5116 	for (i = 0; i < insn_cnt; i++) {
5117 		if (aux_data[i].seen)
5118 			continue;
5119 		memcpy(insn + i, &trap, sizeof(trap));
5120 	}
5121 }
5122 
5123 /* convert load instructions that access fields of 'struct __sk_buff'
5124  * into sequence of instructions that access fields of 'struct sk_buff'
5125  */
5126 static int convert_ctx_accesses(struct bpf_verifier_env *env)
5127 {
5128 	const struct bpf_verifier_ops *ops = env->ops;
5129 	int i, cnt, size, ctx_field_size, delta = 0;
5130 	const int insn_cnt = env->prog->len;
5131 	struct bpf_insn insn_buf[16], *insn;
5132 	struct bpf_prog *new_prog;
5133 	enum bpf_access_type type;
5134 	bool is_narrower_load;
5135 	u32 target_size;
5136 
5137 	if (ops->gen_prologue) {
5138 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
5139 					env->prog);
5140 		if (cnt >= ARRAY_SIZE(insn_buf)) {
5141 			verbose(env, "bpf verifier is misconfigured\n");
5142 			return -EINVAL;
5143 		} else if (cnt) {
5144 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
5145 			if (!new_prog)
5146 				return -ENOMEM;
5147 
5148 			env->prog = new_prog;
5149 			delta += cnt - 1;
5150 		}
5151 	}
5152 
5153 	if (!ops->convert_ctx_access)
5154 		return 0;
5155 
5156 	insn = env->prog->insnsi + delta;
5157 
5158 	for (i = 0; i < insn_cnt; i++, insn++) {
5159 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
5160 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
5161 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
5162 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
5163 			type = BPF_READ;
5164 		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
5165 			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
5166 			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
5167 			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
5168 			type = BPF_WRITE;
5169 		else
5170 			continue;
5171 
5172 		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
5173 			continue;
5174 
5175 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
5176 		size = BPF_LDST_BYTES(insn);
5177 
5178 		/* If the read access is a narrower load of the field,
5179 		 * convert to a 4/8-byte load, to minimum program type specific
5180 		 * convert_ctx_access changes. If conversion is successful,
5181 		 * we will apply proper mask to the result.
5182 		 */
5183 		is_narrower_load = size < ctx_field_size;
5184 		if (is_narrower_load) {
5185 			u32 off = insn->off;
5186 			u8 size_code;
5187 
5188 			if (type == BPF_WRITE) {
5189 				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
5190 				return -EINVAL;
5191 			}
5192 
5193 			size_code = BPF_H;
5194 			if (ctx_field_size == 4)
5195 				size_code = BPF_W;
5196 			else if (ctx_field_size == 8)
5197 				size_code = BPF_DW;
5198 
5199 			insn->off = off & ~(ctx_field_size - 1);
5200 			insn->code = BPF_LDX | BPF_MEM | size_code;
5201 		}
5202 
5203 		target_size = 0;
5204 		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
5205 					      &target_size);
5206 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
5207 		    (ctx_field_size && !target_size)) {
5208 			verbose(env, "bpf verifier is misconfigured\n");
5209 			return -EINVAL;
5210 		}
5211 
5212 		if (is_narrower_load && size < target_size) {
5213 			if (ctx_field_size <= 4)
5214 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
5215 								(1 << size * 8) - 1);
5216 			else
5217 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
5218 								(1 << size * 8) - 1);
5219 		}
5220 
5221 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
5222 		if (!new_prog)
5223 			return -ENOMEM;
5224 
5225 		delta += cnt - 1;
5226 
5227 		/* keep walking new program and skip insns we just inserted */
5228 		env->prog = new_prog;
5229 		insn      = new_prog->insnsi + i + delta;
5230 	}
5231 
5232 	return 0;
5233 }
5234 
5235 static int jit_subprogs(struct bpf_verifier_env *env)
5236 {
5237 	struct bpf_prog *prog = env->prog, **func, *tmp;
5238 	int i, j, subprog_start, subprog_end = 0, len, subprog;
5239 	struct bpf_insn *insn;
5240 	void *old_bpf_func;
5241 	int err = -ENOMEM;
5242 
5243 	if (env->subprog_cnt == 0)
5244 		return 0;
5245 
5246 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
5247 		if (insn->code != (BPF_JMP | BPF_CALL) ||
5248 		    insn->src_reg != BPF_PSEUDO_CALL)
5249 			continue;
5250 		subprog = find_subprog(env, i + insn->imm + 1);
5251 		if (subprog < 0) {
5252 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5253 				  i + insn->imm + 1);
5254 			return -EFAULT;
5255 		}
5256 		/* temporarily remember subprog id inside insn instead of
5257 		 * aux_data, since next loop will split up all insns into funcs
5258 		 */
5259 		insn->off = subprog + 1;
5260 		/* remember original imm in case JIT fails and fallback
5261 		 * to interpreter will be needed
5262 		 */
5263 		env->insn_aux_data[i].call_imm = insn->imm;
5264 		/* point imm to __bpf_call_base+1 from JITs point of view */
5265 		insn->imm = 1;
5266 	}
5267 
5268 	func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
5269 	if (!func)
5270 		return -ENOMEM;
5271 
5272 	for (i = 0; i <= env->subprog_cnt; i++) {
5273 		subprog_start = subprog_end;
5274 		if (env->subprog_cnt == i)
5275 			subprog_end = prog->len;
5276 		else
5277 			subprog_end = env->subprog_starts[i];
5278 
5279 		len = subprog_end - subprog_start;
5280 		func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
5281 		if (!func[i])
5282 			goto out_free;
5283 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
5284 		       len * sizeof(struct bpf_insn));
5285 		func[i]->type = prog->type;
5286 		func[i]->len = len;
5287 		if (bpf_prog_calc_tag(func[i]))
5288 			goto out_free;
5289 		func[i]->is_func = 1;
5290 		/* Use bpf_prog_F_tag to indicate functions in stack traces.
5291 		 * Long term would need debug info to populate names
5292 		 */
5293 		func[i]->aux->name[0] = 'F';
5294 		func[i]->aux->stack_depth = env->subprog_stack_depth[i];
5295 		func[i]->jit_requested = 1;
5296 		func[i] = bpf_int_jit_compile(func[i]);
5297 		if (!func[i]->jited) {
5298 			err = -ENOTSUPP;
5299 			goto out_free;
5300 		}
5301 		cond_resched();
5302 	}
5303 	/* at this point all bpf functions were successfully JITed
5304 	 * now populate all bpf_calls with correct addresses and
5305 	 * run last pass of JIT
5306 	 */
5307 	for (i = 0; i <= env->subprog_cnt; i++) {
5308 		insn = func[i]->insnsi;
5309 		for (j = 0; j < func[i]->len; j++, insn++) {
5310 			if (insn->code != (BPF_JMP | BPF_CALL) ||
5311 			    insn->src_reg != BPF_PSEUDO_CALL)
5312 				continue;
5313 			subprog = insn->off;
5314 			insn->off = 0;
5315 			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
5316 				func[subprog]->bpf_func -
5317 				__bpf_call_base;
5318 		}
5319 	}
5320 	for (i = 0; i <= env->subprog_cnt; i++) {
5321 		old_bpf_func = func[i]->bpf_func;
5322 		tmp = bpf_int_jit_compile(func[i]);
5323 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
5324 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
5325 			err = -EFAULT;
5326 			goto out_free;
5327 		}
5328 		cond_resched();
5329 	}
5330 
5331 	/* finally lock prog and jit images for all functions and
5332 	 * populate kallsysm
5333 	 */
5334 	for (i = 0; i <= env->subprog_cnt; i++) {
5335 		bpf_prog_lock_ro(func[i]);
5336 		bpf_prog_kallsyms_add(func[i]);
5337 	}
5338 
5339 	/* Last step: make now unused interpreter insns from main
5340 	 * prog consistent for later dump requests, so they can
5341 	 * later look the same as if they were interpreted only.
5342 	 */
5343 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
5344 		unsigned long addr;
5345 
5346 		if (insn->code != (BPF_JMP | BPF_CALL) ||
5347 		    insn->src_reg != BPF_PSEUDO_CALL)
5348 			continue;
5349 		insn->off = env->insn_aux_data[i].call_imm;
5350 		subprog = find_subprog(env, i + insn->off + 1);
5351 		addr  = (unsigned long)func[subprog + 1]->bpf_func;
5352 		addr &= PAGE_MASK;
5353 		insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
5354 			    addr - __bpf_call_base;
5355 	}
5356 
5357 	prog->jited = 1;
5358 	prog->bpf_func = func[0]->bpf_func;
5359 	prog->aux->func = func;
5360 	prog->aux->func_cnt = env->subprog_cnt + 1;
5361 	return 0;
5362 out_free:
5363 	for (i = 0; i <= env->subprog_cnt; i++)
5364 		if (func[i])
5365 			bpf_jit_free(func[i]);
5366 	kfree(func);
5367 	/* cleanup main prog to be interpreted */
5368 	prog->jit_requested = 0;
5369 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
5370 		if (insn->code != (BPF_JMP | BPF_CALL) ||
5371 		    insn->src_reg != BPF_PSEUDO_CALL)
5372 			continue;
5373 		insn->off = 0;
5374 		insn->imm = env->insn_aux_data[i].call_imm;
5375 	}
5376 	return err;
5377 }
5378 
5379 static int fixup_call_args(struct bpf_verifier_env *env)
5380 {
5381 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
5382 	struct bpf_prog *prog = env->prog;
5383 	struct bpf_insn *insn = prog->insnsi;
5384 	int i, depth;
5385 #endif
5386 	int err;
5387 
5388 	err = 0;
5389 	if (env->prog->jit_requested) {
5390 		err = jit_subprogs(env);
5391 		if (err == 0)
5392 			return 0;
5393 	}
5394 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
5395 	for (i = 0; i < prog->len; i++, insn++) {
5396 		if (insn->code != (BPF_JMP | BPF_CALL) ||
5397 		    insn->src_reg != BPF_PSEUDO_CALL)
5398 			continue;
5399 		depth = get_callee_stack_depth(env, insn, i);
5400 		if (depth < 0)
5401 			return depth;
5402 		bpf_patch_call_args(insn, depth);
5403 	}
5404 	err = 0;
5405 #endif
5406 	return err;
5407 }
5408 
5409 /* fixup insn->imm field of bpf_call instructions
5410  * and inline eligible helpers as explicit sequence of BPF instructions
5411  *
5412  * this function is called after eBPF program passed verification
5413  */
5414 static int fixup_bpf_calls(struct bpf_verifier_env *env)
5415 {
5416 	struct bpf_prog *prog = env->prog;
5417 	struct bpf_insn *insn = prog->insnsi;
5418 	const struct bpf_func_proto *fn;
5419 	const int insn_cnt = prog->len;
5420 	struct bpf_insn insn_buf[16];
5421 	struct bpf_prog *new_prog;
5422 	struct bpf_map *map_ptr;
5423 	int i, cnt, delta = 0;
5424 
5425 	for (i = 0; i < insn_cnt; i++, insn++) {
5426 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
5427 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
5428 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
5429 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
5430 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
5431 			struct bpf_insn mask_and_div[] = {
5432 				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
5433 				/* Rx div 0 -> 0 */
5434 				BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
5435 				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
5436 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
5437 				*insn,
5438 			};
5439 			struct bpf_insn mask_and_mod[] = {
5440 				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
5441 				/* Rx mod 0 -> Rx */
5442 				BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
5443 				*insn,
5444 			};
5445 			struct bpf_insn *patchlet;
5446 
5447 			if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
5448 			    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
5449 				patchlet = mask_and_div + (is64 ? 1 : 0);
5450 				cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
5451 			} else {
5452 				patchlet = mask_and_mod + (is64 ? 1 : 0);
5453 				cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
5454 			}
5455 
5456 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
5457 			if (!new_prog)
5458 				return -ENOMEM;
5459 
5460 			delta    += cnt - 1;
5461 			env->prog = prog = new_prog;
5462 			insn      = new_prog->insnsi + i + delta;
5463 			continue;
5464 		}
5465 
5466 		if (insn->code != (BPF_JMP | BPF_CALL))
5467 			continue;
5468 		if (insn->src_reg == BPF_PSEUDO_CALL)
5469 			continue;
5470 
5471 		if (insn->imm == BPF_FUNC_get_route_realm)
5472 			prog->dst_needed = 1;
5473 		if (insn->imm == BPF_FUNC_get_prandom_u32)
5474 			bpf_user_rnd_init_once();
5475 		if (insn->imm == BPF_FUNC_override_return)
5476 			prog->kprobe_override = 1;
5477 		if (insn->imm == BPF_FUNC_tail_call) {
5478 			/* If we tail call into other programs, we
5479 			 * cannot make any assumptions since they can
5480 			 * be replaced dynamically during runtime in
5481 			 * the program array.
5482 			 */
5483 			prog->cb_access = 1;
5484 			env->prog->aux->stack_depth = MAX_BPF_STACK;
5485 
5486 			/* mark bpf_tail_call as different opcode to avoid
5487 			 * conditional branch in the interpeter for every normal
5488 			 * call and to prevent accidental JITing by JIT compiler
5489 			 * that doesn't support bpf_tail_call yet
5490 			 */
5491 			insn->imm = 0;
5492 			insn->code = BPF_JMP | BPF_TAIL_CALL;
5493 
5494 			/* instead of changing every JIT dealing with tail_call
5495 			 * emit two extra insns:
5496 			 * if (index >= max_entries) goto out;
5497 			 * index &= array->index_mask;
5498 			 * to avoid out-of-bounds cpu speculation
5499 			 */
5500 			map_ptr = env->insn_aux_data[i + delta].map_ptr;
5501 			if (map_ptr == BPF_MAP_PTR_POISON) {
5502 				verbose(env, "tail_call abusing map_ptr\n");
5503 				return -EINVAL;
5504 			}
5505 			if (!map_ptr->unpriv_array)
5506 				continue;
5507 			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
5508 						  map_ptr->max_entries, 2);
5509 			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
5510 						    container_of(map_ptr,
5511 								 struct bpf_array,
5512 								 map)->index_mask);
5513 			insn_buf[2] = *insn;
5514 			cnt = 3;
5515 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
5516 			if (!new_prog)
5517 				return -ENOMEM;
5518 
5519 			delta    += cnt - 1;
5520 			env->prog = prog = new_prog;
5521 			insn      = new_prog->insnsi + i + delta;
5522 			continue;
5523 		}
5524 
5525 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
5526 		 * handlers are currently limited to 64 bit only.
5527 		 */
5528 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
5529 		    insn->imm == BPF_FUNC_map_lookup_elem) {
5530 			map_ptr = env->insn_aux_data[i + delta].map_ptr;
5531 			if (map_ptr == BPF_MAP_PTR_POISON ||
5532 			    !map_ptr->ops->map_gen_lookup)
5533 				goto patch_call_imm;
5534 
5535 			cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
5536 			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
5537 				verbose(env, "bpf verifier is misconfigured\n");
5538 				return -EINVAL;
5539 			}
5540 
5541 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
5542 						       cnt);
5543 			if (!new_prog)
5544 				return -ENOMEM;
5545 
5546 			delta += cnt - 1;
5547 
5548 			/* keep walking new program and skip insns we just inserted */
5549 			env->prog = prog = new_prog;
5550 			insn      = new_prog->insnsi + i + delta;
5551 			continue;
5552 		}
5553 
5554 		if (insn->imm == BPF_FUNC_redirect_map) {
5555 			/* Note, we cannot use prog directly as imm as subsequent
5556 			 * rewrites would still change the prog pointer. The only
5557 			 * stable address we can use is aux, which also works with
5558 			 * prog clones during blinding.
5559 			 */
5560 			u64 addr = (unsigned long)prog->aux;
5561 			struct bpf_insn r4_ld[] = {
5562 				BPF_LD_IMM64(BPF_REG_4, addr),
5563 				*insn,
5564 			};
5565 			cnt = ARRAY_SIZE(r4_ld);
5566 
5567 			new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
5568 			if (!new_prog)
5569 				return -ENOMEM;
5570 
5571 			delta    += cnt - 1;
5572 			env->prog = prog = new_prog;
5573 			insn      = new_prog->insnsi + i + delta;
5574 		}
5575 patch_call_imm:
5576 		fn = env->ops->get_func_proto(insn->imm, env->prog);
5577 		/* all functions that have prototype and verifier allowed
5578 		 * programs to call them, must be real in-kernel functions
5579 		 */
5580 		if (!fn->func) {
5581 			verbose(env,
5582 				"kernel subsystem misconfigured func %s#%d\n",
5583 				func_id_name(insn->imm), insn->imm);
5584 			return -EFAULT;
5585 		}
5586 		insn->imm = fn->func - __bpf_call_base;
5587 	}
5588 
5589 	return 0;
5590 }
5591 
5592 static void free_states(struct bpf_verifier_env *env)
5593 {
5594 	struct bpf_verifier_state_list *sl, *sln;
5595 	int i;
5596 
5597 	if (!env->explored_states)
5598 		return;
5599 
5600 	for (i = 0; i < env->prog->len; i++) {
5601 		sl = env->explored_states[i];
5602 
5603 		if (sl)
5604 			while (sl != STATE_LIST_MARK) {
5605 				sln = sl->next;
5606 				free_verifier_state(&sl->state, false);
5607 				kfree(sl);
5608 				sl = sln;
5609 			}
5610 	}
5611 
5612 	kfree(env->explored_states);
5613 }
5614 
5615 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
5616 {
5617 	struct bpf_verifier_env *env;
5618 	struct bpf_verifier_log *log;
5619 	int ret = -EINVAL;
5620 
5621 	/* no program is valid */
5622 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
5623 		return -EINVAL;
5624 
5625 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
5626 	 * allocate/free it every time bpf_check() is called
5627 	 */
5628 	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
5629 	if (!env)
5630 		return -ENOMEM;
5631 	log = &env->log;
5632 
5633 	env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
5634 				     (*prog)->len);
5635 	ret = -ENOMEM;
5636 	if (!env->insn_aux_data)
5637 		goto err_free_env;
5638 	env->prog = *prog;
5639 	env->ops = bpf_verifier_ops[env->prog->type];
5640 
5641 	/* grab the mutex to protect few globals used by verifier */
5642 	mutex_lock(&bpf_verifier_lock);
5643 
5644 	if (attr->log_level || attr->log_buf || attr->log_size) {
5645 		/* user requested verbose verifier output
5646 		 * and supplied buffer to store the verification trace
5647 		 */
5648 		log->level = attr->log_level;
5649 		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
5650 		log->len_total = attr->log_size;
5651 
5652 		ret = -EINVAL;
5653 		/* log attributes have to be sane */
5654 		if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
5655 		    !log->level || !log->ubuf)
5656 			goto err_unlock;
5657 	}
5658 
5659 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
5660 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
5661 		env->strict_alignment = true;
5662 
5663 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
5664 		ret = bpf_prog_offload_verifier_prep(env);
5665 		if (ret)
5666 			goto err_unlock;
5667 	}
5668 
5669 	ret = replace_map_fd_with_map_ptr(env);
5670 	if (ret < 0)
5671 		goto skip_full_check;
5672 
5673 	env->explored_states = kcalloc(env->prog->len,
5674 				       sizeof(struct bpf_verifier_state_list *),
5675 				       GFP_USER);
5676 	ret = -ENOMEM;
5677 	if (!env->explored_states)
5678 		goto skip_full_check;
5679 
5680 	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
5681 
5682 	ret = check_cfg(env);
5683 	if (ret < 0)
5684 		goto skip_full_check;
5685 
5686 	ret = do_check(env);
5687 	if (env->cur_state) {
5688 		free_verifier_state(env->cur_state, true);
5689 		env->cur_state = NULL;
5690 	}
5691 
5692 skip_full_check:
5693 	while (!pop_stack(env, NULL, NULL));
5694 	free_states(env);
5695 
5696 	if (ret == 0)
5697 		sanitize_dead_code(env);
5698 
5699 	if (ret == 0)
5700 		ret = check_max_stack_depth(env);
5701 
5702 	if (ret == 0)
5703 		/* program is valid, convert *(u32*)(ctx + off) accesses */
5704 		ret = convert_ctx_accesses(env);
5705 
5706 	if (ret == 0)
5707 		ret = fixup_bpf_calls(env);
5708 
5709 	if (ret == 0)
5710 		ret = fixup_call_args(env);
5711 
5712 	if (log->level && bpf_verifier_log_full(log))
5713 		ret = -ENOSPC;
5714 	if (log->level && !log->ubuf) {
5715 		ret = -EFAULT;
5716 		goto err_release_maps;
5717 	}
5718 
5719 	if (ret == 0 && env->used_map_cnt) {
5720 		/* if program passed verifier, update used_maps in bpf_prog_info */
5721 		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
5722 							  sizeof(env->used_maps[0]),
5723 							  GFP_KERNEL);
5724 
5725 		if (!env->prog->aux->used_maps) {
5726 			ret = -ENOMEM;
5727 			goto err_release_maps;
5728 		}
5729 
5730 		memcpy(env->prog->aux->used_maps, env->used_maps,
5731 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
5732 		env->prog->aux->used_map_cnt = env->used_map_cnt;
5733 
5734 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
5735 		 * bpf_ld_imm64 instructions
5736 		 */
5737 		convert_pseudo_ld_imm64(env);
5738 	}
5739 
5740 err_release_maps:
5741 	if (!env->prog->aux->used_maps)
5742 		/* if we didn't copy map pointers into bpf_prog_info, release
5743 		 * them now. Otherwise free_bpf_prog_info() will release them.
5744 		 */
5745 		release_maps(env);
5746 	*prog = env->prog;
5747 err_unlock:
5748 	mutex_unlock(&bpf_verifier_lock);
5749 	vfree(env->insn_aux_data);
5750 err_free_env:
5751 	kfree(env);
5752 	return ret;
5753 }
5754