// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 */
#include <uapi/linux/btf.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>

#include "disasm.h"

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
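 *
 * For illustration (a sketch added here, not part of the original example
 * set), a store through a stack base and a load through the ctx base:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),        // R2 now holds the frame pointer
 *    BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),        // checked as a stack access
 *    BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0), // checked as a ctx access
 * whether a given ctx offset is readable depends on the program type.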
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
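 *
 * A sketch (added for illustration; assumes R1-R5 were set up as
 * bpf_sk_lookup_tcp() expects) of such a reference-acquiring call:
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 * after it R0 has type PTR_TO_SOCKET_OR_NULL and a fresh ref_obj_id recorded
 * in the function state; it must be NULL-checked and eventually released.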
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */

/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
};

#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_MAP_KEY_POISON	(1ULL << 63)
#define BPF_MAP_KEY_SEEN	(1ULL << 62)

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_ptr_state = (unsigned long)map |
			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & BPF_MAP_KEY_POISON;
}

static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
{
	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
}

static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
}

static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
	bool poisoned = bpf_map_key_poisoned(aux);

	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
			     (poisoned ?
BPF_MAP_KEY_POISON : 0ULL); 222 } 223 224 struct bpf_call_arg_meta { 225 struct bpf_map *map_ptr; 226 bool raw_mode; 227 bool pkt_access; 228 int regno; 229 int access_size; 230 s64 msize_smax_value; 231 u64 msize_umax_value; 232 int ref_obj_id; 233 int func_id; 234 u32 btf_id; 235 }; 236 237 struct btf *btf_vmlinux; 238 239 static DEFINE_MUTEX(bpf_verifier_lock); 240 241 static const struct bpf_line_info * 242 find_linfo(const struct bpf_verifier_env *env, u32 insn_off) 243 { 244 const struct bpf_line_info *linfo; 245 const struct bpf_prog *prog; 246 u32 i, nr_linfo; 247 248 prog = env->prog; 249 nr_linfo = prog->aux->nr_linfo; 250 251 if (!nr_linfo || insn_off >= prog->len) 252 return NULL; 253 254 linfo = prog->aux->linfo; 255 for (i = 1; i < nr_linfo; i++) 256 if (insn_off < linfo[i].insn_off) 257 break; 258 259 return &linfo[i - 1]; 260 } 261 262 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, 263 va_list args) 264 { 265 unsigned int n; 266 267 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); 268 269 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, 270 "verifier log line truncated - local buffer too short\n"); 271 272 n = min(log->len_total - log->len_used - 1, n); 273 log->kbuf[n] = '\0'; 274 275 if (log->level == BPF_LOG_KERNEL) { 276 pr_err("BPF:%s\n", log->kbuf); 277 return; 278 } 279 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) 280 log->len_used += n; 281 else 282 log->ubuf = NULL; 283 } 284 285 /* log_level controls verbosity level of eBPF verifier. 286 * bpf_verifier_log_write() is used to dump the verification trace to the log, 287 * so the user can figure out what's wrong with the program 288 */ 289 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, 290 const char *fmt, ...) 291 { 292 va_list args; 293 294 if (!bpf_verifier_log_needed(&env->log)) 295 return; 296 297 va_start(args, fmt); 298 bpf_verifier_vlog(&env->log, fmt, args); 299 va_end(args); 300 } 301 EXPORT_SYMBOL_GPL(bpf_verifier_log_write); 302 303 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) 304 { 305 struct bpf_verifier_env *env = private_data; 306 va_list args; 307 308 if (!bpf_verifier_log_needed(&env->log)) 309 return; 310 311 va_start(args, fmt); 312 bpf_verifier_vlog(&env->log, fmt, args); 313 va_end(args); 314 } 315 316 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, 317 const char *fmt, ...) 318 { 319 va_list args; 320 321 if (!bpf_verifier_log_needed(log)) 322 return; 323 324 va_start(args, fmt); 325 bpf_verifier_vlog(log, fmt, args); 326 va_end(args); 327 } 328 329 static const char *ltrim(const char *s) 330 { 331 while (isspace(*s)) 332 s++; 333 334 return s; 335 } 336 337 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, 338 u32 insn_off, 339 const char *prefix_fmt, ...) 
340 { 341 const struct bpf_line_info *linfo; 342 343 if (!bpf_verifier_log_needed(&env->log)) 344 return; 345 346 linfo = find_linfo(env, insn_off); 347 if (!linfo || linfo == env->prev_linfo) 348 return; 349 350 if (prefix_fmt) { 351 va_list args; 352 353 va_start(args, prefix_fmt); 354 bpf_verifier_vlog(&env->log, prefix_fmt, args); 355 va_end(args); 356 } 357 358 verbose(env, "%s\n", 359 ltrim(btf_name_by_offset(env->prog->aux->btf, 360 linfo->line_off))); 361 362 env->prev_linfo = linfo; 363 } 364 365 static bool type_is_pkt_pointer(enum bpf_reg_type type) 366 { 367 return type == PTR_TO_PACKET || 368 type == PTR_TO_PACKET_META; 369 } 370 371 static bool type_is_sk_pointer(enum bpf_reg_type type) 372 { 373 return type == PTR_TO_SOCKET || 374 type == PTR_TO_SOCK_COMMON || 375 type == PTR_TO_TCP_SOCK || 376 type == PTR_TO_XDP_SOCK; 377 } 378 379 static bool reg_type_may_be_null(enum bpf_reg_type type) 380 { 381 return type == PTR_TO_MAP_VALUE_OR_NULL || 382 type == PTR_TO_SOCKET_OR_NULL || 383 type == PTR_TO_SOCK_COMMON_OR_NULL || 384 type == PTR_TO_TCP_SOCK_OR_NULL; 385 } 386 387 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) 388 { 389 return reg->type == PTR_TO_MAP_VALUE && 390 map_value_has_spin_lock(reg->map_ptr); 391 } 392 393 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) 394 { 395 return type == PTR_TO_SOCKET || 396 type == PTR_TO_SOCKET_OR_NULL || 397 type == PTR_TO_TCP_SOCK || 398 type == PTR_TO_TCP_SOCK_OR_NULL; 399 } 400 401 static bool arg_type_may_be_refcounted(enum bpf_arg_type type) 402 { 403 return type == ARG_PTR_TO_SOCK_COMMON; 404 } 405 406 /* Determine whether the function releases some resources allocated by another 407 * function call. The first reference type argument will be assumed to be 408 * released by release_reference(). 
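 *
 * A minimal BPF-C sketch of the expected pairing (illustrative only; the
 * helpers named are the ones this file treats as acquire/release):
 *
 *	sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 *	if (sk)
 *		bpf_sk_release(sk);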
409 */ 410 static bool is_release_function(enum bpf_func_id func_id) 411 { 412 return func_id == BPF_FUNC_sk_release; 413 } 414 415 static bool is_acquire_function(enum bpf_func_id func_id) 416 { 417 return func_id == BPF_FUNC_sk_lookup_tcp || 418 func_id == BPF_FUNC_sk_lookup_udp || 419 func_id == BPF_FUNC_skc_lookup_tcp; 420 } 421 422 static bool is_ptr_cast_function(enum bpf_func_id func_id) 423 { 424 return func_id == BPF_FUNC_tcp_sock || 425 func_id == BPF_FUNC_sk_fullsock; 426 } 427 428 /* string representation of 'enum bpf_reg_type' */ 429 static const char * const reg_type_str[] = { 430 [NOT_INIT] = "?", 431 [SCALAR_VALUE] = "inv", 432 [PTR_TO_CTX] = "ctx", 433 [CONST_PTR_TO_MAP] = "map_ptr", 434 [PTR_TO_MAP_VALUE] = "map_value", 435 [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", 436 [PTR_TO_STACK] = "fp", 437 [PTR_TO_PACKET] = "pkt", 438 [PTR_TO_PACKET_META] = "pkt_meta", 439 [PTR_TO_PACKET_END] = "pkt_end", 440 [PTR_TO_FLOW_KEYS] = "flow_keys", 441 [PTR_TO_SOCKET] = "sock", 442 [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", 443 [PTR_TO_SOCK_COMMON] = "sock_common", 444 [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", 445 [PTR_TO_TCP_SOCK] = "tcp_sock", 446 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", 447 [PTR_TO_TP_BUFFER] = "tp_buffer", 448 [PTR_TO_XDP_SOCK] = "xdp_sock", 449 [PTR_TO_BTF_ID] = "ptr_", 450 }; 451 452 static char slot_type_char[] = { 453 [STACK_INVALID] = '?', 454 [STACK_SPILL] = 'r', 455 [STACK_MISC] = 'm', 456 [STACK_ZERO] = '0', 457 }; 458 459 static void print_liveness(struct bpf_verifier_env *env, 460 enum bpf_reg_liveness live) 461 { 462 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) 463 verbose(env, "_"); 464 if (live & REG_LIVE_READ) 465 verbose(env, "r"); 466 if (live & REG_LIVE_WRITTEN) 467 verbose(env, "w"); 468 if (live & REG_LIVE_DONE) 469 verbose(env, "D"); 470 } 471 472 static struct bpf_func_state *func(struct bpf_verifier_env *env, 473 const struct bpf_reg_state *reg) 474 { 475 struct bpf_verifier_state *cur = env->cur_state; 476 477 return cur->frame[reg->frameno]; 478 } 479 480 const char *kernel_type_name(u32 id) 481 { 482 return btf_name_by_offset(btf_vmlinux, 483 btf_type_by_id(btf_vmlinux, id)->name_off); 484 } 485 486 static void print_verifier_state(struct bpf_verifier_env *env, 487 const struct bpf_func_state *state) 488 { 489 const struct bpf_reg_state *reg; 490 enum bpf_reg_type t; 491 int i; 492 493 if (state->frameno) 494 verbose(env, " frame%d:", state->frameno); 495 for (i = 0; i < MAX_BPF_REG; i++) { 496 reg = &state->regs[i]; 497 t = reg->type; 498 if (t == NOT_INIT) 499 continue; 500 verbose(env, " R%d", i); 501 print_liveness(env, reg->live); 502 verbose(env, "=%s", reg_type_str[t]); 503 if (t == SCALAR_VALUE && reg->precise) 504 verbose(env, "P"); 505 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && 506 tnum_is_const(reg->var_off)) { 507 /* reg->off should be 0 for SCALAR_VALUE */ 508 verbose(env, "%lld", reg->var_off.value + reg->off); 509 } else { 510 if (t == PTR_TO_BTF_ID) 511 verbose(env, "%s", kernel_type_name(reg->btf_id)); 512 verbose(env, "(id=%d", reg->id); 513 if (reg_type_may_be_refcounted_or_null(t)) 514 verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); 515 if (t != SCALAR_VALUE) 516 verbose(env, ",off=%d", reg->off); 517 if (type_is_pkt_pointer(t)) 518 verbose(env, ",r=%d", reg->range); 519 else if (t == CONST_PTR_TO_MAP || 520 t == PTR_TO_MAP_VALUE || 521 t == PTR_TO_MAP_VALUE_OR_NULL) 522 verbose(env, ",ks=%d,vs=%d", 523 reg->map_ptr->key_size, 524 reg->map_ptr->value_size); 525 if 
(tnum_is_const(reg->var_off)) { 526 /* Typically an immediate SCALAR_VALUE, but 527 * could be a pointer whose offset is too big 528 * for reg->off 529 */ 530 verbose(env, ",imm=%llx", reg->var_off.value); 531 } else { 532 if (reg->smin_value != reg->umin_value && 533 reg->smin_value != S64_MIN) 534 verbose(env, ",smin_value=%lld", 535 (long long)reg->smin_value); 536 if (reg->smax_value != reg->umax_value && 537 reg->smax_value != S64_MAX) 538 verbose(env, ",smax_value=%lld", 539 (long long)reg->smax_value); 540 if (reg->umin_value != 0) 541 verbose(env, ",umin_value=%llu", 542 (unsigned long long)reg->umin_value); 543 if (reg->umax_value != U64_MAX) 544 verbose(env, ",umax_value=%llu", 545 (unsigned long long)reg->umax_value); 546 if (!tnum_is_unknown(reg->var_off)) { 547 char tn_buf[48]; 548 549 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 550 verbose(env, ",var_off=%s", tn_buf); 551 } 552 } 553 verbose(env, ")"); 554 } 555 } 556 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 557 char types_buf[BPF_REG_SIZE + 1]; 558 bool valid = false; 559 int j; 560 561 for (j = 0; j < BPF_REG_SIZE; j++) { 562 if (state->stack[i].slot_type[j] != STACK_INVALID) 563 valid = true; 564 types_buf[j] = slot_type_char[ 565 state->stack[i].slot_type[j]]; 566 } 567 types_buf[BPF_REG_SIZE] = 0; 568 if (!valid) 569 continue; 570 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); 571 print_liveness(env, state->stack[i].spilled_ptr.live); 572 if (state->stack[i].slot_type[0] == STACK_SPILL) { 573 reg = &state->stack[i].spilled_ptr; 574 t = reg->type; 575 verbose(env, "=%s", reg_type_str[t]); 576 if (t == SCALAR_VALUE && reg->precise) 577 verbose(env, "P"); 578 if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) 579 verbose(env, "%lld", reg->var_off.value + reg->off); 580 } else { 581 verbose(env, "=%s", types_buf); 582 } 583 } 584 if (state->acquired_refs && state->refs[0].id) { 585 verbose(env, " refs=%d", state->refs[0].id); 586 for (i = 1; i < state->acquired_refs; i++) 587 if (state->refs[i].id) 588 verbose(env, ",%d", state->refs[i].id); 589 } 590 verbose(env, "\n"); 591 } 592 593 #define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \ 594 static int copy_##NAME##_state(struct bpf_func_state *dst, \ 595 const struct bpf_func_state *src) \ 596 { \ 597 if (!src->FIELD) \ 598 return 0; \ 599 if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \ 600 /* internal bug, make state invalid to reject the program */ \ 601 memset(dst, 0, sizeof(*dst)); \ 602 return -EFAULT; \ 603 } \ 604 memcpy(dst->FIELD, src->FIELD, \ 605 sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ 606 return 0; \ 607 } 608 /* copy_reference_state() */ 609 COPY_STATE_FN(reference, acquired_refs, refs, 1) 610 /* copy_stack_state() */ 611 COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) 612 #undef COPY_STATE_FN 613 614 #define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \ 615 static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \ 616 bool copy_old) \ 617 { \ 618 u32 old_size = state->COUNT; \ 619 struct bpf_##NAME##_state *new_##FIELD; \ 620 int slot = size / SIZE; \ 621 \ 622 if (size <= old_size || !size) { \ 623 if (copy_old) \ 624 return 0; \ 625 state->COUNT = slot * SIZE; \ 626 if (!size && old_size) { \ 627 kfree(state->FIELD); \ 628 state->FIELD = NULL; \ 629 } \ 630 return 0; \ 631 } \ 632 new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \ 633 GFP_KERNEL); \ 634 if (!new_##FIELD) \ 635 return -ENOMEM; \ 636 if (copy_old) { \ 637 if (state->FIELD) \ 638 memcpy(new_##FIELD, state->FIELD, \ 
639 sizeof(*new_##FIELD) * (old_size / SIZE)); \ 640 memset(new_##FIELD + old_size / SIZE, 0, \ 641 sizeof(*new_##FIELD) * (size - old_size) / SIZE); \ 642 } \ 643 state->COUNT = slot * SIZE; \ 644 kfree(state->FIELD); \ 645 state->FIELD = new_##FIELD; \ 646 return 0; \ 647 } 648 /* realloc_reference_state() */ 649 REALLOC_STATE_FN(reference, acquired_refs, refs, 1) 650 /* realloc_stack_state() */ 651 REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) 652 #undef REALLOC_STATE_FN 653 654 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to 655 * make it consume minimal amount of memory. check_stack_write() access from 656 * the program calls into realloc_func_state() to grow the stack size. 657 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state 658 * which realloc_stack_state() copies over. It points to previous 659 * bpf_verifier_state which is never reallocated. 660 */ 661 static int realloc_func_state(struct bpf_func_state *state, int stack_size, 662 int refs_size, bool copy_old) 663 { 664 int err = realloc_reference_state(state, refs_size, copy_old); 665 if (err) 666 return err; 667 return realloc_stack_state(state, stack_size, copy_old); 668 } 669 670 /* Acquire a pointer id from the env and update the state->refs to include 671 * this new pointer reference. 672 * On success, returns a valid pointer id to associate with the register 673 * On failure, returns a negative errno. 674 */ 675 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) 676 { 677 struct bpf_func_state *state = cur_func(env); 678 int new_ofs = state->acquired_refs; 679 int id, err; 680 681 err = realloc_reference_state(state, state->acquired_refs + 1, true); 682 if (err) 683 return err; 684 id = ++env->id_gen; 685 state->refs[new_ofs].id = id; 686 state->refs[new_ofs].insn_idx = insn_idx; 687 688 return id; 689 } 690 691 /* release function corresponding to acquire_reference_state(). Idempotent. 
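 *
 * Usage sketch (the call sites shown are assumptions for illustration):
 *	id = acquire_reference_state(env, insn_idx);	// helper acquired a ref
 *	...						// id travels via reg->ref_obj_id
 *	release_reference_state(cur_func(env), id);	// e.g. on bpf_sk_release()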
*/ 692 static int release_reference_state(struct bpf_func_state *state, int ptr_id) 693 { 694 int i, last_idx; 695 696 last_idx = state->acquired_refs - 1; 697 for (i = 0; i < state->acquired_refs; i++) { 698 if (state->refs[i].id == ptr_id) { 699 if (last_idx && i != last_idx) 700 memcpy(&state->refs[i], &state->refs[last_idx], 701 sizeof(*state->refs)); 702 memset(&state->refs[last_idx], 0, sizeof(*state->refs)); 703 state->acquired_refs--; 704 return 0; 705 } 706 } 707 return -EINVAL; 708 } 709 710 static int transfer_reference_state(struct bpf_func_state *dst, 711 struct bpf_func_state *src) 712 { 713 int err = realloc_reference_state(dst, src->acquired_refs, false); 714 if (err) 715 return err; 716 err = copy_reference_state(dst, src); 717 if (err) 718 return err; 719 return 0; 720 } 721 722 static void free_func_state(struct bpf_func_state *state) 723 { 724 if (!state) 725 return; 726 kfree(state->refs); 727 kfree(state->stack); 728 kfree(state); 729 } 730 731 static void clear_jmp_history(struct bpf_verifier_state *state) 732 { 733 kfree(state->jmp_history); 734 state->jmp_history = NULL; 735 state->jmp_history_cnt = 0; 736 } 737 738 static void free_verifier_state(struct bpf_verifier_state *state, 739 bool free_self) 740 { 741 int i; 742 743 for (i = 0; i <= state->curframe; i++) { 744 free_func_state(state->frame[i]); 745 state->frame[i] = NULL; 746 } 747 clear_jmp_history(state); 748 if (free_self) 749 kfree(state); 750 } 751 752 /* copy verifier state from src to dst growing dst stack space 753 * when necessary to accommodate larger src stack 754 */ 755 static int copy_func_state(struct bpf_func_state *dst, 756 const struct bpf_func_state *src) 757 { 758 int err; 759 760 err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, 761 false); 762 if (err) 763 return err; 764 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); 765 err = copy_reference_state(dst, src); 766 if (err) 767 return err; 768 return copy_stack_state(dst, src); 769 } 770 771 static int copy_verifier_state(struct bpf_verifier_state *dst_state, 772 const struct bpf_verifier_state *src) 773 { 774 struct bpf_func_state *dst; 775 u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; 776 int i, err; 777 778 if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { 779 kfree(dst_state->jmp_history); 780 dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); 781 if (!dst_state->jmp_history) 782 return -ENOMEM; 783 } 784 memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); 785 dst_state->jmp_history_cnt = src->jmp_history_cnt; 786 787 /* if dst has more stack frames then src frame, free them */ 788 for (i = src->curframe + 1; i <= dst_state->curframe; i++) { 789 free_func_state(dst_state->frame[i]); 790 dst_state->frame[i] = NULL; 791 } 792 dst_state->speculative = src->speculative; 793 dst_state->curframe = src->curframe; 794 dst_state->active_spin_lock = src->active_spin_lock; 795 dst_state->branches = src->branches; 796 dst_state->parent = src->parent; 797 dst_state->first_insn_idx = src->first_insn_idx; 798 dst_state->last_insn_idx = src->last_insn_idx; 799 for (i = 0; i <= src->curframe; i++) { 800 dst = dst_state->frame[i]; 801 if (!dst) { 802 dst = kzalloc(sizeof(*dst), GFP_KERNEL); 803 if (!dst) 804 return -ENOMEM; 805 dst_state->frame[i] = dst; 806 } 807 err = copy_func_state(dst, src->frame[i]); 808 if (err) 809 return err; 810 } 811 return 0; 812 } 813 814 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 815 { 816 while 
(st) { 817 u32 br = --st->branches; 818 819 /* WARN_ON(br > 1) technically makes sense here, 820 * but see comment in push_stack(), hence: 821 */ 822 WARN_ONCE((int)br < 0, 823 "BUG update_branch_counts:branches_to_explore=%d\n", 824 br); 825 if (br) 826 break; 827 st = st->parent; 828 } 829 } 830 831 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, 832 int *insn_idx) 833 { 834 struct bpf_verifier_state *cur = env->cur_state; 835 struct bpf_verifier_stack_elem *elem, *head = env->head; 836 int err; 837 838 if (env->head == NULL) 839 return -ENOENT; 840 841 if (cur) { 842 err = copy_verifier_state(cur, &head->st); 843 if (err) 844 return err; 845 } 846 if (insn_idx) 847 *insn_idx = head->insn_idx; 848 if (prev_insn_idx) 849 *prev_insn_idx = head->prev_insn_idx; 850 elem = head->next; 851 free_verifier_state(&head->st, false); 852 kfree(head); 853 env->head = elem; 854 env->stack_size--; 855 return 0; 856 } 857 858 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, 859 int insn_idx, int prev_insn_idx, 860 bool speculative) 861 { 862 struct bpf_verifier_state *cur = env->cur_state; 863 struct bpf_verifier_stack_elem *elem; 864 int err; 865 866 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); 867 if (!elem) 868 goto err; 869 870 elem->insn_idx = insn_idx; 871 elem->prev_insn_idx = prev_insn_idx; 872 elem->next = env->head; 873 env->head = elem; 874 env->stack_size++; 875 err = copy_verifier_state(&elem->st, cur); 876 if (err) 877 goto err; 878 elem->st.speculative |= speculative; 879 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { 880 verbose(env, "The sequence of %d jumps is too complex.\n", 881 env->stack_size); 882 goto err; 883 } 884 if (elem->st.parent) { 885 ++elem->st.parent->branches; 886 /* WARN_ON(branches > 2) technically makes sense here, 887 * but 888 * 1. speculative states will bump 'branches' for non-branch 889 * instructions 890 * 2. is_state_visited() heuristics may decide not to create 891 * a new state for a sequence of branches and all such current 892 * and cloned states will be pointing to a single parent state 893 * which might have large 'branches' count. 894 */ 895 } 896 return &elem->st; 897 err: 898 free_verifier_state(env->cur_state, true); 899 env->cur_state = NULL; 900 /* pop all elements and return */ 901 while (!pop_stack(env, NULL, NULL)); 902 return NULL; 903 } 904 905 #define CALLER_SAVED_REGS 6 906 static const int caller_saved[CALLER_SAVED_REGS] = { 907 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 908 }; 909 910 static void __mark_reg_not_init(const struct bpf_verifier_env *env, 911 struct bpf_reg_state *reg); 912 913 /* Mark the unknown part of a register (variable offset or scalar value) as 914 * known to have the value @imm. 915 */ 916 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) 917 { 918 /* Clear id, off, and union(map_ptr, range) */ 919 memset(((u8 *)reg) + sizeof(reg->type), 0, 920 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); 921 reg->var_off = tnum_const(imm); 922 reg->smin_value = (s64)imm; 923 reg->smax_value = (s64)imm; 924 reg->umin_value = imm; 925 reg->umax_value = imm; 926 } 927 928 /* Mark the 'variable offset' part of a register as zero. This should be 929 * used only on registers holding a pointer type. 
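 *
 * Effect sketch (derived from __mark_reg_known() above): the register keeps
 * its (pointer) type, var_off becomes tnum_const(0), all four bounds become 0,
 * and id, off and the map_ptr/range union are cleared.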
930 */ 931 static void __mark_reg_known_zero(struct bpf_reg_state *reg) 932 { 933 __mark_reg_known(reg, 0); 934 } 935 936 static void __mark_reg_const_zero(struct bpf_reg_state *reg) 937 { 938 __mark_reg_known(reg, 0); 939 reg->type = SCALAR_VALUE; 940 } 941 942 static void mark_reg_known_zero(struct bpf_verifier_env *env, 943 struct bpf_reg_state *regs, u32 regno) 944 { 945 if (WARN_ON(regno >= MAX_BPF_REG)) { 946 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno); 947 /* Something bad happened, let's kill all regs */ 948 for (regno = 0; regno < MAX_BPF_REG; regno++) 949 __mark_reg_not_init(env, regs + regno); 950 return; 951 } 952 __mark_reg_known_zero(regs + regno); 953 } 954 955 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) 956 { 957 return type_is_pkt_pointer(reg->type); 958 } 959 960 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) 961 { 962 return reg_is_pkt_pointer(reg) || 963 reg->type == PTR_TO_PACKET_END; 964 } 965 966 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */ 967 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, 968 enum bpf_reg_type which) 969 { 970 /* The register can already have a range from prior markings. 971 * This is fine as long as it hasn't been advanced from its 972 * origin. 973 */ 974 return reg->type == which && 975 reg->id == 0 && 976 reg->off == 0 && 977 tnum_equals_const(reg->var_off, 0); 978 } 979 980 /* Attempts to improve min/max values based on var_off information */ 981 static void __update_reg_bounds(struct bpf_reg_state *reg) 982 { 983 /* min signed is max(sign bit) | min(other bits) */ 984 reg->smin_value = max_t(s64, reg->smin_value, 985 reg->var_off.value | (reg->var_off.mask & S64_MIN)); 986 /* max signed is min(sign bit) | max(other bits) */ 987 reg->smax_value = min_t(s64, reg->smax_value, 988 reg->var_off.value | (reg->var_off.mask & S64_MAX)); 989 reg->umin_value = max(reg->umin_value, reg->var_off.value); 990 reg->umax_value = min(reg->umax_value, 991 reg->var_off.value | reg->var_off.mask); 992 } 993 994 /* Uses signed min/max values to inform unsigned, and vice-versa */ 995 static void __reg_deduce_bounds(struct bpf_reg_state *reg) 996 { 997 /* Learn sign from signed bounds. 998 * If we cannot cross the sign boundary, then signed and unsigned bounds 999 * are the same, so combine. This works even in the negative case, e.g. 1000 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. 1001 */ 1002 if (reg->smin_value >= 0 || reg->smax_value < 0) { 1003 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, 1004 reg->umin_value); 1005 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, 1006 reg->umax_value); 1007 return; 1008 } 1009 /* Learn sign from unsigned bounds. Signed bounds cross the sign 1010 * boundary, so we must be careful. 1011 */ 1012 if ((s64)reg->umax_value >= 0) { 1013 /* Positive. We can't learn anything from the smin, but smax 1014 * is positive, hence safe. 1015 */ 1016 reg->smin_value = reg->umin_value; 1017 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, 1018 reg->umax_value); 1019 } else if ((s64)reg->umin_value < 0) { 1020 /* Negative. We can't learn anything from the smax, but smin 1021 * is negative, hence safe. 
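		 * Worked example (a sketch): umin = 0xfffffffffffffffd (-3 as s64)
		 * and umax = 0xffffffffffffffff (-1 as s64) mean every value is
		 * negative as s64, so tightening smin towards umin and taking
		 * smax from umax below is safe.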
1022 */ 1023 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, 1024 reg->umin_value); 1025 reg->smax_value = reg->umax_value; 1026 } 1027 } 1028 1029 /* Attempts to improve var_off based on unsigned min/max information */ 1030 static void __reg_bound_offset(struct bpf_reg_state *reg) 1031 { 1032 reg->var_off = tnum_intersect(reg->var_off, 1033 tnum_range(reg->umin_value, 1034 reg->umax_value)); 1035 } 1036 1037 static void __reg_bound_offset32(struct bpf_reg_state *reg) 1038 { 1039 u64 mask = 0xffffFFFF; 1040 struct tnum range = tnum_range(reg->umin_value & mask, 1041 reg->umax_value & mask); 1042 struct tnum lo32 = tnum_cast(reg->var_off, 4); 1043 struct tnum hi32 = tnum_lshift(tnum_rshift(reg->var_off, 32), 32); 1044 1045 reg->var_off = tnum_or(hi32, tnum_intersect(lo32, range)); 1046 } 1047 1048 /* Reset the min/max bounds of a register */ 1049 static void __mark_reg_unbounded(struct bpf_reg_state *reg) 1050 { 1051 reg->smin_value = S64_MIN; 1052 reg->smax_value = S64_MAX; 1053 reg->umin_value = 0; 1054 reg->umax_value = U64_MAX; 1055 } 1056 1057 /* Mark a register as having a completely unknown (scalar) value. */ 1058 static void __mark_reg_unknown(const struct bpf_verifier_env *env, 1059 struct bpf_reg_state *reg) 1060 { 1061 /* 1062 * Clear type, id, off, and union(map_ptr, range) and 1063 * padding between 'type' and union 1064 */ 1065 memset(reg, 0, offsetof(struct bpf_reg_state, var_off)); 1066 reg->type = SCALAR_VALUE; 1067 reg->var_off = tnum_unknown; 1068 reg->frameno = 0; 1069 reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ? 1070 true : false; 1071 __mark_reg_unbounded(reg); 1072 } 1073 1074 static void mark_reg_unknown(struct bpf_verifier_env *env, 1075 struct bpf_reg_state *regs, u32 regno) 1076 { 1077 if (WARN_ON(regno >= MAX_BPF_REG)) { 1078 verbose(env, "mark_reg_unknown(regs, %u)\n", regno); 1079 /* Something bad happened, let's kill all regs except FP */ 1080 for (regno = 0; regno < BPF_REG_FP; regno++) 1081 __mark_reg_not_init(env, regs + regno); 1082 return; 1083 } 1084 __mark_reg_unknown(env, regs + regno); 1085 } 1086 1087 static void __mark_reg_not_init(const struct bpf_verifier_env *env, 1088 struct bpf_reg_state *reg) 1089 { 1090 __mark_reg_unknown(env, reg); 1091 reg->type = NOT_INIT; 1092 } 1093 1094 static void mark_reg_not_init(struct bpf_verifier_env *env, 1095 struct bpf_reg_state *regs, u32 regno) 1096 { 1097 if (WARN_ON(regno >= MAX_BPF_REG)) { 1098 verbose(env, "mark_reg_not_init(regs, %u)\n", regno); 1099 /* Something bad happened, let's kill all regs except FP */ 1100 for (regno = 0; regno < BPF_REG_FP; regno++) 1101 __mark_reg_not_init(env, regs + regno); 1102 return; 1103 } 1104 __mark_reg_not_init(env, regs + regno); 1105 } 1106 1107 #define DEF_NOT_SUBREG (0) 1108 static void init_reg_state(struct bpf_verifier_env *env, 1109 struct bpf_func_state *state) 1110 { 1111 struct bpf_reg_state *regs = state->regs; 1112 int i; 1113 1114 for (i = 0; i < MAX_BPF_REG; i++) { 1115 mark_reg_not_init(env, regs, i); 1116 regs[i].live = REG_LIVE_NONE; 1117 regs[i].parent = NULL; 1118 regs[i].subreg_def = DEF_NOT_SUBREG; 1119 } 1120 1121 /* frame pointer */ 1122 regs[BPF_REG_FP].type = PTR_TO_STACK; 1123 mark_reg_known_zero(env, regs, BPF_REG_FP); 1124 regs[BPF_REG_FP].frameno = state->frameno; 1125 } 1126 1127 #define BPF_MAIN_FUNC (-1) 1128 static void init_func_state(struct bpf_verifier_env *env, 1129 struct bpf_func_state *state, 1130 int callsite, int frameno, int subprogno) 1131 { 1132 state->callsite = callsite; 1133 
state->frameno = frameno; 1134 state->subprogno = subprogno; 1135 init_reg_state(env, state); 1136 } 1137 1138 enum reg_arg_type { 1139 SRC_OP, /* register is used as source operand */ 1140 DST_OP, /* register is used as destination operand */ 1141 DST_OP_NO_MARK /* same as above, check only, don't mark */ 1142 }; 1143 1144 static int cmp_subprogs(const void *a, const void *b) 1145 { 1146 return ((struct bpf_subprog_info *)a)->start - 1147 ((struct bpf_subprog_info *)b)->start; 1148 } 1149 1150 static int find_subprog(struct bpf_verifier_env *env, int off) 1151 { 1152 struct bpf_subprog_info *p; 1153 1154 p = bsearch(&off, env->subprog_info, env->subprog_cnt, 1155 sizeof(env->subprog_info[0]), cmp_subprogs); 1156 if (!p) 1157 return -ENOENT; 1158 return p - env->subprog_info; 1159 1160 } 1161 1162 static int add_subprog(struct bpf_verifier_env *env, int off) 1163 { 1164 int insn_cnt = env->prog->len; 1165 int ret; 1166 1167 if (off >= insn_cnt || off < 0) { 1168 verbose(env, "call to invalid destination\n"); 1169 return -EINVAL; 1170 } 1171 ret = find_subprog(env, off); 1172 if (ret >= 0) 1173 return 0; 1174 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { 1175 verbose(env, "too many subprograms\n"); 1176 return -E2BIG; 1177 } 1178 env->subprog_info[env->subprog_cnt++].start = off; 1179 sort(env->subprog_info, env->subprog_cnt, 1180 sizeof(env->subprog_info[0]), cmp_subprogs, NULL); 1181 return 0; 1182 } 1183 1184 static int check_subprogs(struct bpf_verifier_env *env) 1185 { 1186 int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; 1187 struct bpf_subprog_info *subprog = env->subprog_info; 1188 struct bpf_insn *insn = env->prog->insnsi; 1189 int insn_cnt = env->prog->len; 1190 1191 /* Add entry function. */ 1192 ret = add_subprog(env, 0); 1193 if (ret < 0) 1194 return ret; 1195 1196 /* determine subprog starts. The end is one before the next starts */ 1197 for (i = 0; i < insn_cnt; i++) { 1198 if (insn[i].code != (BPF_JMP | BPF_CALL)) 1199 continue; 1200 if (insn[i].src_reg != BPF_PSEUDO_CALL) 1201 continue; 1202 if (!env->allow_ptr_leaks) { 1203 verbose(env, "function calls to other bpf functions are allowed for root only\n"); 1204 return -EPERM; 1205 } 1206 ret = add_subprog(env, i + insn[i].imm + 1); 1207 if (ret < 0) 1208 return ret; 1209 } 1210 1211 /* Add a fake 'exit' subprog which could simplify subprog iteration 1212 * logic. 'subprog_cnt' should not be increased. 
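	 * For example (illustrative values): a 12-insn program with a second
	 * subprog starting at insn 7 ends up with
	 *	subprog_info[] = { {.start = 0}, {.start = 7}, {.start = 12} }
	 * where the last entry is the fake 'exit' subprog and subprog_cnt stays 2.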
1213 */ 1214 subprog[env->subprog_cnt].start = insn_cnt; 1215 1216 if (env->log.level & BPF_LOG_LEVEL2) 1217 for (i = 0; i < env->subprog_cnt; i++) 1218 verbose(env, "func#%d @%d\n", i, subprog[i].start); 1219 1220 /* now check that all jumps are within the same subprog */ 1221 subprog_start = subprog[cur_subprog].start; 1222 subprog_end = subprog[cur_subprog + 1].start; 1223 for (i = 0; i < insn_cnt; i++) { 1224 u8 code = insn[i].code; 1225 1226 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) 1227 goto next; 1228 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) 1229 goto next; 1230 off = i + insn[i].off + 1; 1231 if (off < subprog_start || off >= subprog_end) { 1232 verbose(env, "jump out of range from insn %d to %d\n", i, off); 1233 return -EINVAL; 1234 } 1235 next: 1236 if (i == subprog_end - 1) { 1237 /* to avoid fall-through from one subprog into another 1238 * the last insn of the subprog should be either exit 1239 * or unconditional jump back 1240 */ 1241 if (code != (BPF_JMP | BPF_EXIT) && 1242 code != (BPF_JMP | BPF_JA)) { 1243 verbose(env, "last insn is not an exit or jmp\n"); 1244 return -EINVAL; 1245 } 1246 subprog_start = subprog_end; 1247 cur_subprog++; 1248 if (cur_subprog < env->subprog_cnt) 1249 subprog_end = subprog[cur_subprog + 1].start; 1250 } 1251 } 1252 return 0; 1253 } 1254 1255 /* Parentage chain of this register (or stack slot) should take care of all 1256 * issues like callee-saved registers, stack slot allocation time, etc. 1257 */ 1258 static int mark_reg_read(struct bpf_verifier_env *env, 1259 const struct bpf_reg_state *state, 1260 struct bpf_reg_state *parent, u8 flag) 1261 { 1262 bool writes = parent == state->parent; /* Observe write marks */ 1263 int cnt = 0; 1264 1265 while (parent) { 1266 /* if read wasn't screened by an earlier write ... */ 1267 if (writes && state->live & REG_LIVE_WRITTEN) 1268 break; 1269 if (parent->live & REG_LIVE_DONE) { 1270 verbose(env, "verifier BUG type %s var_off %lld off %d\n", 1271 reg_type_str[parent->type], 1272 parent->var_off.value, parent->off); 1273 return -EFAULT; 1274 } 1275 /* The first condition is more likely to be true than the 1276 * second, checked it first. 1277 */ 1278 if ((parent->live & REG_LIVE_READ) == flag || 1279 parent->live & REG_LIVE_READ64) 1280 /* The parentage chain never changes and 1281 * this parent was already marked as LIVE_READ. 1282 * There is no need to keep walking the chain again and 1283 * keep re-marking all parents as LIVE_READ. 1284 * This case happens when the same register is read 1285 * multiple times without writes into it in-between. 1286 * Also, if parent has the stronger REG_LIVE_READ64 set, 1287 * then no need to set the weak REG_LIVE_READ32. 1288 */ 1289 break; 1290 /* ... then we depend on parent's value */ 1291 parent->live |= flag; 1292 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */ 1293 if (flag == REG_LIVE_READ64) 1294 parent->live &= ~REG_LIVE_READ32; 1295 state = parent; 1296 parent = state->parent; 1297 writes = true; 1298 cnt++; 1299 } 1300 1301 if (env->longest_mark_read_walk < cnt) 1302 env->longest_mark_read_walk = cnt; 1303 return 0; 1304 } 1305 1306 /* This function is supposed to be used by the following 32-bit optimization 1307 * code only. It returns TRUE if the source or destination register operates 1308 * on 64-bit, otherwise return FALSE. 
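 *
 * For example (a sketch based on the checks below):
 *	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1)		-> true  (64-bit ALU)
 *	BPF_MOV32_IMM(BPF_REG_1, 1)			-> false (defines low 32 bits only)
 *	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0)	-> true for its DST_OP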
1309 */ 1310 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, 1311 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t) 1312 { 1313 u8 code, class, op; 1314 1315 code = insn->code; 1316 class = BPF_CLASS(code); 1317 op = BPF_OP(code); 1318 if (class == BPF_JMP) { 1319 /* BPF_EXIT for "main" will reach here. Return TRUE 1320 * conservatively. 1321 */ 1322 if (op == BPF_EXIT) 1323 return true; 1324 if (op == BPF_CALL) { 1325 /* BPF to BPF call will reach here because of marking 1326 * caller saved clobber with DST_OP_NO_MARK for which we 1327 * don't care the register def because they are anyway 1328 * marked as NOT_INIT already. 1329 */ 1330 if (insn->src_reg == BPF_PSEUDO_CALL) 1331 return false; 1332 /* Helper call will reach here because of arg type 1333 * check, conservatively return TRUE. 1334 */ 1335 if (t == SRC_OP) 1336 return true; 1337 1338 return false; 1339 } 1340 } 1341 1342 if (class == BPF_ALU64 || class == BPF_JMP || 1343 /* BPF_END always use BPF_ALU class. */ 1344 (class == BPF_ALU && op == BPF_END && insn->imm == 64)) 1345 return true; 1346 1347 if (class == BPF_ALU || class == BPF_JMP32) 1348 return false; 1349 1350 if (class == BPF_LDX) { 1351 if (t != SRC_OP) 1352 return BPF_SIZE(code) == BPF_DW; 1353 /* LDX source must be ptr. */ 1354 return true; 1355 } 1356 1357 if (class == BPF_STX) { 1358 if (reg->type != SCALAR_VALUE) 1359 return true; 1360 return BPF_SIZE(code) == BPF_DW; 1361 } 1362 1363 if (class == BPF_LD) { 1364 u8 mode = BPF_MODE(code); 1365 1366 /* LD_IMM64 */ 1367 if (mode == BPF_IMM) 1368 return true; 1369 1370 /* Both LD_IND and LD_ABS return 32-bit data. */ 1371 if (t != SRC_OP) 1372 return false; 1373 1374 /* Implicit ctx ptr. */ 1375 if (regno == BPF_REG_6) 1376 return true; 1377 1378 /* Explicit source could be any width. */ 1379 return true; 1380 } 1381 1382 if (class == BPF_ST) 1383 /* The only source register for BPF_ST is a ptr. */ 1384 return true; 1385 1386 /* Conservatively return true at default. */ 1387 return true; 1388 } 1389 1390 /* Return TRUE if INSN doesn't have explicit value define. */ 1391 static bool insn_no_def(struct bpf_insn *insn) 1392 { 1393 u8 class = BPF_CLASS(insn->code); 1394 1395 return (class == BPF_JMP || class == BPF_JMP32 || 1396 class == BPF_STX || class == BPF_ST); 1397 } 1398 1399 /* Return TRUE if INSN has defined any 32-bit value explicitly. */ 1400 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn) 1401 { 1402 if (insn_no_def(insn)) 1403 return false; 1404 1405 return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP); 1406 } 1407 1408 static void mark_insn_zext(struct bpf_verifier_env *env, 1409 struct bpf_reg_state *reg) 1410 { 1411 s32 def_idx = reg->subreg_def; 1412 1413 if (def_idx == DEF_NOT_SUBREG) 1414 return; 1415 1416 env->insn_aux_data[def_idx - 1].zext_dst = true; 1417 /* The dst will be zero extended, so won't be sub-register anymore. 
*/ 1418 reg->subreg_def = DEF_NOT_SUBREG; 1419 } 1420 1421 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, 1422 enum reg_arg_type t) 1423 { 1424 struct bpf_verifier_state *vstate = env->cur_state; 1425 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 1426 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; 1427 struct bpf_reg_state *reg, *regs = state->regs; 1428 bool rw64; 1429 1430 if (regno >= MAX_BPF_REG) { 1431 verbose(env, "R%d is invalid\n", regno); 1432 return -EINVAL; 1433 } 1434 1435 reg = ®s[regno]; 1436 rw64 = is_reg64(env, insn, regno, reg, t); 1437 if (t == SRC_OP) { 1438 /* check whether register used as source operand can be read */ 1439 if (reg->type == NOT_INIT) { 1440 verbose(env, "R%d !read_ok\n", regno); 1441 return -EACCES; 1442 } 1443 /* We don't need to worry about FP liveness because it's read-only */ 1444 if (regno == BPF_REG_FP) 1445 return 0; 1446 1447 if (rw64) 1448 mark_insn_zext(env, reg); 1449 1450 return mark_reg_read(env, reg, reg->parent, 1451 rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32); 1452 } else { 1453 /* check whether register used as dest operand can be written to */ 1454 if (regno == BPF_REG_FP) { 1455 verbose(env, "frame pointer is read only\n"); 1456 return -EACCES; 1457 } 1458 reg->live |= REG_LIVE_WRITTEN; 1459 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1; 1460 if (t == DST_OP) 1461 mark_reg_unknown(env, regs, regno); 1462 } 1463 return 0; 1464 } 1465 1466 /* for any branch, call, exit record the history of jmps in the given state */ 1467 static int push_jmp_history(struct bpf_verifier_env *env, 1468 struct bpf_verifier_state *cur) 1469 { 1470 u32 cnt = cur->jmp_history_cnt; 1471 struct bpf_idx_pair *p; 1472 1473 cnt++; 1474 p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER); 1475 if (!p) 1476 return -ENOMEM; 1477 p[cnt - 1].idx = env->insn_idx; 1478 p[cnt - 1].prev_idx = env->prev_insn_idx; 1479 cur->jmp_history = p; 1480 cur->jmp_history_cnt = cnt; 1481 return 0; 1482 } 1483 1484 /* Backtrack one insn at a time. If idx is not at the top of recorded 1485 * history then previous instruction came from straight line execution. 1486 */ 1487 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, 1488 u32 *history) 1489 { 1490 u32 cnt = *history; 1491 1492 if (cnt && st->jmp_history[cnt - 1].idx == i) { 1493 i = st->jmp_history[cnt - 1].prev_idx; 1494 (*history)--; 1495 } else { 1496 i--; 1497 } 1498 return i; 1499 } 1500 1501 /* For given verifier state backtrack_insn() is called from the last insn to 1502 * the first insn. Its purpose is to compute a bitmask of registers and 1503 * stack slots that needs precision in the parent verifier state. 
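 *
 * Mask convention (a reading aid added here):
 *	bit r of *reg_mask   -> register Rr still needs a precise value
 *	bit i of *stack_mask -> the 8-byte slot at [fp - 8*(i+1)] still needs one
 * e.g. *reg_mask == 0x09 tracks r0 and r3.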
1504 */ 1505 static int backtrack_insn(struct bpf_verifier_env *env, int idx, 1506 u32 *reg_mask, u64 *stack_mask) 1507 { 1508 const struct bpf_insn_cbs cbs = { 1509 .cb_print = verbose, 1510 .private_data = env, 1511 }; 1512 struct bpf_insn *insn = env->prog->insnsi + idx; 1513 u8 class = BPF_CLASS(insn->code); 1514 u8 opcode = BPF_OP(insn->code); 1515 u8 mode = BPF_MODE(insn->code); 1516 u32 dreg = 1u << insn->dst_reg; 1517 u32 sreg = 1u << insn->src_reg; 1518 u32 spi; 1519 1520 if (insn->code == 0) 1521 return 0; 1522 if (env->log.level & BPF_LOG_LEVEL) { 1523 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); 1524 verbose(env, "%d: ", idx); 1525 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 1526 } 1527 1528 if (class == BPF_ALU || class == BPF_ALU64) { 1529 if (!(*reg_mask & dreg)) 1530 return 0; 1531 if (opcode == BPF_MOV) { 1532 if (BPF_SRC(insn->code) == BPF_X) { 1533 /* dreg = sreg 1534 * dreg needs precision after this insn 1535 * sreg needs precision before this insn 1536 */ 1537 *reg_mask &= ~dreg; 1538 *reg_mask |= sreg; 1539 } else { 1540 /* dreg = K 1541 * dreg needs precision after this insn. 1542 * Corresponding register is already marked 1543 * as precise=true in this verifier state. 1544 * No further markings in parent are necessary 1545 */ 1546 *reg_mask &= ~dreg; 1547 } 1548 } else { 1549 if (BPF_SRC(insn->code) == BPF_X) { 1550 /* dreg += sreg 1551 * both dreg and sreg need precision 1552 * before this insn 1553 */ 1554 *reg_mask |= sreg; 1555 } /* else dreg += K 1556 * dreg still needs precision before this insn 1557 */ 1558 } 1559 } else if (class == BPF_LDX) { 1560 if (!(*reg_mask & dreg)) 1561 return 0; 1562 *reg_mask &= ~dreg; 1563 1564 /* scalars can only be spilled into stack w/o losing precision. 1565 * Load from any other memory can be zero extended. 1566 * The desire to keep that precision is already indicated 1567 * by 'precise' mark in corresponding register of this state. 1568 * No further tracking necessary. 1569 */ 1570 if (insn->src_reg != BPF_REG_FP) 1571 return 0; 1572 if (BPF_SIZE(insn->code) != BPF_DW) 1573 return 0; 1574 1575 /* dreg = *(u64 *)[fp - off] was a fill from the stack. 1576 * that [fp - off] slot contains scalar that needs to be 1577 * tracked with precision 1578 */ 1579 spi = (-insn->off - 1) / BPF_REG_SIZE; 1580 if (spi >= 64) { 1581 verbose(env, "BUG spi %d\n", spi); 1582 WARN_ONCE(1, "verifier backtracking bug"); 1583 return -EFAULT; 1584 } 1585 *stack_mask |= 1ull << spi; 1586 } else if (class == BPF_STX || class == BPF_ST) { 1587 if (*reg_mask & dreg) 1588 /* stx & st shouldn't be using _scalar_ dst_reg 1589 * to access memory. It means backtracking 1590 * encountered a case of pointer subtraction. 
1591 */ 1592 return -ENOTSUPP; 1593 /* scalars can only be spilled into stack */ 1594 if (insn->dst_reg != BPF_REG_FP) 1595 return 0; 1596 if (BPF_SIZE(insn->code) != BPF_DW) 1597 return 0; 1598 spi = (-insn->off - 1) / BPF_REG_SIZE; 1599 if (spi >= 64) { 1600 verbose(env, "BUG spi %d\n", spi); 1601 WARN_ONCE(1, "verifier backtracking bug"); 1602 return -EFAULT; 1603 } 1604 if (!(*stack_mask & (1ull << spi))) 1605 return 0; 1606 *stack_mask &= ~(1ull << spi); 1607 if (class == BPF_STX) 1608 *reg_mask |= sreg; 1609 } else if (class == BPF_JMP || class == BPF_JMP32) { 1610 if (opcode == BPF_CALL) { 1611 if (insn->src_reg == BPF_PSEUDO_CALL) 1612 return -ENOTSUPP; 1613 /* regular helper call sets R0 */ 1614 *reg_mask &= ~1; 1615 if (*reg_mask & 0x3f) { 1616 /* if backtracing was looking for registers R1-R5 1617 * they should have been found already. 1618 */ 1619 verbose(env, "BUG regs %x\n", *reg_mask); 1620 WARN_ONCE(1, "verifier backtracking bug"); 1621 return -EFAULT; 1622 } 1623 } else if (opcode == BPF_EXIT) { 1624 return -ENOTSUPP; 1625 } 1626 } else if (class == BPF_LD) { 1627 if (!(*reg_mask & dreg)) 1628 return 0; 1629 *reg_mask &= ~dreg; 1630 /* It's ld_imm64 or ld_abs or ld_ind. 1631 * For ld_imm64 no further tracking of precision 1632 * into parent is necessary 1633 */ 1634 if (mode == BPF_IND || mode == BPF_ABS) 1635 /* to be analyzed */ 1636 return -ENOTSUPP; 1637 } 1638 return 0; 1639 } 1640 1641 /* the scalar precision tracking algorithm: 1642 * . at the start all registers have precise=false. 1643 * . scalar ranges are tracked as normal through alu and jmp insns. 1644 * . once precise value of the scalar register is used in: 1645 * . ptr + scalar alu 1646 * . if (scalar cond K|scalar) 1647 * . helper_call(.., scalar, ...) where ARG_CONST is expected 1648 * backtrack through the verifier states and mark all registers and 1649 * stack slots with spilled constants that these scalar regisers 1650 * should be precise. 1651 * . during state pruning two registers (or spilled stack slots) 1652 * are equivalent if both are not precise. 1653 * 1654 * Note the verifier cannot simply walk register parentage chain, 1655 * since many different registers and stack slots could have been 1656 * used to compute single precise scalar. 1657 * 1658 * The approach of starting with precise=true for all registers and then 1659 * backtrack to mark a register as not precise when the verifier detects 1660 * that program doesn't care about specific value (e.g., when helper 1661 * takes register as ARG_ANYTHING parameter) is not safe. 1662 * 1663 * It's ok to walk single parentage chain of the verifier states. 1664 * It's possible that this backtracking will go all the way till 1st insn. 1665 * All other branches will be explored for needing precision later. 1666 * 1667 * The backtracking needs to deal with cases like: 1668 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0) 1669 * r9 -= r8 1670 * r5 = r9 1671 * if r5 > 0x79f goto pc+7 1672 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff)) 1673 * r5 += 1 1674 * ... 1675 * call bpf_perf_event_output#25 1676 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO 1677 * 1678 * and this case: 1679 * r6 = 1 1680 * call foo // uses callee's r6 inside to compute r0 1681 * r0 += r6 1682 * if r0 == 0 goto 1683 * 1684 * to track above reg_mask/stack_mask needs to be independent for each frame. 
1685 * 1686 * Also if parent's curframe > frame where backtracking started, 1687 * the verifier need to mark registers in both frames, otherwise callees 1688 * may incorrectly prune callers. This is similar to 1689 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") 1690 * 1691 * For now backtracking falls back into conservative marking. 1692 */ 1693 static void mark_all_scalars_precise(struct bpf_verifier_env *env, 1694 struct bpf_verifier_state *st) 1695 { 1696 struct bpf_func_state *func; 1697 struct bpf_reg_state *reg; 1698 int i, j; 1699 1700 /* big hammer: mark all scalars precise in this path. 1701 * pop_stack may still get !precise scalars. 1702 */ 1703 for (; st; st = st->parent) 1704 for (i = 0; i <= st->curframe; i++) { 1705 func = st->frame[i]; 1706 for (j = 0; j < BPF_REG_FP; j++) { 1707 reg = &func->regs[j]; 1708 if (reg->type != SCALAR_VALUE) 1709 continue; 1710 reg->precise = true; 1711 } 1712 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { 1713 if (func->stack[j].slot_type[0] != STACK_SPILL) 1714 continue; 1715 reg = &func->stack[j].spilled_ptr; 1716 if (reg->type != SCALAR_VALUE) 1717 continue; 1718 reg->precise = true; 1719 } 1720 } 1721 } 1722 1723 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, 1724 int spi) 1725 { 1726 struct bpf_verifier_state *st = env->cur_state; 1727 int first_idx = st->first_insn_idx; 1728 int last_idx = env->insn_idx; 1729 struct bpf_func_state *func; 1730 struct bpf_reg_state *reg; 1731 u32 reg_mask = regno >= 0 ? 1u << regno : 0; 1732 u64 stack_mask = spi >= 0 ? 1ull << spi : 0; 1733 bool skip_first = true; 1734 bool new_marks = false; 1735 int i, err; 1736 1737 if (!env->allow_ptr_leaks) 1738 /* backtracking is root only for now */ 1739 return 0; 1740 1741 func = st->frame[st->curframe]; 1742 if (regno >= 0) { 1743 reg = &func->regs[regno]; 1744 if (reg->type != SCALAR_VALUE) { 1745 WARN_ONCE(1, "backtracing misuse"); 1746 return -EFAULT; 1747 } 1748 if (!reg->precise) 1749 new_marks = true; 1750 else 1751 reg_mask = 0; 1752 reg->precise = true; 1753 } 1754 1755 while (spi >= 0) { 1756 if (func->stack[spi].slot_type[0] != STACK_SPILL) { 1757 stack_mask = 0; 1758 break; 1759 } 1760 reg = &func->stack[spi].spilled_ptr; 1761 if (reg->type != SCALAR_VALUE) { 1762 stack_mask = 0; 1763 break; 1764 } 1765 if (!reg->precise) 1766 new_marks = true; 1767 else 1768 stack_mask = 0; 1769 reg->precise = true; 1770 break; 1771 } 1772 1773 if (!new_marks) 1774 return 0; 1775 if (!reg_mask && !stack_mask) 1776 return 0; 1777 for (;;) { 1778 DECLARE_BITMAP(mask, 64); 1779 u32 history = st->jmp_history_cnt; 1780 1781 if (env->log.level & BPF_LOG_LEVEL) 1782 verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); 1783 for (i = last_idx;;) { 1784 if (skip_first) { 1785 err = 0; 1786 skip_first = false; 1787 } else { 1788 err = backtrack_insn(env, i, ®_mask, &stack_mask); 1789 } 1790 if (err == -ENOTSUPP) { 1791 mark_all_scalars_precise(env, st); 1792 return 0; 1793 } else if (err) { 1794 return err; 1795 } 1796 if (!reg_mask && !stack_mask) 1797 /* Found assignment(s) into tracked register in this state. 1798 * Since this state is already marked, just return. 1799 * Nothing to be tracked further in the parent state. 
1800 */ 1801 return 0; 1802 if (i == first_idx) 1803 break; 1804 i = get_prev_insn_idx(st, i, &history); 1805 if (i >= env->prog->len) { 1806 /* This can happen if backtracking reached insn 0 1807 * and there are still reg_mask or stack_mask 1808 * to backtrack. 1809 * It means the backtracking missed the spot where 1810 * particular register was initialized with a constant. 1811 */ 1812 verbose(env, "BUG backtracking idx %d\n", i); 1813 WARN_ONCE(1, "verifier backtracking bug"); 1814 return -EFAULT; 1815 } 1816 } 1817 st = st->parent; 1818 if (!st) 1819 break; 1820 1821 new_marks = false; 1822 func = st->frame[st->curframe]; 1823 bitmap_from_u64(mask, reg_mask); 1824 for_each_set_bit(i, mask, 32) { 1825 reg = &func->regs[i]; 1826 if (reg->type != SCALAR_VALUE) { 1827 reg_mask &= ~(1u << i); 1828 continue; 1829 } 1830 if (!reg->precise) 1831 new_marks = true; 1832 reg->precise = true; 1833 } 1834 1835 bitmap_from_u64(mask, stack_mask); 1836 for_each_set_bit(i, mask, 64) { 1837 if (i >= func->allocated_stack / BPF_REG_SIZE) { 1838 /* the sequence of instructions: 1839 * 2: (bf) r3 = r10 1840 * 3: (7b) *(u64 *)(r3 -8) = r0 1841 * 4: (79) r4 = *(u64 *)(r10 -8) 1842 * doesn't contain jmps. It's backtracked 1843 * as a single block. 1844 * During backtracking insn 3 is not recognized as 1845 * stack access, so at the end of backtracking 1846 * stack slot fp-8 is still marked in stack_mask. 1847 * However the parent state may not have accessed 1848 * fp-8 and it's "unallocated" stack space. 1849 * In such case fallback to conservative. 1850 */ 1851 mark_all_scalars_precise(env, st); 1852 return 0; 1853 } 1854 1855 if (func->stack[i].slot_type[0] != STACK_SPILL) { 1856 stack_mask &= ~(1ull << i); 1857 continue; 1858 } 1859 reg = &func->stack[i].spilled_ptr; 1860 if (reg->type != SCALAR_VALUE) { 1861 stack_mask &= ~(1ull << i); 1862 continue; 1863 } 1864 if (!reg->precise) 1865 new_marks = true; 1866 reg->precise = true; 1867 } 1868 if (env->log.level & BPF_LOG_LEVEL) { 1869 print_verifier_state(env, func); 1870 verbose(env, "parent %s regs=%x stack=%llx marks\n", 1871 new_marks ? "didn't have" : "already had", 1872 reg_mask, stack_mask); 1873 } 1874 1875 if (!reg_mask && !stack_mask) 1876 break; 1877 if (!new_marks) 1878 break; 1879 1880 last_idx = st->last_insn_idx; 1881 first_idx = st->first_insn_idx; 1882 } 1883 return 0; 1884 } 1885 1886 static int mark_chain_precision(struct bpf_verifier_env *env, int regno) 1887 { 1888 return __mark_chain_precision(env, regno, -1); 1889 } 1890 1891 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) 1892 { 1893 return __mark_chain_precision(env, -1, spi); 1894 } 1895 1896 static bool is_spillable_regtype(enum bpf_reg_type type) 1897 { 1898 switch (type) { 1899 case PTR_TO_MAP_VALUE: 1900 case PTR_TO_MAP_VALUE_OR_NULL: 1901 case PTR_TO_STACK: 1902 case PTR_TO_CTX: 1903 case PTR_TO_PACKET: 1904 case PTR_TO_PACKET_META: 1905 case PTR_TO_PACKET_END: 1906 case PTR_TO_FLOW_KEYS: 1907 case CONST_PTR_TO_MAP: 1908 case PTR_TO_SOCKET: 1909 case PTR_TO_SOCKET_OR_NULL: 1910 case PTR_TO_SOCK_COMMON: 1911 case PTR_TO_SOCK_COMMON_OR_NULL: 1912 case PTR_TO_TCP_SOCK: 1913 case PTR_TO_TCP_SOCK_OR_NULL: 1914 case PTR_TO_XDP_SOCK: 1915 case PTR_TO_BTF_ID: 1916 return true; 1917 default: 1918 return false; 1919 } 1920 } 1921 1922 /* Does this register contain a constant zero? 
*/
1923 static bool register_is_null(struct bpf_reg_state *reg)
1924 {
1925 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1926 }
1927
1928 static bool register_is_const(struct bpf_reg_state *reg)
1929 {
1930 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
1931 }
1932
1933 static void save_register_state(struct bpf_func_state *state,
1934 int spi, struct bpf_reg_state *reg)
1935 {
1936 int i;
1937
1938 state->stack[spi].spilled_ptr = *reg;
1939 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1940
1941 for (i = 0; i < BPF_REG_SIZE; i++)
1942 state->stack[spi].slot_type[i] = STACK_SPILL;
1943 }
1944
1945 /* check_stack_read/write functions track spill/fill of registers,
1946 * stack boundary and alignment are checked in check_mem_access()
1947 */
1948 static int check_stack_write(struct bpf_verifier_env *env,
1949 struct bpf_func_state *state, /* func where register points to */
1950 int off, int size, int value_regno, int insn_idx)
1951 {
1952 struct bpf_func_state *cur; /* state of the current function */
1953 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
1954 u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
1955 struct bpf_reg_state *reg = NULL;
1956
1957 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
1958 state->acquired_refs, true);
1959 if (err)
1960 return err;
1961 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
1962 * so it's aligned access and [off, off + size) are within stack limits
1963 */
1964 if (!env->allow_ptr_leaks &&
1965 state->stack[spi].slot_type[0] == STACK_SPILL &&
1966 size != BPF_REG_SIZE) {
1967 verbose(env, "attempt to corrupt spilled pointer on stack\n");
1968 return -EACCES;
1969 }
1970
1971 cur = env->cur_state->frame[env->cur_state->curframe];
1972 if (value_regno >= 0)
1973 reg = &cur->regs[value_regno];
1974
1975 if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
1976 !register_is_null(reg) && env->allow_ptr_leaks) {
1977 if (dst_reg != BPF_REG_FP) {
1978 /* The backtracking logic can only recognize explicit
1979 * stack slot addresses like [fp - 8]. Spills of a
1980 * scalar via a different register have to be conservative.
1981 * Backtrack from here and mark all registers as precise
1982 * that contributed into 'reg' being a constant.
1983 */ 1984 err = mark_chain_precision(env, value_regno); 1985 if (err) 1986 return err; 1987 } 1988 save_register_state(state, spi, reg); 1989 } else if (reg && is_spillable_regtype(reg->type)) { 1990 /* register containing pointer is being spilled into stack */ 1991 if (size != BPF_REG_SIZE) { 1992 verbose_linfo(env, insn_idx, "; "); 1993 verbose(env, "invalid size of register spill\n"); 1994 return -EACCES; 1995 } 1996 1997 if (state != cur && reg->type == PTR_TO_STACK) { 1998 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); 1999 return -EINVAL; 2000 } 2001 2002 if (!env->allow_ptr_leaks) { 2003 bool sanitize = false; 2004 2005 if (state->stack[spi].slot_type[0] == STACK_SPILL && 2006 register_is_const(&state->stack[spi].spilled_ptr)) 2007 sanitize = true; 2008 for (i = 0; i < BPF_REG_SIZE; i++) 2009 if (state->stack[spi].slot_type[i] == STACK_MISC) { 2010 sanitize = true; 2011 break; 2012 } 2013 if (sanitize) { 2014 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; 2015 int soff = (-spi - 1) * BPF_REG_SIZE; 2016 2017 /* detected reuse of integer stack slot with a pointer 2018 * which means either llvm is reusing stack slot or 2019 * an attacker is trying to exploit CVE-2018-3639 2020 * (speculative store bypass) 2021 * Have to sanitize that slot with preemptive 2022 * store of zero. 2023 */ 2024 if (*poff && *poff != soff) { 2025 /* disallow programs where single insn stores 2026 * into two different stack slots, since verifier 2027 * cannot sanitize them 2028 */ 2029 verbose(env, 2030 "insn %d cannot access two stack slots fp%d and fp%d", 2031 insn_idx, *poff, soff); 2032 return -EINVAL; 2033 } 2034 *poff = soff; 2035 } 2036 } 2037 save_register_state(state, spi, reg); 2038 } else { 2039 u8 type = STACK_MISC; 2040 2041 /* regular write of data into stack destroys any spilled ptr */ 2042 state->stack[spi].spilled_ptr.type = NOT_INIT; 2043 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ 2044 if (state->stack[spi].slot_type[0] == STACK_SPILL) 2045 for (i = 0; i < BPF_REG_SIZE; i++) 2046 state->stack[spi].slot_type[i] = STACK_MISC; 2047 2048 /* only mark the slot as written if all 8 bytes were written 2049 * otherwise read propagation may incorrectly stop too soon 2050 * when stack slots are partially written. 2051 * This heuristic means that read propagation will be 2052 * conservative, since it will add reg_live_read marks 2053 * to stack slots all the way to first state when programs 2054 * writes+reads less than 8 bytes 2055 */ 2056 if (size == BPF_REG_SIZE) 2057 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; 2058 2059 /* when we zero initialize stack slots mark them as such */ 2060 if (reg && register_is_null(reg)) { 2061 /* backtracking doesn't work for STACK_ZERO yet. */ 2062 err = mark_chain_precision(env, value_regno); 2063 if (err) 2064 return err; 2065 type = STACK_ZERO; 2066 } 2067 2068 /* Mark slots affected by this stack write. 
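 * For instance, a 4-byte store like "*(u32 *)(r10 - 4) = r0" sets only
 * the four slot_type bytes covering fp-4..fp-1 to 'type' (STACK_MISC,
 * or STACK_ZERO when a known zero was stored); the rest of the 8-byte
 * slot keeps its previous marking (a former spill was already downgraded
 * to STACK_MISC above), and the slot is not marked REG_LIVE_WRITTEN
 * since size != BPF_REG_SIZE.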
*/ 2069 for (i = 0; i < size; i++) 2070 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = 2071 type; 2072 } 2073 return 0; 2074 } 2075 2076 static int check_stack_read(struct bpf_verifier_env *env, 2077 struct bpf_func_state *reg_state /* func where register points to */, 2078 int off, int size, int value_regno) 2079 { 2080 struct bpf_verifier_state *vstate = env->cur_state; 2081 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 2082 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; 2083 struct bpf_reg_state *reg; 2084 u8 *stype; 2085 2086 if (reg_state->allocated_stack <= slot) { 2087 verbose(env, "invalid read from stack off %d+0 size %d\n", 2088 off, size); 2089 return -EACCES; 2090 } 2091 stype = reg_state->stack[spi].slot_type; 2092 reg = ®_state->stack[spi].spilled_ptr; 2093 2094 if (stype[0] == STACK_SPILL) { 2095 if (size != BPF_REG_SIZE) { 2096 if (reg->type != SCALAR_VALUE) { 2097 verbose_linfo(env, env->insn_idx, "; "); 2098 verbose(env, "invalid size of register fill\n"); 2099 return -EACCES; 2100 } 2101 if (value_regno >= 0) { 2102 mark_reg_unknown(env, state->regs, value_regno); 2103 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2104 } 2105 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); 2106 return 0; 2107 } 2108 for (i = 1; i < BPF_REG_SIZE; i++) { 2109 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { 2110 verbose(env, "corrupted spill memory\n"); 2111 return -EACCES; 2112 } 2113 } 2114 2115 if (value_regno >= 0) { 2116 /* restore register state from stack */ 2117 state->regs[value_regno] = *reg; 2118 /* mark reg as written since spilled pointer state likely 2119 * has its liveness marks cleared by is_state_visited() 2120 * which resets stack/reg liveness for state transitions 2121 */ 2122 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2123 } 2124 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); 2125 } else { 2126 int zeros = 0; 2127 2128 for (i = 0; i < size; i++) { 2129 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC) 2130 continue; 2131 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) { 2132 zeros++; 2133 continue; 2134 } 2135 verbose(env, "invalid read from stack off %d+%d size %d\n", 2136 off, i, size); 2137 return -EACCES; 2138 } 2139 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); 2140 if (value_regno >= 0) { 2141 if (zeros == size) { 2142 /* any size read into register is zero extended, 2143 * so the whole register == const_zero 2144 */ 2145 __mark_reg_const_zero(&state->regs[value_regno]); 2146 /* backtracking doesn't support STACK_ZERO yet, 2147 * so mark it precise here, so that later 2148 * backtracking can stop here. 2149 * Backtracking may not need this if this register 2150 * doesn't participate in pointer adjustment. 2151 * Forward propagation of precise flag is not 2152 * necessary either. This mark is only to stop 2153 * backtracking. Any register that contributed 2154 * to const 0 was marked precise before spill. 2155 */ 2156 state->regs[value_regno].precise = true; 2157 } else { 2158 /* have read misc data from the stack */ 2159 mark_reg_unknown(env, state->regs, value_regno); 2160 } 2161 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2162 } 2163 } 2164 return 0; 2165 } 2166 2167 static int check_stack_access(struct bpf_verifier_env *env, 2168 const struct bpf_reg_state *reg, 2169 int off, int size) 2170 { 2171 /* Stack accesses must be at a fixed offset, so that we 2172 * can determine what type of data were returned. See 2173 * check_stack_read(). 
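 *
 * For instance (hypothetical snippet), a variable-offset access such as:
 *   r2 = r10
 *   r2 += r3              // r3 is a scalar without a known constant value
 *   r1 = *(u64 *)(r2 - 8)
 * leaves r2 with a non-constant var_off and is rejected below with
 * "variable stack access".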
2174 */ 2175 if (!tnum_is_const(reg->var_off)) { 2176 char tn_buf[48]; 2177 2178 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2179 verbose(env, "variable stack access var_off=%s off=%d size=%d\n", 2180 tn_buf, off, size); 2181 return -EACCES; 2182 } 2183 2184 if (off >= 0 || off < -MAX_BPF_STACK) { 2185 verbose(env, "invalid stack off=%d size=%d\n", off, size); 2186 return -EACCES; 2187 } 2188 2189 return 0; 2190 } 2191 2192 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, 2193 int off, int size, enum bpf_access_type type) 2194 { 2195 struct bpf_reg_state *regs = cur_regs(env); 2196 struct bpf_map *map = regs[regno].map_ptr; 2197 u32 cap = bpf_map_flags_to_cap(map); 2198 2199 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) { 2200 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", 2201 map->value_size, off, size); 2202 return -EACCES; 2203 } 2204 2205 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) { 2206 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", 2207 map->value_size, off, size); 2208 return -EACCES; 2209 } 2210 2211 return 0; 2212 } 2213 2214 /* check read/write into map element returned by bpf_map_lookup_elem() */ 2215 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, 2216 int size, bool zero_size_allowed) 2217 { 2218 struct bpf_reg_state *regs = cur_regs(env); 2219 struct bpf_map *map = regs[regno].map_ptr; 2220 2221 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || 2222 off + size > map->value_size) { 2223 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", 2224 map->value_size, off, size); 2225 return -EACCES; 2226 } 2227 return 0; 2228 } 2229 2230 /* check read/write into a map element with possible variable offset */ 2231 static int check_map_access(struct bpf_verifier_env *env, u32 regno, 2232 int off, int size, bool zero_size_allowed) 2233 { 2234 struct bpf_verifier_state *vstate = env->cur_state; 2235 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 2236 struct bpf_reg_state *reg = &state->regs[regno]; 2237 int err; 2238 2239 /* We may have adjusted the register to this map value, so we 2240 * need to try adding each of min_value and max_value to off 2241 * to make sure our theoretical access will be safe. 2242 */ 2243 if (env->log.level & BPF_LOG_LEVEL) 2244 print_verifier_state(env, state); 2245 2246 /* The minimum value is only important with signed 2247 * comparisons where we can't assume the floor of a 2248 * value is 0. If we are using signed variables for our 2249 * index'es we need to make sure that whatever we use 2250 * will have a set floor within our range. 2251 */ 2252 if (reg->smin_value < 0 && 2253 (reg->smin_value == S64_MIN || 2254 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || 2255 reg->smin_value + off < 0)) { 2256 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 2257 regno); 2258 return -EACCES; 2259 } 2260 err = __check_map_access(env, regno, reg->smin_value + off, size, 2261 zero_size_allowed); 2262 if (err) { 2263 verbose(env, "R%d min value is outside of the array range\n", 2264 regno); 2265 return err; 2266 } 2267 2268 /* If we haven't set a max value then we need to bail since we can't be 2269 * sure we won't do bad things. 2270 * If reg->umax_value + off could overflow, treat that as unbounded too. 
2271 */ 2272 if (reg->umax_value >= BPF_MAX_VAR_OFF) { 2273 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n", 2274 regno); 2275 return -EACCES; 2276 } 2277 err = __check_map_access(env, regno, reg->umax_value + off, size, 2278 zero_size_allowed); 2279 if (err) 2280 verbose(env, "R%d max value is outside of the array range\n", 2281 regno); 2282 2283 if (map_value_has_spin_lock(reg->map_ptr)) { 2284 u32 lock = reg->map_ptr->spin_lock_off; 2285 2286 /* if any part of struct bpf_spin_lock can be touched by 2287 * load/store reject this program. 2288 * To check that [x1, x2) overlaps with [y1, y2) 2289 * it is sufficient to check x1 < y2 && y1 < x2. 2290 */ 2291 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && 2292 lock < reg->umax_value + off + size) { 2293 verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n"); 2294 return -EACCES; 2295 } 2296 } 2297 return err; 2298 } 2299 2300 #define MAX_PACKET_OFF 0xffff 2301 2302 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, 2303 const struct bpf_call_arg_meta *meta, 2304 enum bpf_access_type t) 2305 { 2306 switch (env->prog->type) { 2307 /* Program types only with direct read access go here! */ 2308 case BPF_PROG_TYPE_LWT_IN: 2309 case BPF_PROG_TYPE_LWT_OUT: 2310 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2311 case BPF_PROG_TYPE_SK_REUSEPORT: 2312 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2313 case BPF_PROG_TYPE_CGROUP_SKB: 2314 if (t == BPF_WRITE) 2315 return false; 2316 /* fallthrough */ 2317 2318 /* Program types with direct read + write access go here! */ 2319 case BPF_PROG_TYPE_SCHED_CLS: 2320 case BPF_PROG_TYPE_SCHED_ACT: 2321 case BPF_PROG_TYPE_XDP: 2322 case BPF_PROG_TYPE_LWT_XMIT: 2323 case BPF_PROG_TYPE_SK_SKB: 2324 case BPF_PROG_TYPE_SK_MSG: 2325 if (meta) 2326 return meta->pkt_access; 2327 2328 env->seen_direct_write = true; 2329 return true; 2330 2331 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2332 if (t == BPF_WRITE) 2333 env->seen_direct_write = true; 2334 2335 return true; 2336 2337 default: 2338 return false; 2339 } 2340 } 2341 2342 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, 2343 int off, int size, bool zero_size_allowed) 2344 { 2345 struct bpf_reg_state *regs = cur_regs(env); 2346 struct bpf_reg_state *reg = ®s[regno]; 2347 2348 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || 2349 (u64)off + size > reg->range) { 2350 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", 2351 off, size, regno, reg->id, reg->off, reg->range); 2352 return -EACCES; 2353 } 2354 return 0; 2355 } 2356 2357 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, 2358 int size, bool zero_size_allowed) 2359 { 2360 struct bpf_reg_state *regs = cur_regs(env); 2361 struct bpf_reg_state *reg = ®s[regno]; 2362 int err; 2363 2364 /* We may have added a variable offset to the packet pointer; but any 2365 * reg->range we have comes after that. We are only checking the fixed 2366 * offset. 2367 */ 2368 2369 /* We don't allow negative numbers, because we aren't tracking enough 2370 * detail to prove they're safe. 
2371 */ 2372 if (reg->smin_value < 0) { 2373 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 2374 regno); 2375 return -EACCES; 2376 } 2377 err = __check_packet_access(env, regno, off, size, zero_size_allowed); 2378 if (err) { 2379 verbose(env, "R%d offset is outside of the packet\n", regno); 2380 return err; 2381 } 2382 2383 /* __check_packet_access has made sure "off + size - 1" is within u16. 2384 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff, 2385 * otherwise find_good_pkt_pointers would have refused to set range info 2386 * that __check_packet_access would have rejected this pkt access. 2387 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32. 2388 */ 2389 env->prog->aux->max_pkt_offset = 2390 max_t(u32, env->prog->aux->max_pkt_offset, 2391 off + reg->umax_value + size - 1); 2392 2393 return err; 2394 } 2395 2396 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */ 2397 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, 2398 enum bpf_access_type t, enum bpf_reg_type *reg_type, 2399 u32 *btf_id) 2400 { 2401 struct bpf_insn_access_aux info = { 2402 .reg_type = *reg_type, 2403 .log = &env->log, 2404 }; 2405 2406 if (env->ops->is_valid_access && 2407 env->ops->is_valid_access(off, size, t, env->prog, &info)) { 2408 /* A non zero info.ctx_field_size indicates that this field is a 2409 * candidate for later verifier transformation to load the whole 2410 * field and then apply a mask when accessed with a narrower 2411 * access than actual ctx access size. A zero info.ctx_field_size 2412 * will only allow for whole field access and rejects any other 2413 * type of narrower access. 2414 */ 2415 *reg_type = info.reg_type; 2416 2417 if (*reg_type == PTR_TO_BTF_ID) 2418 *btf_id = info.btf_id; 2419 else 2420 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; 2421 /* remember the offset of last byte accessed in ctx */ 2422 if (env->prog->aux->max_ctx_offset < off + size) 2423 env->prog->aux->max_ctx_offset = off + size; 2424 return 0; 2425 } 2426 2427 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size); 2428 return -EACCES; 2429 } 2430 2431 static int check_flow_keys_access(struct bpf_verifier_env *env, int off, 2432 int size) 2433 { 2434 if (size < 0 || off < 0 || 2435 (u64)off + size > sizeof(struct bpf_flow_keys)) { 2436 verbose(env, "invalid access to flow keys off=%d size=%d\n", 2437 off, size); 2438 return -EACCES; 2439 } 2440 return 0; 2441 } 2442 2443 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, 2444 u32 regno, int off, int size, 2445 enum bpf_access_type t) 2446 { 2447 struct bpf_reg_state *regs = cur_regs(env); 2448 struct bpf_reg_state *reg = ®s[regno]; 2449 struct bpf_insn_access_aux info = {}; 2450 bool valid; 2451 2452 if (reg->smin_value < 0) { 2453 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 2454 regno); 2455 return -EACCES; 2456 } 2457 2458 switch (reg->type) { 2459 case PTR_TO_SOCK_COMMON: 2460 valid = bpf_sock_common_is_valid_access(off, size, t, &info); 2461 break; 2462 case PTR_TO_SOCKET: 2463 valid = bpf_sock_is_valid_access(off, size, t, &info); 2464 break; 2465 case PTR_TO_TCP_SOCK: 2466 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); 2467 break; 2468 case PTR_TO_XDP_SOCK: 2469 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info); 2470 break; 2471 default: 2472 valid = false; 2473 } 2474 
2475 2476 if (valid) { 2477 env->insn_aux_data[insn_idx].ctx_field_size = 2478 info.ctx_field_size; 2479 return 0; 2480 } 2481 2482 verbose(env, "R%d invalid %s access off=%d size=%d\n", 2483 regno, reg_type_str[reg->type], off, size); 2484 2485 return -EACCES; 2486 } 2487 2488 static bool __is_pointer_value(bool allow_ptr_leaks, 2489 const struct bpf_reg_state *reg) 2490 { 2491 if (allow_ptr_leaks) 2492 return false; 2493 2494 return reg->type != SCALAR_VALUE; 2495 } 2496 2497 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) 2498 { 2499 return cur_regs(env) + regno; 2500 } 2501 2502 static bool is_pointer_value(struct bpf_verifier_env *env, int regno) 2503 { 2504 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); 2505 } 2506 2507 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) 2508 { 2509 const struct bpf_reg_state *reg = reg_state(env, regno); 2510 2511 return reg->type == PTR_TO_CTX; 2512 } 2513 2514 static bool is_sk_reg(struct bpf_verifier_env *env, int regno) 2515 { 2516 const struct bpf_reg_state *reg = reg_state(env, regno); 2517 2518 return type_is_sk_pointer(reg->type); 2519 } 2520 2521 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) 2522 { 2523 const struct bpf_reg_state *reg = reg_state(env, regno); 2524 2525 return type_is_pkt_pointer(reg->type); 2526 } 2527 2528 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) 2529 { 2530 const struct bpf_reg_state *reg = reg_state(env, regno); 2531 2532 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */ 2533 return reg->type == PTR_TO_FLOW_KEYS; 2534 } 2535 2536 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, 2537 const struct bpf_reg_state *reg, 2538 int off, int size, bool strict) 2539 { 2540 struct tnum reg_off; 2541 int ip_align; 2542 2543 /* Byte size accesses are always allowed. */ 2544 if (!strict || size == 1) 2545 return 0; 2546 2547 /* For platforms that do not have a Kconfig enabling 2548 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of 2549 * NET_IP_ALIGN is universally set to '2'. And on platforms 2550 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get 2551 * to this code only in strict mode where we want to emulate 2552 * the NET_IP_ALIGN==2 checking. Therefore use an 2553 * unconditional IP align value of '2'. 2554 */ 2555 ip_align = 2; 2556 2557 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off)); 2558 if (!tnum_is_aligned(reg_off, size)) { 2559 char tn_buf[48]; 2560 2561 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2562 verbose(env, 2563 "misaligned packet access off %d+%s+%d+%d size %d\n", 2564 ip_align, tn_buf, reg->off, off, size); 2565 return -EACCES; 2566 } 2567 2568 return 0; 2569 } 2570 2571 static int check_generic_ptr_alignment(struct bpf_verifier_env *env, 2572 const struct bpf_reg_state *reg, 2573 const char *pointer_desc, 2574 int off, int size, bool strict) 2575 { 2576 struct tnum reg_off; 2577 2578 /* Byte size accesses are always allowed. 
*/ 2579 if (!strict || size == 1) 2580 return 0; 2581 2582 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off)); 2583 if (!tnum_is_aligned(reg_off, size)) { 2584 char tn_buf[48]; 2585 2586 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2587 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n", 2588 pointer_desc, tn_buf, reg->off, off, size); 2589 return -EACCES; 2590 } 2591 2592 return 0; 2593 } 2594 2595 static int check_ptr_alignment(struct bpf_verifier_env *env, 2596 const struct bpf_reg_state *reg, int off, 2597 int size, bool strict_alignment_once) 2598 { 2599 bool strict = env->strict_alignment || strict_alignment_once; 2600 const char *pointer_desc = ""; 2601 2602 switch (reg->type) { 2603 case PTR_TO_PACKET: 2604 case PTR_TO_PACKET_META: 2605 /* Special case, because of NET_IP_ALIGN. Given metadata sits 2606 * right in front, treat it the very same way. 2607 */ 2608 return check_pkt_ptr_alignment(env, reg, off, size, strict); 2609 case PTR_TO_FLOW_KEYS: 2610 pointer_desc = "flow keys "; 2611 break; 2612 case PTR_TO_MAP_VALUE: 2613 pointer_desc = "value "; 2614 break; 2615 case PTR_TO_CTX: 2616 pointer_desc = "context "; 2617 break; 2618 case PTR_TO_STACK: 2619 pointer_desc = "stack "; 2620 /* The stack spill tracking logic in check_stack_write() 2621 * and check_stack_read() relies on stack accesses being 2622 * aligned. 2623 */ 2624 strict = true; 2625 break; 2626 case PTR_TO_SOCKET: 2627 pointer_desc = "sock "; 2628 break; 2629 case PTR_TO_SOCK_COMMON: 2630 pointer_desc = "sock_common "; 2631 break; 2632 case PTR_TO_TCP_SOCK: 2633 pointer_desc = "tcp_sock "; 2634 break; 2635 case PTR_TO_XDP_SOCK: 2636 pointer_desc = "xdp_sock "; 2637 break; 2638 default: 2639 break; 2640 } 2641 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, 2642 strict); 2643 } 2644 2645 static int update_stack_depth(struct bpf_verifier_env *env, 2646 const struct bpf_func_state *func, 2647 int off) 2648 { 2649 u16 stack = env->subprog_info[func->subprogno].stack_depth; 2650 2651 if (stack >= -off) 2652 return 0; 2653 2654 /* update known max for given subprogram */ 2655 env->subprog_info[func->subprogno].stack_depth = -off; 2656 return 0; 2657 } 2658 2659 /* starting from main bpf function walk all instructions of the function 2660 * and recursively walk all callees that given function can call. 2661 * Ignore jump and exit insns. 2662 * Since recursion is prevented by check_cfg() this algorithm 2663 * only needs a local stack of MAX_CALL_FRAMES to remember callsites 2664 */ 2665 static int check_max_stack_depth(struct bpf_verifier_env *env) 2666 { 2667 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end; 2668 struct bpf_subprog_info *subprog = env->subprog_info; 2669 struct bpf_insn *insn = env->prog->insnsi; 2670 int ret_insn[MAX_CALL_FRAMES]; 2671 int ret_prog[MAX_CALL_FRAMES]; 2672 2673 process_func: 2674 /* round up to 32-bytes, since this is granularity 2675 * of interpreter stack size 2676 */ 2677 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); 2678 if (depth > MAX_BPF_STACK) { 2679 verbose(env, "combined stack size of %d calls is %d. 
Too large\n", 2680 frame + 1, depth); 2681 return -EACCES; 2682 } 2683 continue_func: 2684 subprog_end = subprog[idx + 1].start; 2685 for (; i < subprog_end; i++) { 2686 if (insn[i].code != (BPF_JMP | BPF_CALL)) 2687 continue; 2688 if (insn[i].src_reg != BPF_PSEUDO_CALL) 2689 continue; 2690 /* remember insn and function to return to */ 2691 ret_insn[frame] = i + 1; 2692 ret_prog[frame] = idx; 2693 2694 /* find the callee */ 2695 i = i + insn[i].imm + 1; 2696 idx = find_subprog(env, i); 2697 if (idx < 0) { 2698 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", 2699 i); 2700 return -EFAULT; 2701 } 2702 frame++; 2703 if (frame >= MAX_CALL_FRAMES) { 2704 verbose(env, "the call stack of %d frames is too deep !\n", 2705 frame); 2706 return -E2BIG; 2707 } 2708 goto process_func; 2709 } 2710 /* end of for() loop means the last insn of the 'subprog' 2711 * was reached. Doesn't matter whether it was JA or EXIT 2712 */ 2713 if (frame == 0) 2714 return 0; 2715 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); 2716 frame--; 2717 i = ret_insn[frame]; 2718 idx = ret_prog[frame]; 2719 goto continue_func; 2720 } 2721 2722 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 2723 static int get_callee_stack_depth(struct bpf_verifier_env *env, 2724 const struct bpf_insn *insn, int idx) 2725 { 2726 int start = idx + insn->imm + 1, subprog; 2727 2728 subprog = find_subprog(env, start); 2729 if (subprog < 0) { 2730 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", 2731 start); 2732 return -EFAULT; 2733 } 2734 return env->subprog_info[subprog].stack_depth; 2735 } 2736 #endif 2737 2738 int check_ctx_reg(struct bpf_verifier_env *env, 2739 const struct bpf_reg_state *reg, int regno) 2740 { 2741 /* Access to ctx or passing it to a helper is only allowed in 2742 * its original, unmodified form. 
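 *
 * For example (illustrative only):
 *   r1 += 8
 *   r0 = *(u32 *)(r1 + 0)
 * is rejected below with "dereference of modified ctx ptr", whereas the
 * same field read through an unmodified ctx pointer,
 *   r0 = *(u32 *)(r1 + 8)
 * is left for check_ctx_access() to validate (assuming offset 8 is a
 * readable field of this program type's context).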
2743 */ 2744 2745 if (reg->off) { 2746 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", 2747 regno, reg->off); 2748 return -EACCES; 2749 } 2750 2751 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 2752 char tn_buf[48]; 2753 2754 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2755 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); 2756 return -EACCES; 2757 } 2758 2759 return 0; 2760 } 2761 2762 static int check_tp_buffer_access(struct bpf_verifier_env *env, 2763 const struct bpf_reg_state *reg, 2764 int regno, int off, int size) 2765 { 2766 if (off < 0) { 2767 verbose(env, 2768 "R%d invalid tracepoint buffer access: off=%d, size=%d", 2769 regno, off, size); 2770 return -EACCES; 2771 } 2772 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 2773 char tn_buf[48]; 2774 2775 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2776 verbose(env, 2777 "R%d invalid variable buffer offset: off=%d, var_off=%s", 2778 regno, off, tn_buf); 2779 return -EACCES; 2780 } 2781 if (off + size > env->prog->aux->max_tp_access) 2782 env->prog->aux->max_tp_access = off + size; 2783 2784 return 0; 2785 } 2786 2787 2788 /* truncate register to smaller size (in bytes) 2789 * must be called with size < BPF_REG_SIZE 2790 */ 2791 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) 2792 { 2793 u64 mask; 2794 2795 /* clear high bits in bit representation */ 2796 reg->var_off = tnum_cast(reg->var_off, size); 2797 2798 /* fix arithmetic bounds */ 2799 mask = ((u64)1 << (size * 8)) - 1; 2800 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { 2801 reg->umin_value &= mask; 2802 reg->umax_value &= mask; 2803 } else { 2804 reg->umin_value = 0; 2805 reg->umax_value = mask; 2806 } 2807 reg->smin_value = reg->umin_value; 2808 reg->smax_value = reg->umax_value; 2809 } 2810 2811 static bool bpf_map_is_rdonly(const struct bpf_map *map) 2812 { 2813 return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; 2814 } 2815 2816 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) 2817 { 2818 void *ptr; 2819 u64 addr; 2820 int err; 2821 2822 err = map->ops->map_direct_value_addr(map, &addr, off); 2823 if (err) 2824 return err; 2825 ptr = (void *)(long)addr + off; 2826 2827 switch (size) { 2828 case sizeof(u8): 2829 *val = (u64)*(u8 *)ptr; 2830 break; 2831 case sizeof(u16): 2832 *val = (u64)*(u16 *)ptr; 2833 break; 2834 case sizeof(u32): 2835 *val = (u64)*(u32 *)ptr; 2836 break; 2837 case sizeof(u64): 2838 *val = *(u64 *)ptr; 2839 break; 2840 default: 2841 return -EINVAL; 2842 } 2843 return 0; 2844 } 2845 2846 static int check_ptr_to_btf_access(struct bpf_verifier_env *env, 2847 struct bpf_reg_state *regs, 2848 int regno, int off, int size, 2849 enum bpf_access_type atype, 2850 int value_regno) 2851 { 2852 struct bpf_reg_state *reg = regs + regno; 2853 const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id); 2854 const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off); 2855 u32 btf_id; 2856 int ret; 2857 2858 if (off < 0) { 2859 verbose(env, 2860 "R%d is ptr_%s invalid negative access: off=%d\n", 2861 regno, tname, off); 2862 return -EACCES; 2863 } 2864 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 2865 char tn_buf[48]; 2866 2867 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2868 verbose(env, 2869 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", 2870 regno, tname, off, tn_buf); 2871 return -EACCES; 2872 } 2873 2874 if (env->ops->btf_struct_access) { 2875 ret = 
env->ops->btf_struct_access(&env->log, t, off, size, 2876 atype, &btf_id); 2877 } else { 2878 if (atype != BPF_READ) { 2879 verbose(env, "only read is supported\n"); 2880 return -EACCES; 2881 } 2882 2883 ret = btf_struct_access(&env->log, t, off, size, atype, 2884 &btf_id); 2885 } 2886 2887 if (ret < 0) 2888 return ret; 2889 2890 if (atype == BPF_READ) { 2891 if (ret == SCALAR_VALUE) { 2892 mark_reg_unknown(env, regs, value_regno); 2893 return 0; 2894 } 2895 mark_reg_known_zero(env, regs, value_regno); 2896 regs[value_regno].type = PTR_TO_BTF_ID; 2897 regs[value_regno].btf_id = btf_id; 2898 } 2899 2900 return 0; 2901 } 2902 2903 /* check whether memory at (regno + off) is accessible for t = (read | write) 2904 * if t==write, value_regno is a register which value is stored into memory 2905 * if t==read, value_regno is a register which will receive the value from memory 2906 * if t==write && value_regno==-1, some unknown value is stored into memory 2907 * if t==read && value_regno==-1, don't care what we read from memory 2908 */ 2909 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, 2910 int off, int bpf_size, enum bpf_access_type t, 2911 int value_regno, bool strict_alignment_once) 2912 { 2913 struct bpf_reg_state *regs = cur_regs(env); 2914 struct bpf_reg_state *reg = regs + regno; 2915 struct bpf_func_state *state; 2916 int size, err = 0; 2917 2918 size = bpf_size_to_bytes(bpf_size); 2919 if (size < 0) 2920 return size; 2921 2922 /* alignment checks will add in reg->off themselves */ 2923 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once); 2924 if (err) 2925 return err; 2926 2927 /* for access checks, reg->off is just part of off */ 2928 off += reg->off; 2929 2930 if (reg->type == PTR_TO_MAP_VALUE) { 2931 if (t == BPF_WRITE && value_regno >= 0 && 2932 is_pointer_value(env, value_regno)) { 2933 verbose(env, "R%d leaks addr into map\n", value_regno); 2934 return -EACCES; 2935 } 2936 err = check_map_access_type(env, regno, off, size, t); 2937 if (err) 2938 return err; 2939 err = check_map_access(env, regno, off, size, false); 2940 if (!err && t == BPF_READ && value_regno >= 0) { 2941 struct bpf_map *map = reg->map_ptr; 2942 2943 /* if map is read-only, track its contents as scalars */ 2944 if (tnum_is_const(reg->var_off) && 2945 bpf_map_is_rdonly(map) && 2946 map->ops->map_direct_value_addr) { 2947 int map_off = off + reg->var_off.value; 2948 u64 val = 0; 2949 2950 err = bpf_map_direct_read(map, map_off, size, 2951 &val); 2952 if (err) 2953 return err; 2954 2955 regs[value_regno].type = SCALAR_VALUE; 2956 __mark_reg_known(®s[value_regno], val); 2957 } else { 2958 mark_reg_unknown(env, regs, value_regno); 2959 } 2960 } 2961 } else if (reg->type == PTR_TO_CTX) { 2962 enum bpf_reg_type reg_type = SCALAR_VALUE; 2963 u32 btf_id = 0; 2964 2965 if (t == BPF_WRITE && value_regno >= 0 && 2966 is_pointer_value(env, value_regno)) { 2967 verbose(env, "R%d leaks addr into ctx\n", value_regno); 2968 return -EACCES; 2969 } 2970 2971 err = check_ctx_reg(env, reg, regno); 2972 if (err < 0) 2973 return err; 2974 2975 err = check_ctx_access(env, insn_idx, off, size, t, ®_type, &btf_id); 2976 if (err) 2977 verbose_linfo(env, insn_idx, "; "); 2978 if (!err && t == BPF_READ && value_regno >= 0) { 2979 /* ctx access returns either a scalar, or a 2980 * PTR_TO_PACKET[_META,_END]. In the latter 2981 * case, we know the offset is zero. 
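 * (For instance, in a TC classifier a load of the skb data pointer is
 * marked PTR_TO_PACKET and the data_end pointer PTR_TO_PACKET_END,
 * both starting at offset zero.)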
2982 */ 2983 if (reg_type == SCALAR_VALUE) { 2984 mark_reg_unknown(env, regs, value_regno); 2985 } else { 2986 mark_reg_known_zero(env, regs, 2987 value_regno); 2988 if (reg_type_may_be_null(reg_type)) 2989 regs[value_regno].id = ++env->id_gen; 2990 /* A load of ctx field could have different 2991 * actual load size with the one encoded in the 2992 * insn. When the dst is PTR, it is for sure not 2993 * a sub-register. 2994 */ 2995 regs[value_regno].subreg_def = DEF_NOT_SUBREG; 2996 if (reg_type == PTR_TO_BTF_ID) 2997 regs[value_regno].btf_id = btf_id; 2998 } 2999 regs[value_regno].type = reg_type; 3000 } 3001 3002 } else if (reg->type == PTR_TO_STACK) { 3003 off += reg->var_off.value; 3004 err = check_stack_access(env, reg, off, size); 3005 if (err) 3006 return err; 3007 3008 state = func(env, reg); 3009 err = update_stack_depth(env, state, off); 3010 if (err) 3011 return err; 3012 3013 if (t == BPF_WRITE) 3014 err = check_stack_write(env, state, off, size, 3015 value_regno, insn_idx); 3016 else 3017 err = check_stack_read(env, state, off, size, 3018 value_regno); 3019 } else if (reg_is_pkt_pointer(reg)) { 3020 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { 3021 verbose(env, "cannot write into packet\n"); 3022 return -EACCES; 3023 } 3024 if (t == BPF_WRITE && value_regno >= 0 && 3025 is_pointer_value(env, value_regno)) { 3026 verbose(env, "R%d leaks addr into packet\n", 3027 value_regno); 3028 return -EACCES; 3029 } 3030 err = check_packet_access(env, regno, off, size, false); 3031 if (!err && t == BPF_READ && value_regno >= 0) 3032 mark_reg_unknown(env, regs, value_regno); 3033 } else if (reg->type == PTR_TO_FLOW_KEYS) { 3034 if (t == BPF_WRITE && value_regno >= 0 && 3035 is_pointer_value(env, value_regno)) { 3036 verbose(env, "R%d leaks addr into flow keys\n", 3037 value_regno); 3038 return -EACCES; 3039 } 3040 3041 err = check_flow_keys_access(env, off, size); 3042 if (!err && t == BPF_READ && value_regno >= 0) 3043 mark_reg_unknown(env, regs, value_regno); 3044 } else if (type_is_sk_pointer(reg->type)) { 3045 if (t == BPF_WRITE) { 3046 verbose(env, "R%d cannot write into %s\n", 3047 regno, reg_type_str[reg->type]); 3048 return -EACCES; 3049 } 3050 err = check_sock_access(env, insn_idx, regno, off, size, t); 3051 if (!err && value_regno >= 0) 3052 mark_reg_unknown(env, regs, value_regno); 3053 } else if (reg->type == PTR_TO_TP_BUFFER) { 3054 err = check_tp_buffer_access(env, reg, regno, off, size); 3055 if (!err && t == BPF_READ && value_regno >= 0) 3056 mark_reg_unknown(env, regs, value_regno); 3057 } else if (reg->type == PTR_TO_BTF_ID) { 3058 err = check_ptr_to_btf_access(env, regs, regno, off, size, t, 3059 value_regno); 3060 } else { 3061 verbose(env, "R%d invalid mem access '%s'\n", regno, 3062 reg_type_str[reg->type]); 3063 return -EACCES; 3064 } 3065 3066 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && 3067 regs[value_regno].type == SCALAR_VALUE) { 3068 /* b/h/w load zero-extends, mark upper bits as known 0 */ 3069 coerce_reg_to_size(®s[value_regno], size); 3070 } 3071 return err; 3072 } 3073 3074 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) 3075 { 3076 int err; 3077 3078 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || 3079 insn->imm != 0) { 3080 verbose(env, "BPF_XADD uses reserved fields\n"); 3081 return -EINVAL; 3082 } 3083 3084 /* check src1 operand */ 3085 err = check_reg_arg(env, insn->src_reg, SRC_OP); 3086 if (err) 3087 return err; 3088 3089 /* check src2 
operand */ 3090 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 3091 if (err) 3092 return err; 3093 3094 if (is_pointer_value(env, insn->src_reg)) { 3095 verbose(env, "R%d leaks addr into mem\n", insn->src_reg); 3096 return -EACCES; 3097 } 3098 3099 if (is_ctx_reg(env, insn->dst_reg) || 3100 is_pkt_reg(env, insn->dst_reg) || 3101 is_flow_key_reg(env, insn->dst_reg) || 3102 is_sk_reg(env, insn->dst_reg)) { 3103 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", 3104 insn->dst_reg, 3105 reg_type_str[reg_state(env, insn->dst_reg)->type]); 3106 return -EACCES; 3107 } 3108 3109 /* check whether atomic_add can read the memory */ 3110 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, 3111 BPF_SIZE(insn->code), BPF_READ, -1, true); 3112 if (err) 3113 return err; 3114 3115 /* check whether atomic_add can write into the same memory */ 3116 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, 3117 BPF_SIZE(insn->code), BPF_WRITE, -1, true); 3118 } 3119 3120 static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, 3121 int off, int access_size, 3122 bool zero_size_allowed) 3123 { 3124 struct bpf_reg_state *reg = reg_state(env, regno); 3125 3126 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || 3127 access_size < 0 || (access_size == 0 && !zero_size_allowed)) { 3128 if (tnum_is_const(reg->var_off)) { 3129 verbose(env, "invalid stack type R%d off=%d access_size=%d\n", 3130 regno, off, access_size); 3131 } else { 3132 char tn_buf[48]; 3133 3134 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3135 verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n", 3136 regno, tn_buf, access_size); 3137 } 3138 return -EACCES; 3139 } 3140 return 0; 3141 } 3142 3143 /* when register 'regno' is passed into function that will read 'access_size' 3144 * bytes from that pointer, make sure that it's within stack boundary 3145 * and all elements of stack are initialized. 3146 * Unlike most pointer bounds-checking functions, this one doesn't take an 3147 * 'off' argument, so it has to add in reg->off itself. 3148 */ 3149 static int check_stack_boundary(struct bpf_verifier_env *env, int regno, 3150 int access_size, bool zero_size_allowed, 3151 struct bpf_call_arg_meta *meta) 3152 { 3153 struct bpf_reg_state *reg = reg_state(env, regno); 3154 struct bpf_func_state *state = func(env, reg); 3155 int err, min_off, max_off, i, j, slot, spi; 3156 3157 if (reg->type != PTR_TO_STACK) { 3158 /* Allow zero-byte read from NULL, regardless of pointer type */ 3159 if (zero_size_allowed && access_size == 0 && 3160 register_is_null(reg)) 3161 return 0; 3162 3163 verbose(env, "R%d type=%s expected=%s\n", regno, 3164 reg_type_str[reg->type], 3165 reg_type_str[PTR_TO_STACK]); 3166 return -EACCES; 3167 } 3168 3169 if (tnum_is_const(reg->var_off)) { 3170 min_off = max_off = reg->var_off.value + reg->off; 3171 err = __check_stack_boundary(env, regno, min_off, access_size, 3172 zero_size_allowed); 3173 if (err) 3174 return err; 3175 } else { 3176 /* Variable offset is prohibited for unprivileged mode for 3177 * simplicity since it requires corresponding support in 3178 * Spectre masking for stack ALU. 3179 * See also retrieve_ptr_limit(). 
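 *
 * A hypothetical example of what this rejects for unprivileged programs:
 *   r1 = r10
 *   r1 += r7          // r7 is bounded but not a known constant
 *   r2 = 8
 *   call <helper expecting ARG_PTR_TO_MEM, ARG_CONST_SIZE>
 * Privileged programs may pass such a pointer as long as the offset is
 * bounded and the stack window checked below is fully initialized.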
3180 */ 3181 if (!env->allow_ptr_leaks) { 3182 char tn_buf[48]; 3183 3184 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3185 verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n", 3186 regno, tn_buf); 3187 return -EACCES; 3188 } 3189 /* Only initialized buffer on stack is allowed to be accessed 3190 * with variable offset. With uninitialized buffer it's hard to 3191 * guarantee that whole memory is marked as initialized on 3192 * helper return since specific bounds are unknown what may 3193 * cause uninitialized stack leaking. 3194 */ 3195 if (meta && meta->raw_mode) 3196 meta = NULL; 3197 3198 if (reg->smax_value >= BPF_MAX_VAR_OFF || 3199 reg->smax_value <= -BPF_MAX_VAR_OFF) { 3200 verbose(env, "R%d unbounded indirect variable offset stack access\n", 3201 regno); 3202 return -EACCES; 3203 } 3204 min_off = reg->smin_value + reg->off; 3205 max_off = reg->smax_value + reg->off; 3206 err = __check_stack_boundary(env, regno, min_off, access_size, 3207 zero_size_allowed); 3208 if (err) { 3209 verbose(env, "R%d min value is outside of stack bound\n", 3210 regno); 3211 return err; 3212 } 3213 err = __check_stack_boundary(env, regno, max_off, access_size, 3214 zero_size_allowed); 3215 if (err) { 3216 verbose(env, "R%d max value is outside of stack bound\n", 3217 regno); 3218 return err; 3219 } 3220 } 3221 3222 if (meta && meta->raw_mode) { 3223 meta->access_size = access_size; 3224 meta->regno = regno; 3225 return 0; 3226 } 3227 3228 for (i = min_off; i < max_off + access_size; i++) { 3229 u8 *stype; 3230 3231 slot = -i - 1; 3232 spi = slot / BPF_REG_SIZE; 3233 if (state->allocated_stack <= slot) 3234 goto err; 3235 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; 3236 if (*stype == STACK_MISC) 3237 goto mark; 3238 if (*stype == STACK_ZERO) { 3239 /* helper can write anything into the stack */ 3240 *stype = STACK_MISC; 3241 goto mark; 3242 } 3243 if (state->stack[spi].slot_type[0] == STACK_SPILL && 3244 state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { 3245 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); 3246 for (j = 0; j < BPF_REG_SIZE; j++) 3247 state->stack[spi].slot_type[j] = STACK_MISC; 3248 goto mark; 3249 } 3250 3251 err: 3252 if (tnum_is_const(reg->var_off)) { 3253 verbose(env, "invalid indirect read from stack off %d+%d size %d\n", 3254 min_off, i - min_off, access_size); 3255 } else { 3256 char tn_buf[48]; 3257 3258 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3259 verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n", 3260 tn_buf, i - min_off, access_size); 3261 } 3262 return -EACCES; 3263 mark: 3264 /* reading any byte out of 8-byte 'spill_slot' will cause 3265 * the whole slot to be marked as 'read' 3266 */ 3267 mark_reg_read(env, &state->stack[spi].spilled_ptr, 3268 state->stack[spi].spilled_ptr.parent, 3269 REG_LIVE_READ64); 3270 } 3271 return update_stack_depth(env, state, min_off); 3272 } 3273 3274 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, 3275 int access_size, bool zero_size_allowed, 3276 struct bpf_call_arg_meta *meta) 3277 { 3278 struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; 3279 3280 switch (reg->type) { 3281 case PTR_TO_PACKET: 3282 case PTR_TO_PACKET_META: 3283 return check_packet_access(env, regno, reg->off, access_size, 3284 zero_size_allowed); 3285 case PTR_TO_MAP_VALUE: 3286 if (check_map_access_type(env, regno, reg->off, access_size, 3287 meta && meta->raw_mode ? 
BPF_WRITE :
3288 BPF_READ))
3289 return -EACCES;
3290 return check_map_access(env, regno, reg->off, access_size,
3291 zero_size_allowed);
3292 default: /* scalar_value|ptr_to_stack or invalid ptr */
3293 return check_stack_boundary(env, regno, access_size,
3294 zero_size_allowed, meta);
3295 }
3296 }
3297
3298 /* Implementation details:
3299 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
3300 * Two bpf_map_lookups (even with the same key) will have different reg->id.
3301 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
3302 * value_or_null->value transition, since the verifier only cares about
3303 * the range of access to a valid map value pointer and doesn't care about actual
3304 * address of the map element.
3305 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
3306 * reg->id > 0 after value_or_null->value transition. By doing so
3307 * two bpf_map_lookups will be considered two different pointers that
3308 * point to different bpf_spin_locks.
3309 * The verifier allows taking only one bpf_spin_lock at a time to avoid
3310 * deadlocks.
3311 * Since only one bpf_spin_lock is allowed the checks are simpler than
3312 * reg_is_refcounted() logic. The verifier needs to remember only
3313 * one spin_lock instead of an array of acquired_refs.
3314 * cur_state->active_spin_lock remembers which map value element got locked
3315 * and clears it after bpf_spin_unlock.
3316 */
3317 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
3318 bool is_lock)
3319 {
3320 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
3321 struct bpf_verifier_state *cur = env->cur_state;
3322 bool is_const = tnum_is_const(reg->var_off);
3323 struct bpf_map *map = reg->map_ptr;
3324 u64 val = reg->var_off.value;
3325
3326 if (reg->type != PTR_TO_MAP_VALUE) {
3327 verbose(env, "R%d is not a pointer to map_value\n", regno);
3328 return -EINVAL;
3329 }
3330 if (!is_const) {
3331 verbose(env,
3332 "R%d doesn't have constant offset.
bpf_spin_lock has to be at the constant offset\n", 3333 regno); 3334 return -EINVAL; 3335 } 3336 if (!map->btf) { 3337 verbose(env, 3338 "map '%s' has to have BTF in order to use bpf_spin_lock\n", 3339 map->name); 3340 return -EINVAL; 3341 } 3342 if (!map_value_has_spin_lock(map)) { 3343 if (map->spin_lock_off == -E2BIG) 3344 verbose(env, 3345 "map '%s' has more than one 'struct bpf_spin_lock'\n", 3346 map->name); 3347 else if (map->spin_lock_off == -ENOENT) 3348 verbose(env, 3349 "map '%s' doesn't have 'struct bpf_spin_lock'\n", 3350 map->name); 3351 else 3352 verbose(env, 3353 "map '%s' is not a struct type or bpf_spin_lock is mangled\n", 3354 map->name); 3355 return -EINVAL; 3356 } 3357 if (map->spin_lock_off != val + reg->off) { 3358 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", 3359 val + reg->off); 3360 return -EINVAL; 3361 } 3362 if (is_lock) { 3363 if (cur->active_spin_lock) { 3364 verbose(env, 3365 "Locking two bpf_spin_locks are not allowed\n"); 3366 return -EINVAL; 3367 } 3368 cur->active_spin_lock = reg->id; 3369 } else { 3370 if (!cur->active_spin_lock) { 3371 verbose(env, "bpf_spin_unlock without taking a lock\n"); 3372 return -EINVAL; 3373 } 3374 if (cur->active_spin_lock != reg->id) { 3375 verbose(env, "bpf_spin_unlock of different lock\n"); 3376 return -EINVAL; 3377 } 3378 cur->active_spin_lock = 0; 3379 } 3380 return 0; 3381 } 3382 3383 static bool arg_type_is_mem_ptr(enum bpf_arg_type type) 3384 { 3385 return type == ARG_PTR_TO_MEM || 3386 type == ARG_PTR_TO_MEM_OR_NULL || 3387 type == ARG_PTR_TO_UNINIT_MEM; 3388 } 3389 3390 static bool arg_type_is_mem_size(enum bpf_arg_type type) 3391 { 3392 return type == ARG_CONST_SIZE || 3393 type == ARG_CONST_SIZE_OR_ZERO; 3394 } 3395 3396 static bool arg_type_is_int_ptr(enum bpf_arg_type type) 3397 { 3398 return type == ARG_PTR_TO_INT || 3399 type == ARG_PTR_TO_LONG; 3400 } 3401 3402 static int int_ptr_type_to_size(enum bpf_arg_type type) 3403 { 3404 if (type == ARG_PTR_TO_INT) 3405 return sizeof(u32); 3406 else if (type == ARG_PTR_TO_LONG) 3407 return sizeof(u64); 3408 3409 return -EINVAL; 3410 } 3411 3412 static int check_func_arg(struct bpf_verifier_env *env, u32 regno, 3413 enum bpf_arg_type arg_type, 3414 struct bpf_call_arg_meta *meta) 3415 { 3416 struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; 3417 enum bpf_reg_type expected_type, type = reg->type; 3418 int err = 0; 3419 3420 if (arg_type == ARG_DONTCARE) 3421 return 0; 3422 3423 err = check_reg_arg(env, regno, SRC_OP); 3424 if (err) 3425 return err; 3426 3427 if (arg_type == ARG_ANYTHING) { 3428 if (is_pointer_value(env, regno)) { 3429 verbose(env, "R%d leaks addr into helper function\n", 3430 regno); 3431 return -EACCES; 3432 } 3433 return 0; 3434 } 3435 3436 if (type_is_pkt_pointer(type) && 3437 !may_access_direct_pkt_data(env, meta, BPF_READ)) { 3438 verbose(env, "helper access to the packet is not allowed\n"); 3439 return -EACCES; 3440 } 3441 3442 if (arg_type == ARG_PTR_TO_MAP_KEY || 3443 arg_type == ARG_PTR_TO_MAP_VALUE || 3444 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || 3445 arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { 3446 expected_type = PTR_TO_STACK; 3447 if (register_is_null(reg) && 3448 arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) 3449 /* final test in check_stack_boundary() */; 3450 else if (!type_is_pkt_pointer(type) && 3451 type != PTR_TO_MAP_VALUE && 3452 type != expected_type) 3453 goto err_type; 3454 } else if (arg_type == ARG_CONST_SIZE || 3455 arg_type == ARG_CONST_SIZE_OR_ZERO) { 3456 expected_type = SCALAR_VALUE; 3457 if 
(type != expected_type) 3458 goto err_type; 3459 } else if (arg_type == ARG_CONST_MAP_PTR) { 3460 expected_type = CONST_PTR_TO_MAP; 3461 if (type != expected_type) 3462 goto err_type; 3463 } else if (arg_type == ARG_PTR_TO_CTX) { 3464 expected_type = PTR_TO_CTX; 3465 if (type != expected_type) 3466 goto err_type; 3467 err = check_ctx_reg(env, reg, regno); 3468 if (err < 0) 3469 return err; 3470 } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) { 3471 expected_type = PTR_TO_SOCK_COMMON; 3472 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ 3473 if (!type_is_sk_pointer(type)) 3474 goto err_type; 3475 if (reg->ref_obj_id) { 3476 if (meta->ref_obj_id) { 3477 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", 3478 regno, reg->ref_obj_id, 3479 meta->ref_obj_id); 3480 return -EFAULT; 3481 } 3482 meta->ref_obj_id = reg->ref_obj_id; 3483 } 3484 } else if (arg_type == ARG_PTR_TO_SOCKET) { 3485 expected_type = PTR_TO_SOCKET; 3486 if (type != expected_type) 3487 goto err_type; 3488 } else if (arg_type == ARG_PTR_TO_BTF_ID) { 3489 expected_type = PTR_TO_BTF_ID; 3490 if (type != expected_type) 3491 goto err_type; 3492 if (reg->btf_id != meta->btf_id) { 3493 verbose(env, "Helper has type %s got %s in R%d\n", 3494 kernel_type_name(meta->btf_id), 3495 kernel_type_name(reg->btf_id), regno); 3496 3497 return -EACCES; 3498 } 3499 if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) { 3500 verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", 3501 regno); 3502 return -EACCES; 3503 } 3504 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { 3505 if (meta->func_id == BPF_FUNC_spin_lock) { 3506 if (process_spin_lock(env, regno, true)) 3507 return -EACCES; 3508 } else if (meta->func_id == BPF_FUNC_spin_unlock) { 3509 if (process_spin_lock(env, regno, false)) 3510 return -EACCES; 3511 } else { 3512 verbose(env, "verifier internal error\n"); 3513 return -EFAULT; 3514 } 3515 } else if (arg_type_is_mem_ptr(arg_type)) { 3516 expected_type = PTR_TO_STACK; 3517 /* One exception here. In case function allows for NULL to be 3518 * passed in as argument, it's a SCALAR_VALUE type. Final test 3519 * happens during stack boundary checking. 3520 */ 3521 if (register_is_null(reg) && 3522 arg_type == ARG_PTR_TO_MEM_OR_NULL) 3523 /* final test in check_stack_boundary() */; 3524 else if (!type_is_pkt_pointer(type) && 3525 type != PTR_TO_MAP_VALUE && 3526 type != expected_type) 3527 goto err_type; 3528 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; 3529 } else if (arg_type_is_int_ptr(arg_type)) { 3530 expected_type = PTR_TO_STACK; 3531 if (!type_is_pkt_pointer(type) && 3532 type != PTR_TO_MAP_VALUE && 3533 type != expected_type) 3534 goto err_type; 3535 } else { 3536 verbose(env, "unsupported arg_type %d\n", arg_type); 3537 return -EFAULT; 3538 } 3539 3540 if (arg_type == ARG_CONST_MAP_PTR) { 3541 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ 3542 meta->map_ptr = reg->map_ptr; 3543 } else if (arg_type == ARG_PTR_TO_MAP_KEY) { 3544 /* bpf_map_xxx(..., map_ptr, ..., key) call: 3545 * check that [key, key + map->key_size) are within 3546 * stack limits and initialized 3547 */ 3548 if (!meta->map_ptr) { 3549 /* in function declaration map_ptr must come before 3550 * map_key, so that it's verified and known before 3551 * we have to check map_key here. 
Otherwise it means 3552 * that kernel subsystem misconfigured verifier 3553 */ 3554 verbose(env, "invalid map_ptr to access map->key\n"); 3555 return -EACCES; 3556 } 3557 err = check_helper_mem_access(env, regno, 3558 meta->map_ptr->key_size, false, 3559 NULL); 3560 } else if (arg_type == ARG_PTR_TO_MAP_VALUE || 3561 (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && 3562 !register_is_null(reg)) || 3563 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { 3564 /* bpf_map_xxx(..., map_ptr, ..., value) call: 3565 * check [value, value + map->value_size) validity 3566 */ 3567 if (!meta->map_ptr) { 3568 /* kernel subsystem misconfigured verifier */ 3569 verbose(env, "invalid map_ptr to access map->value\n"); 3570 return -EACCES; 3571 } 3572 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); 3573 err = check_helper_mem_access(env, regno, 3574 meta->map_ptr->value_size, false, 3575 meta); 3576 } else if (arg_type_is_mem_size(arg_type)) { 3577 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); 3578 3579 /* remember the mem_size which may be used later 3580 * to refine return values. 3581 */ 3582 meta->msize_smax_value = reg->smax_value; 3583 meta->msize_umax_value = reg->umax_value; 3584 3585 /* The register is SCALAR_VALUE; the access check 3586 * happens using its boundaries. 3587 */ 3588 if (!tnum_is_const(reg->var_off)) 3589 /* For unprivileged variable accesses, disable raw 3590 * mode so that the program is required to 3591 * initialize all the memory that the helper could 3592 * just partially fill up. 3593 */ 3594 meta = NULL; 3595 3596 if (reg->smin_value < 0) { 3597 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", 3598 regno); 3599 return -EACCES; 3600 } 3601 3602 if (reg->umin_value == 0) { 3603 err = check_helper_mem_access(env, regno - 1, 0, 3604 zero_size_allowed, 3605 meta); 3606 if (err) 3607 return err; 3608 } 3609 3610 if (reg->umax_value >= BPF_MAX_VAR_SIZ) { 3611 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", 3612 regno); 3613 return -EACCES; 3614 } 3615 err = check_helper_mem_access(env, regno - 1, 3616 reg->umax_value, 3617 zero_size_allowed, meta); 3618 if (!err) 3619 err = mark_chain_precision(env, regno); 3620 } else if (arg_type_is_int_ptr(arg_type)) { 3621 int size = int_ptr_type_to_size(arg_type); 3622 3623 err = check_helper_mem_access(env, regno, size, false, meta); 3624 if (err) 3625 return err; 3626 err = check_ptr_alignment(env, reg, 0, size, true); 3627 } 3628 3629 return err; 3630 err_type: 3631 verbose(env, "R%d type=%s expected=%s\n", regno, 3632 reg_type_str[type], reg_type_str[expected_type]); 3633 return -EACCES; 3634 } 3635 3636 static int check_map_func_compatibility(struct bpf_verifier_env *env, 3637 struct bpf_map *map, int func_id) 3638 { 3639 if (!map) 3640 return 0; 3641 3642 /* We need a two way check, first is from map perspective ... 
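 * For example, BPF_MAP_TYPE_PROG_ARRAY may only be passed to
 * bpf_tail_call(), and the second switch below checks the converse:
 * bpf_tail_call() accepts nothing but a prog array. Map types and
 * helpers not listed in either switch fall through to the default
 * cases and are accepted.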
*/ 3643 switch (map->map_type) { 3644 case BPF_MAP_TYPE_PROG_ARRAY: 3645 if (func_id != BPF_FUNC_tail_call) 3646 goto error; 3647 break; 3648 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 3649 if (func_id != BPF_FUNC_perf_event_read && 3650 func_id != BPF_FUNC_perf_event_output && 3651 func_id != BPF_FUNC_skb_output && 3652 func_id != BPF_FUNC_perf_event_read_value) 3653 goto error; 3654 break; 3655 case BPF_MAP_TYPE_STACK_TRACE: 3656 if (func_id != BPF_FUNC_get_stackid) 3657 goto error; 3658 break; 3659 case BPF_MAP_TYPE_CGROUP_ARRAY: 3660 if (func_id != BPF_FUNC_skb_under_cgroup && 3661 func_id != BPF_FUNC_current_task_under_cgroup) 3662 goto error; 3663 break; 3664 case BPF_MAP_TYPE_CGROUP_STORAGE: 3665 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: 3666 if (func_id != BPF_FUNC_get_local_storage) 3667 goto error; 3668 break; 3669 case BPF_MAP_TYPE_DEVMAP: 3670 case BPF_MAP_TYPE_DEVMAP_HASH: 3671 if (func_id != BPF_FUNC_redirect_map && 3672 func_id != BPF_FUNC_map_lookup_elem) 3673 goto error; 3674 break; 3675 /* Restrict bpf side of cpumap and xskmap, open when use-cases 3676 * appear. 3677 */ 3678 case BPF_MAP_TYPE_CPUMAP: 3679 if (func_id != BPF_FUNC_redirect_map) 3680 goto error; 3681 break; 3682 case BPF_MAP_TYPE_XSKMAP: 3683 if (func_id != BPF_FUNC_redirect_map && 3684 func_id != BPF_FUNC_map_lookup_elem) 3685 goto error; 3686 break; 3687 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 3688 case BPF_MAP_TYPE_HASH_OF_MAPS: 3689 if (func_id != BPF_FUNC_map_lookup_elem) 3690 goto error; 3691 break; 3692 case BPF_MAP_TYPE_SOCKMAP: 3693 if (func_id != BPF_FUNC_sk_redirect_map && 3694 func_id != BPF_FUNC_sock_map_update && 3695 func_id != BPF_FUNC_map_delete_elem && 3696 func_id != BPF_FUNC_msg_redirect_map) 3697 goto error; 3698 break; 3699 case BPF_MAP_TYPE_SOCKHASH: 3700 if (func_id != BPF_FUNC_sk_redirect_hash && 3701 func_id != BPF_FUNC_sock_hash_update && 3702 func_id != BPF_FUNC_map_delete_elem && 3703 func_id != BPF_FUNC_msg_redirect_hash) 3704 goto error; 3705 break; 3706 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 3707 if (func_id != BPF_FUNC_sk_select_reuseport) 3708 goto error; 3709 break; 3710 case BPF_MAP_TYPE_QUEUE: 3711 case BPF_MAP_TYPE_STACK: 3712 if (func_id != BPF_FUNC_map_peek_elem && 3713 func_id != BPF_FUNC_map_pop_elem && 3714 func_id != BPF_FUNC_map_push_elem) 3715 goto error; 3716 break; 3717 case BPF_MAP_TYPE_SK_STORAGE: 3718 if (func_id != BPF_FUNC_sk_storage_get && 3719 func_id != BPF_FUNC_sk_storage_delete) 3720 goto error; 3721 break; 3722 default: 3723 break; 3724 } 3725 3726 /* ... and second from the function itself. 
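 * Checking both directions catches a half-updated table: a helper
 * added to a map type's list above must also accept that map type
 * here, and vice versa, otherwise we take the error path below.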
*/ 3727 switch (func_id) { 3728 case BPF_FUNC_tail_call: 3729 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) 3730 goto error; 3731 if (env->subprog_cnt > 1) { 3732 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); 3733 return -EINVAL; 3734 } 3735 break; 3736 case BPF_FUNC_perf_event_read: 3737 case BPF_FUNC_perf_event_output: 3738 case BPF_FUNC_perf_event_read_value: 3739 case BPF_FUNC_skb_output: 3740 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) 3741 goto error; 3742 break; 3743 case BPF_FUNC_get_stackid: 3744 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) 3745 goto error; 3746 break; 3747 case BPF_FUNC_current_task_under_cgroup: 3748 case BPF_FUNC_skb_under_cgroup: 3749 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) 3750 goto error; 3751 break; 3752 case BPF_FUNC_redirect_map: 3753 if (map->map_type != BPF_MAP_TYPE_DEVMAP && 3754 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && 3755 map->map_type != BPF_MAP_TYPE_CPUMAP && 3756 map->map_type != BPF_MAP_TYPE_XSKMAP) 3757 goto error; 3758 break; 3759 case BPF_FUNC_sk_redirect_map: 3760 case BPF_FUNC_msg_redirect_map: 3761 case BPF_FUNC_sock_map_update: 3762 if (map->map_type != BPF_MAP_TYPE_SOCKMAP) 3763 goto error; 3764 break; 3765 case BPF_FUNC_sk_redirect_hash: 3766 case BPF_FUNC_msg_redirect_hash: 3767 case BPF_FUNC_sock_hash_update: 3768 if (map->map_type != BPF_MAP_TYPE_SOCKHASH) 3769 goto error; 3770 break; 3771 case BPF_FUNC_get_local_storage: 3772 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 3773 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 3774 goto error; 3775 break; 3776 case BPF_FUNC_sk_select_reuseport: 3777 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) 3778 goto error; 3779 break; 3780 case BPF_FUNC_map_peek_elem: 3781 case BPF_FUNC_map_pop_elem: 3782 case BPF_FUNC_map_push_elem: 3783 if (map->map_type != BPF_MAP_TYPE_QUEUE && 3784 map->map_type != BPF_MAP_TYPE_STACK) 3785 goto error; 3786 break; 3787 case BPF_FUNC_sk_storage_get: 3788 case BPF_FUNC_sk_storage_delete: 3789 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) 3790 goto error; 3791 break; 3792 default: 3793 break; 3794 } 3795 3796 return 0; 3797 error: 3798 verbose(env, "cannot pass map_type %d into func %s#%d\n", 3799 map->map_type, func_id_name(func_id), func_id); 3800 return -EINVAL; 3801 } 3802 3803 static bool check_raw_mode_ok(const struct bpf_func_proto *fn) 3804 { 3805 int count = 0; 3806 3807 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) 3808 count++; 3809 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) 3810 count++; 3811 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) 3812 count++; 3813 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) 3814 count++; 3815 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) 3816 count++; 3817 3818 /* We only support one arg being in raw mode at the moment, 3819 * which is sufficient for the helper functions we have 3820 * right now. 3821 */ 3822 return count <= 1; 3823 } 3824 3825 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, 3826 enum bpf_arg_type arg_next) 3827 { 3828 return (arg_type_is_mem_ptr(arg_curr) && 3829 !arg_type_is_mem_size(arg_next)) || 3830 (!arg_type_is_mem_ptr(arg_curr) && 3831 arg_type_is_mem_size(arg_next)); 3832 } 3833 3834 static bool check_arg_pair_ok(const struct bpf_func_proto *fn) 3835 { 3836 /* bpf_xxx(..., buf, len) call will access 'len' 3837 * bytes from memory 'buf'. Both arg types need 3838 * to be paired, so make sure there's no buggy 3839 * helper function specification. 
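 * A valid pairing looks like bpf_probe_read()'s proto, where arg1 is
 * ARG_PTR_TO_UNINIT_MEM and arg2 is the matching ARG_CONST_SIZE; a
 * size argument without a preceding mem pointer (or the reverse) is
 * rejected here.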
3840 */ 3841 if (arg_type_is_mem_size(fn->arg1_type) || 3842 arg_type_is_mem_ptr(fn->arg5_type) || 3843 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || 3844 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || 3845 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || 3846 check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) 3847 return false; 3848 3849 return true; 3850 } 3851 3852 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) 3853 { 3854 int count = 0; 3855 3856 if (arg_type_may_be_refcounted(fn->arg1_type)) 3857 count++; 3858 if (arg_type_may_be_refcounted(fn->arg2_type)) 3859 count++; 3860 if (arg_type_may_be_refcounted(fn->arg3_type)) 3861 count++; 3862 if (arg_type_may_be_refcounted(fn->arg4_type)) 3863 count++; 3864 if (arg_type_may_be_refcounted(fn->arg5_type)) 3865 count++; 3866 3867 /* A reference acquiring function cannot acquire 3868 * another refcounted ptr. 3869 */ 3870 if (is_acquire_function(func_id) && count) 3871 return false; 3872 3873 /* We only support one arg being unreferenced at the moment, 3874 * which is sufficient for the helper functions we have right now. 3875 */ 3876 return count <= 1; 3877 } 3878 3879 static int check_func_proto(const struct bpf_func_proto *fn, int func_id) 3880 { 3881 return check_raw_mode_ok(fn) && 3882 check_arg_pair_ok(fn) && 3883 check_refcount_ok(fn, func_id) ? 0 : -EINVAL; 3884 } 3885 3886 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] 3887 * are now invalid, so turn them into unknown SCALAR_VALUE. 3888 */ 3889 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, 3890 struct bpf_func_state *state) 3891 { 3892 struct bpf_reg_state *regs = state->regs, *reg; 3893 int i; 3894 3895 for (i = 0; i < MAX_BPF_REG; i++) 3896 if (reg_is_pkt_pointer_any(®s[i])) 3897 mark_reg_unknown(env, regs, i); 3898 3899 bpf_for_each_spilled_reg(i, state, reg) { 3900 if (!reg) 3901 continue; 3902 if (reg_is_pkt_pointer_any(reg)) 3903 __mark_reg_unknown(env, reg); 3904 } 3905 } 3906 3907 static void clear_all_pkt_pointers(struct bpf_verifier_env *env) 3908 { 3909 struct bpf_verifier_state *vstate = env->cur_state; 3910 int i; 3911 3912 for (i = 0; i <= vstate->curframe; i++) 3913 __clear_all_pkt_pointers(env, vstate->frame[i]); 3914 } 3915 3916 static void release_reg_references(struct bpf_verifier_env *env, 3917 struct bpf_func_state *state, 3918 int ref_obj_id) 3919 { 3920 struct bpf_reg_state *regs = state->regs, *reg; 3921 int i; 3922 3923 for (i = 0; i < MAX_BPF_REG; i++) 3924 if (regs[i].ref_obj_id == ref_obj_id) 3925 mark_reg_unknown(env, regs, i); 3926 3927 bpf_for_each_spilled_reg(i, state, reg) { 3928 if (!reg) 3929 continue; 3930 if (reg->ref_obj_id == ref_obj_id) 3931 __mark_reg_unknown(env, reg); 3932 } 3933 } 3934 3935 /* The pointer with the specified id has released its reference to kernel 3936 * resources. Identify all copies of the same pointer and clear the reference. 
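 * Illustrative sequence: after r6 = bpf_sk_lookup_tcp(...); r7 = r6;
 * a bpf_sk_release(r6) must also invalidate r7, because both
 * registers carry the same ref_obj_id.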
3937 */ 3938 static int release_reference(struct bpf_verifier_env *env, 3939 int ref_obj_id) 3940 { 3941 struct bpf_verifier_state *vstate = env->cur_state; 3942 int err; 3943 int i; 3944 3945 err = release_reference_state(cur_func(env), ref_obj_id); 3946 if (err) 3947 return err; 3948 3949 for (i = 0; i <= vstate->curframe; i++) 3950 release_reg_references(env, vstate->frame[i], ref_obj_id); 3951 3952 return 0; 3953 } 3954 3955 static void clear_caller_saved_regs(struct bpf_verifier_env *env, 3956 struct bpf_reg_state *regs) 3957 { 3958 int i; 3959 3960 /* after the call registers r0 - r5 were scratched */ 3961 for (i = 0; i < CALLER_SAVED_REGS; i++) { 3962 mark_reg_not_init(env, regs, caller_saved[i]); 3963 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 3964 } 3965 } 3966 3967 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 3968 int *insn_idx) 3969 { 3970 struct bpf_verifier_state *state = env->cur_state; 3971 struct bpf_func_info_aux *func_info_aux; 3972 struct bpf_func_state *caller, *callee; 3973 int i, err, subprog, target_insn; 3974 bool is_global = false; 3975 3976 if (state->curframe + 1 >= MAX_CALL_FRAMES) { 3977 verbose(env, "the call stack of %d frames is too deep\n", 3978 state->curframe + 2); 3979 return -E2BIG; 3980 } 3981 3982 target_insn = *insn_idx + insn->imm; 3983 subprog = find_subprog(env, target_insn + 1); 3984 if (subprog < 0) { 3985 verbose(env, "verifier bug. No program starts at insn %d\n", 3986 target_insn + 1); 3987 return -EFAULT; 3988 } 3989 3990 caller = state->frame[state->curframe]; 3991 if (state->frame[state->curframe + 1]) { 3992 verbose(env, "verifier bug. Frame %d already allocated\n", 3993 state->curframe + 1); 3994 return -EFAULT; 3995 } 3996 3997 func_info_aux = env->prog->aux->func_info_aux; 3998 if (func_info_aux) 3999 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; 4000 err = btf_check_func_arg_match(env, subprog, caller->regs); 4001 if (err == -EFAULT) 4002 return err; 4003 if (is_global) { 4004 if (err) { 4005 verbose(env, "Caller passes invalid args into func#%d\n", 4006 subprog); 4007 return err; 4008 } else { 4009 if (env->log.level & BPF_LOG_LEVEL) 4010 verbose(env, 4011 "Func#%d is global and valid. Skipping.\n", 4012 subprog); 4013 clear_caller_saved_regs(env, caller->regs); 4014 4015 /* All global functions return SCALAR_VALUE */ 4016 mark_reg_unknown(env, caller->regs, BPF_REG_0); 4017 4018 /* continue with next insn after call */ 4019 return 0; 4020 } 4021 } 4022 4023 callee = kzalloc(sizeof(*callee), GFP_KERNEL); 4024 if (!callee) 4025 return -ENOMEM; 4026 state->frame[state->curframe + 1] = callee; 4027 4028 /* callee cannot access r0, r6 - r9 for reading and has to write 4029 * into its own stack before reading from it. 4030 * callee can read/write into caller's stack 4031 */ 4032 init_func_state(env, callee, 4033 /* remember the callsite, it will be used by bpf_exit */ 4034 *insn_idx /* callsite */, 4035 state->curframe + 1 /* frameno within this callchain */, 4036 subprog /* subprog number within this prog */); 4037 4038 /* Transfer references to the callee */ 4039 err = transfer_reference_state(callee, caller); 4040 if (err) 4041 return err; 4042 4043 /* copy r1 - r5 args that callee can access. 
The copy includes parent 4044 * pointers, which connects us up to the liveness chain 4045 */ 4046 for (i = BPF_REG_1; i <= BPF_REG_5; i++) 4047 callee->regs[i] = caller->regs[i]; 4048 4049 clear_caller_saved_regs(env, caller->regs); 4050 4051 /* only increment it after check_reg_arg() finished */ 4052 state->curframe++; 4053 4054 /* and go analyze first insn of the callee */ 4055 *insn_idx = target_insn; 4056 4057 if (env->log.level & BPF_LOG_LEVEL) { 4058 verbose(env, "caller:\n"); 4059 print_verifier_state(env, caller); 4060 verbose(env, "callee:\n"); 4061 print_verifier_state(env, callee); 4062 } 4063 return 0; 4064 } 4065 4066 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) 4067 { 4068 struct bpf_verifier_state *state = env->cur_state; 4069 struct bpf_func_state *caller, *callee; 4070 struct bpf_reg_state *r0; 4071 int err; 4072 4073 callee = state->frame[state->curframe]; 4074 r0 = &callee->regs[BPF_REG_0]; 4075 if (r0->type == PTR_TO_STACK) { 4076 /* technically it's ok to return caller's stack pointer 4077 * (or caller's caller's pointer) back to the caller, 4078 * since these pointers are valid. Only current stack 4079 * pointer will be invalid as soon as function exits, 4080 * but let's be conservative 4081 */ 4082 verbose(env, "cannot return stack pointer to the caller\n"); 4083 return -EINVAL; 4084 } 4085 4086 state->curframe--; 4087 caller = state->frame[state->curframe]; 4088 /* return to the caller whatever r0 had in the callee */ 4089 caller->regs[BPF_REG_0] = *r0; 4090 4091 /* Transfer references to the caller */ 4092 err = transfer_reference_state(caller, callee); 4093 if (err) 4094 return err; 4095 4096 *insn_idx = callee->callsite + 1; 4097 if (env->log.level & BPF_LOG_LEVEL) { 4098 verbose(env, "returning from callee:\n"); 4099 print_verifier_state(env, callee); 4100 verbose(env, "to caller at %d:\n", *insn_idx); 4101 print_verifier_state(env, caller); 4102 } 4103 /* clear everything in the callee */ 4104 free_func_state(callee); 4105 state->frame[state->curframe + 1] = NULL; 4106 return 0; 4107 } 4108 4109 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, 4110 int func_id, 4111 struct bpf_call_arg_meta *meta) 4112 { 4113 struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; 4114 4115 if (ret_type != RET_INTEGER || 4116 (func_id != BPF_FUNC_get_stack && 4117 func_id != BPF_FUNC_probe_read_str)) 4118 return; 4119 4120 ret_reg->smax_value = meta->msize_smax_value; 4121 ret_reg->umax_value = meta->msize_umax_value; 4122 __reg_deduce_bounds(ret_reg); 4123 __reg_bound_offset(ret_reg); 4124 } 4125 4126 static int 4127 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, 4128 int func_id, int insn_idx) 4129 { 4130 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 4131 struct bpf_map *map = meta->map_ptr; 4132 4133 if (func_id != BPF_FUNC_tail_call && 4134 func_id != BPF_FUNC_map_lookup_elem && 4135 func_id != BPF_FUNC_map_update_elem && 4136 func_id != BPF_FUNC_map_delete_elem && 4137 func_id != BPF_FUNC_map_push_elem && 4138 func_id != BPF_FUNC_map_pop_elem && 4139 func_id != BPF_FUNC_map_peek_elem) 4140 return 0; 4141 4142 if (map == NULL) { 4143 verbose(env, "kernel subsystem misconfigured verifier\n"); 4144 return -EINVAL; 4145 } 4146 4147 /* In case of read-only, some additional restrictions 4148 * need to be applied in order to prevent altering the 4149 * state of the map from program side. 
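 * E.g. a map created with BPF_F_RDONLY_PROG can still be read via
 * bpf_map_lookup_elem(), but bpf_map_update_elem() and the other
 * writing helpers on it are rejected right here at verification time.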
4150 */ 4151 if ((map->map_flags & BPF_F_RDONLY_PROG) && 4152 (func_id == BPF_FUNC_map_delete_elem || 4153 func_id == BPF_FUNC_map_update_elem || 4154 func_id == BPF_FUNC_map_push_elem || 4155 func_id == BPF_FUNC_map_pop_elem)) { 4156 verbose(env, "write into map forbidden\n"); 4157 return -EACCES; 4158 } 4159 4160 if (!BPF_MAP_PTR(aux->map_ptr_state)) 4161 bpf_map_ptr_store(aux, meta->map_ptr, 4162 meta->map_ptr->unpriv_array); 4163 else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr) 4164 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, 4165 meta->map_ptr->unpriv_array); 4166 return 0; 4167 } 4168 4169 static int 4170 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, 4171 int func_id, int insn_idx) 4172 { 4173 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 4174 struct bpf_reg_state *regs = cur_regs(env), *reg; 4175 struct bpf_map *map = meta->map_ptr; 4176 struct tnum range; 4177 u64 val; 4178 int err; 4179 4180 if (func_id != BPF_FUNC_tail_call) 4181 return 0; 4182 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) { 4183 verbose(env, "kernel subsystem misconfigured verifier\n"); 4184 return -EINVAL; 4185 } 4186 4187 range = tnum_range(0, map->max_entries - 1); 4188 reg = ®s[BPF_REG_3]; 4189 4190 if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) { 4191 bpf_map_key_store(aux, BPF_MAP_KEY_POISON); 4192 return 0; 4193 } 4194 4195 err = mark_chain_precision(env, BPF_REG_3); 4196 if (err) 4197 return err; 4198 4199 val = reg->var_off.value; 4200 if (bpf_map_key_unseen(aux)) 4201 bpf_map_key_store(aux, val); 4202 else if (!bpf_map_key_poisoned(aux) && 4203 bpf_map_key_immediate(aux) != val) 4204 bpf_map_key_store(aux, BPF_MAP_KEY_POISON); 4205 return 0; 4206 } 4207 4208 static int check_reference_leak(struct bpf_verifier_env *env) 4209 { 4210 struct bpf_func_state *state = cur_func(env); 4211 int i; 4212 4213 for (i = 0; i < state->acquired_refs; i++) { 4214 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", 4215 state->refs[i].id, state->refs[i].insn_idx); 4216 } 4217 return state->acquired_refs ? -EINVAL : 0; 4218 } 4219 4220 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) 4221 { 4222 const struct bpf_func_proto *fn = NULL; 4223 struct bpf_reg_state *regs; 4224 struct bpf_call_arg_meta meta; 4225 bool changes_data; 4226 int i, err; 4227 4228 /* find function prototype */ 4229 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { 4230 verbose(env, "invalid func %s#%d\n", func_id_name(func_id), 4231 func_id); 4232 return -EINVAL; 4233 } 4234 4235 if (env->ops->get_func_proto) 4236 fn = env->ops->get_func_proto(func_id, env->prog); 4237 if (!fn) { 4238 verbose(env, "unknown func %s#%d\n", func_id_name(func_id), 4239 func_id); 4240 return -EINVAL; 4241 } 4242 4243 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 4244 if (!env->prog->gpl_compatible && fn->gpl_only) { 4245 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n"); 4246 return -EINVAL; 4247 } 4248 4249 /* With LD_ABS/IND some JITs save/restore skb from r1. 
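 * For that reason a helper that may move packet data (e.g.
 * bpf_skb_pull_data()) must take the context as its first argument,
 * which is what the check below enforces.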
*/ 4250 changes_data = bpf_helper_changes_pkt_data(fn->func); 4251 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { 4252 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", 4253 func_id_name(func_id), func_id); 4254 return -EINVAL; 4255 } 4256 4257 memset(&meta, 0, sizeof(meta)); 4258 meta.pkt_access = fn->pkt_access; 4259 4260 err = check_func_proto(fn, func_id); 4261 if (err) { 4262 verbose(env, "kernel subsystem misconfigured func %s#%d\n", 4263 func_id_name(func_id), func_id); 4264 return err; 4265 } 4266 4267 meta.func_id = func_id; 4268 /* check args */ 4269 for (i = 0; i < 5; i++) { 4270 err = btf_resolve_helper_id(&env->log, fn, i); 4271 if (err > 0) 4272 meta.btf_id = err; 4273 err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta); 4274 if (err) 4275 return err; 4276 } 4277 4278 err = record_func_map(env, &meta, func_id, insn_idx); 4279 if (err) 4280 return err; 4281 4282 err = record_func_key(env, &meta, func_id, insn_idx); 4283 if (err) 4284 return err; 4285 4286 /* Mark slots with STACK_MISC in case of raw mode, stack offset 4287 * is inferred from register state. 4288 */ 4289 for (i = 0; i < meta.access_size; i++) { 4290 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, 4291 BPF_WRITE, -1, false); 4292 if (err) 4293 return err; 4294 } 4295 4296 if (func_id == BPF_FUNC_tail_call) { 4297 err = check_reference_leak(env); 4298 if (err) { 4299 verbose(env, "tail_call would lead to reference leak\n"); 4300 return err; 4301 } 4302 } else if (is_release_function(func_id)) { 4303 err = release_reference(env, meta.ref_obj_id); 4304 if (err) { 4305 verbose(env, "func %s#%d reference has not been acquired before\n", 4306 func_id_name(func_id), func_id); 4307 return err; 4308 } 4309 } 4310 4311 regs = cur_regs(env); 4312 4313 /* check that flags argument in get_local_storage(map, flags) is 0, 4314 * this is required because get_local_storage() can't return an error. 4315 */ 4316 if (func_id == BPF_FUNC_get_local_storage && 4317 !register_is_null(®s[BPF_REG_2])) { 4318 verbose(env, "get_local_storage() doesn't support non-zero flags\n"); 4319 return -EINVAL; 4320 } 4321 4322 /* reset caller saved regs */ 4323 for (i = 0; i < CALLER_SAVED_REGS; i++) { 4324 mark_reg_not_init(env, regs, caller_saved[i]); 4325 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 4326 } 4327 4328 /* helper call returns 64-bit value. 
*/ 4329 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; 4330 4331 /* update return register (already marked as written above) */ 4332 if (fn->ret_type == RET_INTEGER) { 4333 /* sets type to SCALAR_VALUE */ 4334 mark_reg_unknown(env, regs, BPF_REG_0); 4335 } else if (fn->ret_type == RET_VOID) { 4336 regs[BPF_REG_0].type = NOT_INIT; 4337 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || 4338 fn->ret_type == RET_PTR_TO_MAP_VALUE) { 4339 /* There is no offset yet applied, variable or fixed */ 4340 mark_reg_known_zero(env, regs, BPF_REG_0); 4341 /* remember map_ptr, so that check_map_access() 4342 * can check 'value_size' boundary of memory access 4343 * to map element returned from bpf_map_lookup_elem() 4344 */ 4345 if (meta.map_ptr == NULL) { 4346 verbose(env, 4347 "kernel subsystem misconfigured verifier\n"); 4348 return -EINVAL; 4349 } 4350 regs[BPF_REG_0].map_ptr = meta.map_ptr; 4351 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { 4352 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; 4353 if (map_value_has_spin_lock(meta.map_ptr)) 4354 regs[BPF_REG_0].id = ++env->id_gen; 4355 } else { 4356 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; 4357 regs[BPF_REG_0].id = ++env->id_gen; 4358 } 4359 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { 4360 mark_reg_known_zero(env, regs, BPF_REG_0); 4361 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; 4362 regs[BPF_REG_0].id = ++env->id_gen; 4363 } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { 4364 mark_reg_known_zero(env, regs, BPF_REG_0); 4365 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; 4366 regs[BPF_REG_0].id = ++env->id_gen; 4367 } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { 4368 mark_reg_known_zero(env, regs, BPF_REG_0); 4369 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; 4370 regs[BPF_REG_0].id = ++env->id_gen; 4371 } else { 4372 verbose(env, "unknown return type %d of func %s#%d\n", 4373 fn->ret_type, func_id_name(func_id), func_id); 4374 return -EINVAL; 4375 } 4376 4377 if (is_ptr_cast_function(func_id)) { 4378 /* For release_reference() */ 4379 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; 4380 } else if (is_acquire_function(func_id)) { 4381 int id = acquire_reference_state(env, insn_idx); 4382 4383 if (id < 0) 4384 return id; 4385 /* For mark_ptr_or_null_reg() */ 4386 regs[BPF_REG_0].id = id; 4387 /* For release_reference() */ 4388 regs[BPF_REG_0].ref_obj_id = id; 4389 } 4390 4391 do_refine_retval_range(regs, fn->ret_type, func_id, &meta); 4392 4393 err = check_map_func_compatibility(env, meta.map_ptr, func_id); 4394 if (err) 4395 return err; 4396 4397 if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) { 4398 const char *err_str; 4399 4400 #ifdef CONFIG_PERF_EVENTS 4401 err = get_callchain_buffers(sysctl_perf_event_max_stack); 4402 err_str = "cannot get callchain buffer for func %s#%d\n"; 4403 #else 4404 err = -ENOTSUPP; 4405 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n"; 4406 #endif 4407 if (err) { 4408 verbose(env, err_str, func_id_name(func_id), func_id); 4409 return err; 4410 } 4411 4412 env->prog->has_callchain_buf = true; 4413 } 4414 4415 if (changes_data) 4416 clear_all_pkt_pointers(env); 4417 return 0; 4418 } 4419 4420 static bool signed_add_overflows(s64 a, s64 b) 4421 { 4422 /* Do the add in u64, where overflow is well-defined */ 4423 s64 res = (s64)((u64)a + (u64)b); 4424 4425 if (b < 0) 4426 return res > a; 4427 return res < a; 4428 } 4429 4430 static bool signed_sub_overflows(s64 a, s64 b) 4431 { 4432 /* Do the sub in u64, where overflow is well-defined */ 4433 
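/* Worked example of the check below: with a = S64_MIN and b = 1 the
 * u64 subtraction wraps around to S64_MAX; since b > 0 and res > a,
 * the overflow is reported.
 */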
s64 res = (s64)((u64)a - (u64)b); 4434 4435 if (b < 0) 4436 return res < a; 4437 return res > a; 4438 } 4439 4440 static bool check_reg_sane_offset(struct bpf_verifier_env *env, 4441 const struct bpf_reg_state *reg, 4442 enum bpf_reg_type type) 4443 { 4444 bool known = tnum_is_const(reg->var_off); 4445 s64 val = reg->var_off.value; 4446 s64 smin = reg->smin_value; 4447 4448 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { 4449 verbose(env, "math between %s pointer and %lld is not allowed\n", 4450 reg_type_str[type], val); 4451 return false; 4452 } 4453 4454 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { 4455 verbose(env, "%s pointer offset %d is not allowed\n", 4456 reg_type_str[type], reg->off); 4457 return false; 4458 } 4459 4460 if (smin == S64_MIN) { 4461 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", 4462 reg_type_str[type]); 4463 return false; 4464 } 4465 4466 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { 4467 verbose(env, "value %lld makes %s pointer be out of bounds\n", 4468 smin, reg_type_str[type]); 4469 return false; 4470 } 4471 4472 return true; 4473 } 4474 4475 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) 4476 { 4477 return &env->insn_aux_data[env->insn_idx]; 4478 } 4479 4480 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, 4481 u32 *ptr_limit, u8 opcode, bool off_is_neg) 4482 { 4483 bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || 4484 (opcode == BPF_SUB && !off_is_neg); 4485 u32 off; 4486 4487 switch (ptr_reg->type) { 4488 case PTR_TO_STACK: 4489 /* Indirect variable offset stack access is prohibited in 4490 * unprivileged mode so it's not handled here. 4491 */ 4492 off = ptr_reg->off + ptr_reg->var_off.value; 4493 if (mask_to_left) 4494 *ptr_limit = MAX_BPF_STACK + off; 4495 else 4496 *ptr_limit = -off; 4497 return 0; 4498 case PTR_TO_MAP_VALUE: 4499 if (mask_to_left) { 4500 *ptr_limit = ptr_reg->umax_value + ptr_reg->off; 4501 } else { 4502 off = ptr_reg->smin_value + ptr_reg->off; 4503 *ptr_limit = ptr_reg->map_ptr->value_size - off; 4504 } 4505 return 0; 4506 default: 4507 return -EINVAL; 4508 } 4509 } 4510 4511 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, 4512 const struct bpf_insn *insn) 4513 { 4514 return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; 4515 } 4516 4517 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, 4518 u32 alu_state, u32 alu_limit) 4519 { 4520 /* If we arrived here from different branches with different 4521 * state or limits to sanitize, then this won't work. 4522 */ 4523 if (aux->alu_state && 4524 (aux->alu_state != alu_state || 4525 aux->alu_limit != alu_limit)) 4526 return -EACCES; 4527 4528 /* Corresponding fixup done in fixup_bpf_calls(). 
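 * Conceptually (the details live in the fixup code) the patched
 * sequence clamps the runtime offset against alu_limit so that, even
 * under speculation, ptr +/- off cannot step outside the object;
 * roughly "off = off > limit ? 0 : off", done branchlessly with a mask.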
*/ 4529 aux->alu_state = alu_state; 4530 aux->alu_limit = alu_limit; 4531 return 0; 4532 } 4533 4534 static int sanitize_val_alu(struct bpf_verifier_env *env, 4535 struct bpf_insn *insn) 4536 { 4537 struct bpf_insn_aux_data *aux = cur_aux(env); 4538 4539 if (can_skip_alu_sanitation(env, insn)) 4540 return 0; 4541 4542 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); 4543 } 4544 4545 static int sanitize_ptr_alu(struct bpf_verifier_env *env, 4546 struct bpf_insn *insn, 4547 const struct bpf_reg_state *ptr_reg, 4548 struct bpf_reg_state *dst_reg, 4549 bool off_is_neg) 4550 { 4551 struct bpf_verifier_state *vstate = env->cur_state; 4552 struct bpf_insn_aux_data *aux = cur_aux(env); 4553 bool ptr_is_dst_reg = ptr_reg == dst_reg; 4554 u8 opcode = BPF_OP(insn->code); 4555 u32 alu_state, alu_limit; 4556 struct bpf_reg_state tmp; 4557 bool ret; 4558 4559 if (can_skip_alu_sanitation(env, insn)) 4560 return 0; 4561 4562 /* We already marked aux for masking from non-speculative 4563 * paths, thus we got here in the first place. We only care 4564 * to explore bad access from here. 4565 */ 4566 if (vstate->speculative) 4567 goto do_sim; 4568 4569 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; 4570 alu_state |= ptr_is_dst_reg ? 4571 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; 4572 4573 if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) 4574 return 0; 4575 if (update_alu_sanitation_state(aux, alu_state, alu_limit)) 4576 return -EACCES; 4577 do_sim: 4578 /* Simulate and find potential out-of-bounds access under 4579 * speculative execution from truncation as a result of 4580 * masking when off was not within expected range. If off 4581 * sits in dst, then we temporarily need to move ptr there 4582 * to simulate dst (== 0) +/-= ptr. Needed, for example, 4583 * for cases where we use K-based arithmetic in one direction 4584 * and truncated reg-based in the other in order to explore 4585 * bad access. 4586 */ 4587 if (!ptr_is_dst_reg) { 4588 tmp = *dst_reg; 4589 *dst_reg = *ptr_reg; 4590 } 4591 ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); 4592 if (!ptr_is_dst_reg && ret) 4593 *dst_reg = tmp; 4594 return !ret ? -EFAULT : 0; 4595 } 4596 4597 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 4598 * Caller should also handle BPF_MOV case separately. 4599 * If we return -EACCES, caller may want to try again treating pointer as a 4600 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks. 4601 */ 4602 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, 4603 struct bpf_insn *insn, 4604 const struct bpf_reg_state *ptr_reg, 4605 const struct bpf_reg_state *off_reg) 4606 { 4607 struct bpf_verifier_state *vstate = env->cur_state; 4608 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 4609 struct bpf_reg_state *regs = state->regs, *dst_reg; 4610 bool known = tnum_is_const(off_reg->var_off); 4611 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, 4612 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; 4613 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, 4614 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; 4615 u32 dst = insn->dst_reg, src = insn->src_reg; 4616 u8 opcode = BPF_OP(insn->code); 4617 int ret; 4618 4619 dst_reg = ®s[dst]; 4620 4621 if ((known && (smin_val != smax_val || umin_val != umax_val)) || 4622 smin_val > smax_val || umin_val > umax_val) { 4623 /* Taint dst register if offset had invalid bounds derived from 4624 * e.g. 
dead branches. 4625 */ 4626 __mark_reg_unknown(env, dst_reg); 4627 return 0; 4628 } 4629 4630 if (BPF_CLASS(insn->code) != BPF_ALU64) { 4631 /* 32-bit ALU ops on pointers produce (meaningless) scalars */ 4632 verbose(env, 4633 "R%d 32-bit pointer arithmetic prohibited\n", 4634 dst); 4635 return -EACCES; 4636 } 4637 4638 switch (ptr_reg->type) { 4639 case PTR_TO_MAP_VALUE_OR_NULL: 4640 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", 4641 dst, reg_type_str[ptr_reg->type]); 4642 return -EACCES; 4643 case CONST_PTR_TO_MAP: 4644 case PTR_TO_PACKET_END: 4645 case PTR_TO_SOCKET: 4646 case PTR_TO_SOCKET_OR_NULL: 4647 case PTR_TO_SOCK_COMMON: 4648 case PTR_TO_SOCK_COMMON_OR_NULL: 4649 case PTR_TO_TCP_SOCK: 4650 case PTR_TO_TCP_SOCK_OR_NULL: 4651 case PTR_TO_XDP_SOCK: 4652 verbose(env, "R%d pointer arithmetic on %s prohibited\n", 4653 dst, reg_type_str[ptr_reg->type]); 4654 return -EACCES; 4655 case PTR_TO_MAP_VALUE: 4656 if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { 4657 verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", 4658 off_reg == dst_reg ? dst : src); 4659 return -EACCES; 4660 } 4661 /* fall-through */ 4662 default: 4663 break; 4664 } 4665 4666 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. 4667 * The id may be overwritten later if we create a new variable offset. 4668 */ 4669 dst_reg->type = ptr_reg->type; 4670 dst_reg->id = ptr_reg->id; 4671 4672 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || 4673 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) 4674 return -EINVAL; 4675 4676 switch (opcode) { 4677 case BPF_ADD: 4678 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); 4679 if (ret < 0) { 4680 verbose(env, "R%d tried to add from different maps or paths\n", dst); 4681 return ret; 4682 } 4683 /* We can take a fixed offset as long as it doesn't overflow 4684 * the s32 'off' field 4685 */ 4686 if (known && (ptr_reg->off + smin_val == 4687 (s64)(s32)(ptr_reg->off + smin_val))) { 4688 /* pointer += K. Accumulate it into fixed offset */ 4689 dst_reg->smin_value = smin_ptr; 4690 dst_reg->smax_value = smax_ptr; 4691 dst_reg->umin_value = umin_ptr; 4692 dst_reg->umax_value = umax_ptr; 4693 dst_reg->var_off = ptr_reg->var_off; 4694 dst_reg->off = ptr_reg->off + smin_val; 4695 dst_reg->raw = ptr_reg->raw; 4696 break; 4697 } 4698 /* A new variable offset is created. Note that off_reg->off 4699 * == 0, since it's a scalar. 4700 * dst_reg gets the pointer type and since some positive 4701 * integer value was added to the pointer, give it a new 'id' 4702 * if it's a PTR_TO_PACKET. 4703 * this creates a new 'base' pointer, off_reg (variable) gets 4704 * added into the variable offset, and we copy the fixed offset 4705 * from ptr_reg. 
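 * Example (sketch): a PTR_TO_MAP_VALUE with off=16 plus a scalar known
 * to lie in [0, 31] keeps the fixed off=16, while the [0, 31] part goes
 * into var_off and the u/s min/max bounds; later memory accesses are
 * then checked against value_size using both parts.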
4706 */ 4707 if (signed_add_overflows(smin_ptr, smin_val) || 4708 signed_add_overflows(smax_ptr, smax_val)) { 4709 dst_reg->smin_value = S64_MIN; 4710 dst_reg->smax_value = S64_MAX; 4711 } else { 4712 dst_reg->smin_value = smin_ptr + smin_val; 4713 dst_reg->smax_value = smax_ptr + smax_val; 4714 } 4715 if (umin_ptr + umin_val < umin_ptr || 4716 umax_ptr + umax_val < umax_ptr) { 4717 dst_reg->umin_value = 0; 4718 dst_reg->umax_value = U64_MAX; 4719 } else { 4720 dst_reg->umin_value = umin_ptr + umin_val; 4721 dst_reg->umax_value = umax_ptr + umax_val; 4722 } 4723 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); 4724 dst_reg->off = ptr_reg->off; 4725 dst_reg->raw = ptr_reg->raw; 4726 if (reg_is_pkt_pointer(ptr_reg)) { 4727 dst_reg->id = ++env->id_gen; 4728 /* something was added to pkt_ptr, set range to zero */ 4729 dst_reg->raw = 0; 4730 } 4731 break; 4732 case BPF_SUB: 4733 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); 4734 if (ret < 0) { 4735 verbose(env, "R%d tried to sub from different maps or paths\n", dst); 4736 return ret; 4737 } 4738 if (dst_reg == off_reg) { 4739 /* scalar -= pointer. Creates an unknown scalar */ 4740 verbose(env, "R%d tried to subtract pointer from scalar\n", 4741 dst); 4742 return -EACCES; 4743 } 4744 /* We don't allow subtraction from FP, because (according to 4745 * test_verifier.c test "invalid fp arithmetic", JITs might not 4746 * be able to deal with it. 4747 */ 4748 if (ptr_reg->type == PTR_TO_STACK) { 4749 verbose(env, "R%d subtraction from stack pointer prohibited\n", 4750 dst); 4751 return -EACCES; 4752 } 4753 if (known && (ptr_reg->off - smin_val == 4754 (s64)(s32)(ptr_reg->off - smin_val))) { 4755 /* pointer -= K. Subtract it from fixed offset */ 4756 dst_reg->smin_value = smin_ptr; 4757 dst_reg->smax_value = smax_ptr; 4758 dst_reg->umin_value = umin_ptr; 4759 dst_reg->umax_value = umax_ptr; 4760 dst_reg->var_off = ptr_reg->var_off; 4761 dst_reg->id = ptr_reg->id; 4762 dst_reg->off = ptr_reg->off - smin_val; 4763 dst_reg->raw = ptr_reg->raw; 4764 break; 4765 } 4766 /* A new variable offset is created. If the subtrahend is known 4767 * nonnegative, then any reg->range we had before is still good. 4768 */ 4769 if (signed_sub_overflows(smin_ptr, smax_val) || 4770 signed_sub_overflows(smax_ptr, smin_val)) { 4771 /* Overflow possible, we know nothing */ 4772 dst_reg->smin_value = S64_MIN; 4773 dst_reg->smax_value = S64_MAX; 4774 } else { 4775 dst_reg->smin_value = smin_ptr - smax_val; 4776 dst_reg->smax_value = smax_ptr - smin_val; 4777 } 4778 if (umin_ptr < umax_val) { 4779 /* Overflow possible, we know nothing */ 4780 dst_reg->umin_value = 0; 4781 dst_reg->umax_value = U64_MAX; 4782 } else { 4783 /* Cannot overflow (as long as bounds are consistent) */ 4784 dst_reg->umin_value = umin_ptr - umax_val; 4785 dst_reg->umax_value = umax_ptr - umin_val; 4786 } 4787 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); 4788 dst_reg->off = ptr_reg->off; 4789 dst_reg->raw = ptr_reg->raw; 4790 if (reg_is_pkt_pointer(ptr_reg)) { 4791 dst_reg->id = ++env->id_gen; 4792 /* something was added to pkt_ptr, set range to zero */ 4793 if (smin_val < 0) 4794 dst_reg->raw = 0; 4795 } 4796 break; 4797 case BPF_AND: 4798 case BPF_OR: 4799 case BPF_XOR: 4800 /* bitwise ops on pointers are troublesome, prohibit. */ 4801 verbose(env, "R%d bitwise operator %s on pointer prohibited\n", 4802 dst, bpf_alu_string[opcode >> 4]); 4803 return -EACCES; 4804 default: 4805 /* other operators (e.g. 
MUL,LSH) produce non-pointer results */ 4806 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", 4807 dst, bpf_alu_string[opcode >> 4]); 4808 return -EACCES; 4809 } 4810 4811 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) 4812 return -EINVAL; 4813 4814 __update_reg_bounds(dst_reg); 4815 __reg_deduce_bounds(dst_reg); 4816 __reg_bound_offset(dst_reg); 4817 4818 /* For unprivileged we require that resulting offset must be in bounds 4819 * in order to be able to sanitize access later on. 4820 */ 4821 if (!env->allow_ptr_leaks) { 4822 if (dst_reg->type == PTR_TO_MAP_VALUE && 4823 check_map_access(env, dst, dst_reg->off, 1, false)) { 4824 verbose(env, "R%d pointer arithmetic of map value goes out of range, " 4825 "prohibited for !root\n", dst); 4826 return -EACCES; 4827 } else if (dst_reg->type == PTR_TO_STACK && 4828 check_stack_access(env, dst_reg, dst_reg->off + 4829 dst_reg->var_off.value, 1)) { 4830 verbose(env, "R%d stack pointer arithmetic goes out of range, " 4831 "prohibited for !root\n", dst); 4832 return -EACCES; 4833 } 4834 } 4835 4836 return 0; 4837 } 4838 4839 /* WARNING: This function does calculations on 64-bit values, but the actual 4840 * execution may occur on 32-bit values. Therefore, things like bitshifts 4841 * need extra checks in the 32-bit case. 4842 */ 4843 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, 4844 struct bpf_insn *insn, 4845 struct bpf_reg_state *dst_reg, 4846 struct bpf_reg_state src_reg) 4847 { 4848 struct bpf_reg_state *regs = cur_regs(env); 4849 u8 opcode = BPF_OP(insn->code); 4850 bool src_known, dst_known; 4851 s64 smin_val, smax_val; 4852 u64 umin_val, umax_val; 4853 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; 4854 u32 dst = insn->dst_reg; 4855 int ret; 4856 4857 if (insn_bitness == 32) { 4858 /* Relevant for 32-bit RSH: Information can propagate towards 4859 * LSB, so it isn't sufficient to only truncate the output to 4860 * 32 bits. 4861 */ 4862 coerce_reg_to_size(dst_reg, 4); 4863 coerce_reg_to_size(&src_reg, 4); 4864 } 4865 4866 smin_val = src_reg.smin_value; 4867 smax_val = src_reg.smax_value; 4868 umin_val = src_reg.umin_value; 4869 umax_val = src_reg.umax_value; 4870 src_known = tnum_is_const(src_reg.var_off); 4871 dst_known = tnum_is_const(dst_reg->var_off); 4872 4873 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || 4874 smin_val > smax_val || umin_val > umax_val) { 4875 /* Taint dst register if offset had invalid bounds derived from 4876 * e.g. dead branches. 
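 * (on a path that cannot be taken at run time the accumulated
 * constraints may be contradictory, e.g. umin_value above umax_value,
 * so simply treat the result as unknown)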
4877 */ 4878 __mark_reg_unknown(env, dst_reg); 4879 return 0; 4880 } 4881 4882 if (!src_known && 4883 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { 4884 __mark_reg_unknown(env, dst_reg); 4885 return 0; 4886 } 4887 4888 switch (opcode) { 4889 case BPF_ADD: 4890 ret = sanitize_val_alu(env, insn); 4891 if (ret < 0) { 4892 verbose(env, "R%d tried to add from different pointers or scalars\n", dst); 4893 return ret; 4894 } 4895 if (signed_add_overflows(dst_reg->smin_value, smin_val) || 4896 signed_add_overflows(dst_reg->smax_value, smax_val)) { 4897 dst_reg->smin_value = S64_MIN; 4898 dst_reg->smax_value = S64_MAX; 4899 } else { 4900 dst_reg->smin_value += smin_val; 4901 dst_reg->smax_value += smax_val; 4902 } 4903 if (dst_reg->umin_value + umin_val < umin_val || 4904 dst_reg->umax_value + umax_val < umax_val) { 4905 dst_reg->umin_value = 0; 4906 dst_reg->umax_value = U64_MAX; 4907 } else { 4908 dst_reg->umin_value += umin_val; 4909 dst_reg->umax_value += umax_val; 4910 } 4911 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); 4912 break; 4913 case BPF_SUB: 4914 ret = sanitize_val_alu(env, insn); 4915 if (ret < 0) { 4916 verbose(env, "R%d tried to sub from different pointers or scalars\n", dst); 4917 return ret; 4918 } 4919 if (signed_sub_overflows(dst_reg->smin_value, smax_val) || 4920 signed_sub_overflows(dst_reg->smax_value, smin_val)) { 4921 /* Overflow possible, we know nothing */ 4922 dst_reg->smin_value = S64_MIN; 4923 dst_reg->smax_value = S64_MAX; 4924 } else { 4925 dst_reg->smin_value -= smax_val; 4926 dst_reg->smax_value -= smin_val; 4927 } 4928 if (dst_reg->umin_value < umax_val) { 4929 /* Overflow possible, we know nothing */ 4930 dst_reg->umin_value = 0; 4931 dst_reg->umax_value = U64_MAX; 4932 } else { 4933 /* Cannot overflow (as long as bounds are consistent) */ 4934 dst_reg->umin_value -= umax_val; 4935 dst_reg->umax_value -= umin_val; 4936 } 4937 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); 4938 break; 4939 case BPF_MUL: 4940 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off); 4941 if (smin_val < 0 || dst_reg->smin_value < 0) { 4942 /* Ain't nobody got time to multiply that sign */ 4943 __mark_reg_unbounded(dst_reg); 4944 __update_reg_bounds(dst_reg); 4945 break; 4946 } 4947 /* Both values are positive, so we can work with unsigned and 4948 * copy the result to signed (unless it exceeds S64_MAX). 4949 */ 4950 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) { 4951 /* Potential overflow, we know nothing */ 4952 __mark_reg_unbounded(dst_reg); 4953 /* (except what we can learn from the var_off) */ 4954 __update_reg_bounds(dst_reg); 4955 break; 4956 } 4957 dst_reg->umin_value *= umin_val; 4958 dst_reg->umax_value *= umax_val; 4959 if (dst_reg->umax_value > S64_MAX) { 4960 /* Overflow possible, we know nothing */ 4961 dst_reg->smin_value = S64_MIN; 4962 dst_reg->smax_value = S64_MAX; 4963 } else { 4964 dst_reg->smin_value = dst_reg->umin_value; 4965 dst_reg->smax_value = dst_reg->umax_value; 4966 } 4967 break; 4968 case BPF_AND: 4969 if (src_known && dst_known) { 4970 __mark_reg_known(dst_reg, dst_reg->var_off.value & 4971 src_reg.var_off.value); 4972 break; 4973 } 4974 /* We get our minimum from the var_off, since that's inherently 4975 * bitwise. Our maximum is the minimum of the operands' maxima. 
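 * Worked example: if dst has var_off {value 0, mask 0xff} (any byte)
 * and src is the constant 0x0f, tnum_and() yields {0, 0x0f}, so
 * umin becomes 0 and umax becomes min(0xff, 0x0f) = 0x0f.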
4976 */ 4977 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off); 4978 dst_reg->umin_value = dst_reg->var_off.value; 4979 dst_reg->umax_value = min(dst_reg->umax_value, umax_val); 4980 if (dst_reg->smin_value < 0 || smin_val < 0) { 4981 /* Lose signed bounds when ANDing negative numbers, 4982 * ain't nobody got time for that. 4983 */ 4984 dst_reg->smin_value = S64_MIN; 4985 dst_reg->smax_value = S64_MAX; 4986 } else { 4987 /* ANDing two positives gives a positive, so safe to 4988 * cast result into s64. 4989 */ 4990 dst_reg->smin_value = dst_reg->umin_value; 4991 dst_reg->smax_value = dst_reg->umax_value; 4992 } 4993 /* We may learn something more from the var_off */ 4994 __update_reg_bounds(dst_reg); 4995 break; 4996 case BPF_OR: 4997 if (src_known && dst_known) { 4998 __mark_reg_known(dst_reg, dst_reg->var_off.value | 4999 src_reg.var_off.value); 5000 break; 5001 } 5002 /* We get our maximum from the var_off, and our minimum is the 5003 * maximum of the operands' minima 5004 */ 5005 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off); 5006 dst_reg->umin_value = max(dst_reg->umin_value, umin_val); 5007 dst_reg->umax_value = dst_reg->var_off.value | 5008 dst_reg->var_off.mask; 5009 if (dst_reg->smin_value < 0 || smin_val < 0) { 5010 /* Lose signed bounds when ORing negative numbers, 5011 * ain't nobody got time for that. 5012 */ 5013 dst_reg->smin_value = S64_MIN; 5014 dst_reg->smax_value = S64_MAX; 5015 } else { 5016 /* ORing two positives gives a positive, so safe to 5017 * cast result into s64. 5018 */ 5019 dst_reg->smin_value = dst_reg->umin_value; 5020 dst_reg->smax_value = dst_reg->umax_value; 5021 } 5022 /* We may learn something more from the var_off */ 5023 __update_reg_bounds(dst_reg); 5024 break; 5025 case BPF_LSH: 5026 if (umax_val >= insn_bitness) { 5027 /* Shifts greater than 31 or 63 are undefined. 5028 * This includes shifts by a negative number. 5029 */ 5030 mark_reg_unknown(env, regs, insn->dst_reg); 5031 break; 5032 } 5033 /* We lose all sign bit information (except what we can pick 5034 * up from var_off) 5035 */ 5036 dst_reg->smin_value = S64_MIN; 5037 dst_reg->smax_value = S64_MAX; 5038 /* If we might shift our top bit out, then we know nothing */ 5039 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) { 5040 dst_reg->umin_value = 0; 5041 dst_reg->umax_value = U64_MAX; 5042 } else { 5043 dst_reg->umin_value <<= umin_val; 5044 dst_reg->umax_value <<= umax_val; 5045 } 5046 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); 5047 /* We may learn something more from the var_off */ 5048 __update_reg_bounds(dst_reg); 5049 break; 5050 case BPF_RSH: 5051 if (umax_val >= insn_bitness) { 5052 /* Shifts greater than 31 or 63 are undefined. 5053 * This includes shifts by a negative number. 5054 */ 5055 mark_reg_unknown(env, regs, insn->dst_reg); 5056 break; 5057 } 5058 /* BPF_RSH is an unsigned shift. If the value in dst_reg might 5059 * be negative, then either: 5060 * 1) src_reg might be zero, so the sign bit of the result is 5061 * unknown, so we lose our signed bounds 5062 * 2) it's known negative, thus the unsigned bounds capture the 5063 * signed bounds 5064 * 3) the signed bounds cross zero, so they tell us nothing 5065 * about the result 5066 * If the value in dst_reg is known nonnegative, then again the 5067 * unsigned bounts capture the signed bounds. 5068 * Thus, in all cases it suffices to blow away our signed bounds 5069 * and rely on inferring new ones from the unsigned bounds and 5070 * var_off of the result. 
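 * E.g. an unsigned range [0x10, 0x40] shifted right by a constant 2
 * becomes [0x4, 0x10], from which fresh signed bounds are then
 * inferred.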
5071 */ 5072 dst_reg->smin_value = S64_MIN; 5073 dst_reg->smax_value = S64_MAX; 5074 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); 5075 dst_reg->umin_value >>= umax_val; 5076 dst_reg->umax_value >>= umin_val; 5077 /* We may learn something more from the var_off */ 5078 __update_reg_bounds(dst_reg); 5079 break; 5080 case BPF_ARSH: 5081 if (umax_val >= insn_bitness) { 5082 /* Shifts greater than 31 or 63 are undefined. 5083 * This includes shifts by a negative number. 5084 */ 5085 mark_reg_unknown(env, regs, insn->dst_reg); 5086 break; 5087 } 5088 5089 /* Upon reaching here, src_known is true and 5090 * umax_val is equal to umin_val. 5091 */ 5092 if (insn_bitness == 32) { 5093 dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val); 5094 dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val); 5095 } else { 5096 dst_reg->smin_value >>= umin_val; 5097 dst_reg->smax_value >>= umin_val; 5098 } 5099 5100 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 5101 insn_bitness); 5102 5103 /* blow away the dst_reg umin_value/umax_value and rely on 5104 * dst_reg var_off to refine the result. 5105 */ 5106 dst_reg->umin_value = 0; 5107 dst_reg->umax_value = U64_MAX; 5108 __update_reg_bounds(dst_reg); 5109 break; 5110 default: 5111 mark_reg_unknown(env, regs, insn->dst_reg); 5112 break; 5113 } 5114 5115 if (BPF_CLASS(insn->code) != BPF_ALU64) { 5116 /* 32-bit ALU ops are (32,32)->32 */ 5117 coerce_reg_to_size(dst_reg, 4); 5118 } 5119 5120 __reg_deduce_bounds(dst_reg); 5121 __reg_bound_offset(dst_reg); 5122 return 0; 5123 } 5124 5125 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max 5126 * and var_off. 5127 */ 5128 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, 5129 struct bpf_insn *insn) 5130 { 5131 struct bpf_verifier_state *vstate = env->cur_state; 5132 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 5133 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; 5134 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 5135 u8 opcode = BPF_OP(insn->code); 5136 int err; 5137 5138 dst_reg = ®s[insn->dst_reg]; 5139 src_reg = NULL; 5140 if (dst_reg->type != SCALAR_VALUE) 5141 ptr_reg = dst_reg; 5142 if (BPF_SRC(insn->code) == BPF_X) { 5143 src_reg = ®s[insn->src_reg]; 5144 if (src_reg->type != SCALAR_VALUE) { 5145 if (dst_reg->type != SCALAR_VALUE) { 5146 /* Combining two pointers by any ALU op yields 5147 * an arbitrary scalar. Disallow all math except 5148 * pointer subtraction 5149 */ 5150 if (opcode == BPF_SUB && env->allow_ptr_leaks) { 5151 mark_reg_unknown(env, regs, insn->dst_reg); 5152 return 0; 5153 } 5154 verbose(env, "R%d pointer %s pointer prohibited\n", 5155 insn->dst_reg, 5156 bpf_alu_string[opcode >> 4]); 5157 return -EACCES; 5158 } else { 5159 /* scalar += pointer 5160 * This is legal, but we have to reverse our 5161 * src/dest handling in computing the range 5162 */ 5163 err = mark_chain_precision(env, insn->dst_reg); 5164 if (err) 5165 return err; 5166 return adjust_ptr_min_max_vals(env, insn, 5167 src_reg, dst_reg); 5168 } 5169 } else if (ptr_reg) { 5170 /* pointer += scalar */ 5171 err = mark_chain_precision(env, insn->src_reg); 5172 if (err) 5173 return err; 5174 return adjust_ptr_min_max_vals(env, insn, 5175 dst_reg, src_reg); 5176 } 5177 } else { 5178 /* Pretend the src is a reg with a known value, since we only 5179 * need to be able to read from this state. 
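 * E.g. for "r1 += 4" in the BPF_K form we materialize a fake
 * SCALAR_VALUE register holding the constant 4, so the same
 * adjust_*_min_max_vals() paths handle the immediate and register
 * forms identically.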
5180 */ 5181 off_reg.type = SCALAR_VALUE; 5182 __mark_reg_known(&off_reg, insn->imm); 5183 src_reg = &off_reg; 5184 if (ptr_reg) /* pointer += K */ 5185 return adjust_ptr_min_max_vals(env, insn, 5186 ptr_reg, src_reg); 5187 } 5188 5189 /* Got here implies adding two SCALAR_VALUEs */ 5190 if (WARN_ON_ONCE(ptr_reg)) { 5191 print_verifier_state(env, state); 5192 verbose(env, "verifier internal error: unexpected ptr_reg\n"); 5193 return -EINVAL; 5194 } 5195 if (WARN_ON(!src_reg)) { 5196 print_verifier_state(env, state); 5197 verbose(env, "verifier internal error: no src_reg\n"); 5198 return -EINVAL; 5199 } 5200 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); 5201 } 5202 5203 /* check validity of 32-bit and 64-bit arithmetic operations */ 5204 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) 5205 { 5206 struct bpf_reg_state *regs = cur_regs(env); 5207 u8 opcode = BPF_OP(insn->code); 5208 int err; 5209 5210 if (opcode == BPF_END || opcode == BPF_NEG) { 5211 if (opcode == BPF_NEG) { 5212 if (BPF_SRC(insn->code) != 0 || 5213 insn->src_reg != BPF_REG_0 || 5214 insn->off != 0 || insn->imm != 0) { 5215 verbose(env, "BPF_NEG uses reserved fields\n"); 5216 return -EINVAL; 5217 } 5218 } else { 5219 if (insn->src_reg != BPF_REG_0 || insn->off != 0 || 5220 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || 5221 BPF_CLASS(insn->code) == BPF_ALU64) { 5222 verbose(env, "BPF_END uses reserved fields\n"); 5223 return -EINVAL; 5224 } 5225 } 5226 5227 /* check src operand */ 5228 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 5229 if (err) 5230 return err; 5231 5232 if (is_pointer_value(env, insn->dst_reg)) { 5233 verbose(env, "R%d pointer arithmetic prohibited\n", 5234 insn->dst_reg); 5235 return -EACCES; 5236 } 5237 5238 /* check dest operand */ 5239 err = check_reg_arg(env, insn->dst_reg, DST_OP); 5240 if (err) 5241 return err; 5242 5243 } else if (opcode == BPF_MOV) { 5244 5245 if (BPF_SRC(insn->code) == BPF_X) { 5246 if (insn->imm != 0 || insn->off != 0) { 5247 verbose(env, "BPF_MOV uses reserved fields\n"); 5248 return -EINVAL; 5249 } 5250 5251 /* check src operand */ 5252 err = check_reg_arg(env, insn->src_reg, SRC_OP); 5253 if (err) 5254 return err; 5255 } else { 5256 if (insn->src_reg != BPF_REG_0 || insn->off != 0) { 5257 verbose(env, "BPF_MOV uses reserved fields\n"); 5258 return -EINVAL; 5259 } 5260 } 5261 5262 /* check dest operand, mark as required later */ 5263 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 5264 if (err) 5265 return err; 5266 5267 if (BPF_SRC(insn->code) == BPF_X) { 5268 struct bpf_reg_state *src_reg = regs + insn->src_reg; 5269 struct bpf_reg_state *dst_reg = regs + insn->dst_reg; 5270 5271 if (BPF_CLASS(insn->code) == BPF_ALU64) { 5272 /* case: R1 = R2 5273 * copy register state to dest reg 5274 */ 5275 *dst_reg = *src_reg; 5276 dst_reg->live |= REG_LIVE_WRITTEN; 5277 dst_reg->subreg_def = DEF_NOT_SUBREG; 5278 } else { 5279 /* R1 = (u32) R2 */ 5280 if (is_pointer_value(env, insn->src_reg)) { 5281 verbose(env, 5282 "R%d partial copy of pointer\n", 5283 insn->src_reg); 5284 return -EACCES; 5285 } else if (src_reg->type == SCALAR_VALUE) { 5286 *dst_reg = *src_reg; 5287 dst_reg->live |= REG_LIVE_WRITTEN; 5288 dst_reg->subreg_def = env->insn_idx + 1; 5289 } else { 5290 mark_reg_unknown(env, regs, 5291 insn->dst_reg); 5292 } 5293 coerce_reg_to_size(dst_reg, 4); 5294 } 5295 } else { 5296 /* case: R = imm 5297 * remember the value we stored into this reg 5298 */ 5299 /* clear any state __mark_reg_known doesn't set */ 5300 
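/* e.g. BPF_MOV64_IMM(BPF_REG_2, 42) leaves r2 as a known SCALAR_VALUE
 * with var_off = {value 42, mask 0} and all min/max bounds equal to 42
 */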
mark_reg_unknown(env, regs, insn->dst_reg); 5301 regs[insn->dst_reg].type = SCALAR_VALUE; 5302 if (BPF_CLASS(insn->code) == BPF_ALU64) { 5303 __mark_reg_known(regs + insn->dst_reg, 5304 insn->imm); 5305 } else { 5306 __mark_reg_known(regs + insn->dst_reg, 5307 (u32)insn->imm); 5308 } 5309 } 5310 5311 } else if (opcode > BPF_END) { 5312 verbose(env, "invalid BPF_ALU opcode %x\n", opcode); 5313 return -EINVAL; 5314 5315 } else { /* all other ALU ops: and, sub, xor, add, ... */ 5316 5317 if (BPF_SRC(insn->code) == BPF_X) { 5318 if (insn->imm != 0 || insn->off != 0) { 5319 verbose(env, "BPF_ALU uses reserved fields\n"); 5320 return -EINVAL; 5321 } 5322 /* check src1 operand */ 5323 err = check_reg_arg(env, insn->src_reg, SRC_OP); 5324 if (err) 5325 return err; 5326 } else { 5327 if (insn->src_reg != BPF_REG_0 || insn->off != 0) { 5328 verbose(env, "BPF_ALU uses reserved fields\n"); 5329 return -EINVAL; 5330 } 5331 } 5332 5333 /* check src2 operand */ 5334 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 5335 if (err) 5336 return err; 5337 5338 if ((opcode == BPF_MOD || opcode == BPF_DIV) && 5339 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) { 5340 verbose(env, "div by zero\n"); 5341 return -EINVAL; 5342 } 5343 5344 if ((opcode == BPF_LSH || opcode == BPF_RSH || 5345 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { 5346 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; 5347 5348 if (insn->imm < 0 || insn->imm >= size) { 5349 verbose(env, "invalid shift %d\n", insn->imm); 5350 return -EINVAL; 5351 } 5352 } 5353 5354 /* check dest operand */ 5355 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 5356 if (err) 5357 return err; 5358 5359 return adjust_reg_min_max_vals(env, insn); 5360 } 5361 5362 return 0; 5363 } 5364 5365 static void __find_good_pkt_pointers(struct bpf_func_state *state, 5366 struct bpf_reg_state *dst_reg, 5367 enum bpf_reg_type type, u16 new_range) 5368 { 5369 struct bpf_reg_state *reg; 5370 int i; 5371 5372 for (i = 0; i < MAX_BPF_REG; i++) { 5373 reg = &state->regs[i]; 5374 if (reg->type == type && reg->id == dst_reg->id) 5375 /* keep the maximum range already checked */ 5376 reg->range = max(reg->range, new_range); 5377 } 5378 5379 bpf_for_each_spilled_reg(i, state, reg) { 5380 if (!reg) 5381 continue; 5382 if (reg->type == type && reg->id == dst_reg->id) 5383 reg->range = max(reg->range, new_range); 5384 } 5385 } 5386 5387 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, 5388 struct bpf_reg_state *dst_reg, 5389 enum bpf_reg_type type, 5390 bool range_right_open) 5391 { 5392 u16 new_range; 5393 int i; 5394 5395 if (dst_reg->off < 0 || 5396 (dst_reg->off == 0 && range_right_open)) 5397 /* This doesn't give us any range */ 5398 return; 5399 5400 if (dst_reg->umax_value > MAX_PACKET_OFF || 5401 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF) 5402 /* Risk of overflow. For instance, ptr + (1<<63) may be less 5403 * than pkt_end, but that's because it's also less than pkt. 
5404 */
5405 return;
5406
5407 new_range = dst_reg->off;
5408 if (range_right_open)
5409 new_range--;
5410
5411 /* Examples for register markings:
5412 *
5413 * pkt_data in dst register:
5414 *
5415 * r2 = r3;
5416 * r2 += 8;
5417 * if (r2 > pkt_end) goto <handle exception>
5418 * <access okay>
5419 *
5420 * r2 = r3;
5421 * r2 += 8;
5422 * if (r2 < pkt_end) goto <access okay>
5423 * <handle exception>
5424 *
5425 * Where:
5426 * r2 == dst_reg, pkt_end == src_reg
5427 * r2=pkt(id=n,off=8,r=0)
5428 * r3=pkt(id=n,off=0,r=0)
5429 *
5430 * pkt_data in src register:
5431 *
5432 * r2 = r3;
5433 * r2 += 8;
5434 * if (pkt_end >= r2) goto <access okay>
5435 * <handle exception>
5436 *
5437 * r2 = r3;
5438 * r2 += 8;
5439 * if (pkt_end <= r2) goto <handle exception>
5440 * <access okay>
5441 *
5442 * Where:
5443 * pkt_end == dst_reg, r2 == src_reg
5444 * r2=pkt(id=n,off=8,r=0)
5445 * r3=pkt(id=n,off=0,r=0)
5446 *
5447 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
5448 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
5449 * and [r3, r3 + 8-1) respectively is safe to access depending on
5450 * the check.
5451 */
5452
5453 /* If our ids match, then we must have the same max_value. And we
5454 * don't care about the other reg's fixed offset, since if it's too big
5455 * the range won't allow anything.
5456 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
5457 */
5458 for (i = 0; i <= vstate->curframe; i++)
5459 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
5460 new_range);
5461 }
5462
5463 /* compute branch direction of the expression "if (reg opcode val) goto target;"
5464 * and return:
5465 * 1 - branch will be taken and "goto target" will be executed
5466 * 0 - branch will not be taken and fall-through to next insn
5467 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's value range is [0,10]
5468 */
5469 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
5470 bool is_jmp32)
5471 {
5472 struct bpf_reg_state reg_lo;
5473 s64 sval;
5474
5475 if (__is_pointer_value(false, reg))
5476 return -1;
5477
5478 if (is_jmp32) {
5479 reg_lo = *reg;
5480 reg = &reg_lo;
5481 /* For JMP32, only low 32 bits are compared, coerce_reg_to_size
5482 * could truncate high bits and update umin/umax according to
5483 * information of low bits.
5484 */
5485 coerce_reg_to_size(reg, 4);
5486 /* smin/smax need special handling. For example, after coerce,
5487 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
5488 * used as operand to JMP32. It is a negative number from s32's
5489 * point of view, while it is a positive number when seen as
5490 * s64. The smin/smax are kept as s64, therefore, when used with
5491 * JMP32, they need to be transformed into s32, then sign
5492 * extended back to s64.
5493 *
5494 * Also, smin/smax were copied from umin/umax. If umin/umax have
5495 * different sign bits, the min/max relationship doesn't
5496 * hold after casting into s32; in this case, set smin/smax
5497 * to the safest range.
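*
* For illustration (a minimal made-up example): with umin_value == 0x0
* and umax_value == 0x80000000, bit 31 differs between umin and umax,
* so the truncated s32 bounds would be meaningless; the code below then
* falls back to the safest range [S32_MIN, S32_MAX].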
5498 */ 5499 if ((reg->umax_value ^ reg->umin_value) & 5500 (1ULL << 31)) { 5501 reg->smin_value = S32_MIN; 5502 reg->smax_value = S32_MAX; 5503 } 5504 reg->smin_value = (s64)(s32)reg->smin_value; 5505 reg->smax_value = (s64)(s32)reg->smax_value; 5506 5507 val = (u32)val; 5508 sval = (s64)(s32)val; 5509 } else { 5510 sval = (s64)val; 5511 } 5512 5513 switch (opcode) { 5514 case BPF_JEQ: 5515 if (tnum_is_const(reg->var_off)) 5516 return !!tnum_equals_const(reg->var_off, val); 5517 break; 5518 case BPF_JNE: 5519 if (tnum_is_const(reg->var_off)) 5520 return !tnum_equals_const(reg->var_off, val); 5521 break; 5522 case BPF_JSET: 5523 if ((~reg->var_off.mask & reg->var_off.value) & val) 5524 return 1; 5525 if (!((reg->var_off.mask | reg->var_off.value) & val)) 5526 return 0; 5527 break; 5528 case BPF_JGT: 5529 if (reg->umin_value > val) 5530 return 1; 5531 else if (reg->umax_value <= val) 5532 return 0; 5533 break; 5534 case BPF_JSGT: 5535 if (reg->smin_value > sval) 5536 return 1; 5537 else if (reg->smax_value < sval) 5538 return 0; 5539 break; 5540 case BPF_JLT: 5541 if (reg->umax_value < val) 5542 return 1; 5543 else if (reg->umin_value >= val) 5544 return 0; 5545 break; 5546 case BPF_JSLT: 5547 if (reg->smax_value < sval) 5548 return 1; 5549 else if (reg->smin_value >= sval) 5550 return 0; 5551 break; 5552 case BPF_JGE: 5553 if (reg->umin_value >= val) 5554 return 1; 5555 else if (reg->umax_value < val) 5556 return 0; 5557 break; 5558 case BPF_JSGE: 5559 if (reg->smin_value >= sval) 5560 return 1; 5561 else if (reg->smax_value < sval) 5562 return 0; 5563 break; 5564 case BPF_JLE: 5565 if (reg->umax_value <= val) 5566 return 1; 5567 else if (reg->umin_value > val) 5568 return 0; 5569 break; 5570 case BPF_JSLE: 5571 if (reg->smax_value <= sval) 5572 return 1; 5573 else if (reg->smin_value > sval) 5574 return 0; 5575 break; 5576 } 5577 5578 return -1; 5579 } 5580 5581 /* Generate min value of the high 32-bit from TNUM info. */ 5582 static u64 gen_hi_min(struct tnum var) 5583 { 5584 return var.value & ~0xffffffffULL; 5585 } 5586 5587 /* Generate max value of the high 32-bit from TNUM info. */ 5588 static u64 gen_hi_max(struct tnum var) 5589 { 5590 return (var.value | var.mask) & ~0xffffffffULL; 5591 } 5592 5593 /* Return true if VAL is compared with a s64 sign extended from s32, and they 5594 * are with the same signedness. 5595 */ 5596 static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg) 5597 { 5598 return ((s32)sval >= 0 && 5599 reg->smin_value >= 0 && reg->smax_value <= S32_MAX) || 5600 ((s32)sval < 0 && 5601 reg->smax_value <= 0 && reg->smin_value >= S32_MIN); 5602 } 5603 5604 /* Adjusts the register min/max values in the case that the dst_reg is the 5605 * variable register that we are working on, and src_reg is a constant or we're 5606 * simply doing a BPF_K check. 5607 * In JEQ/JNE cases we also adjust the var_off values. 5608 */ 5609 static void reg_set_min_max(struct bpf_reg_state *true_reg, 5610 struct bpf_reg_state *false_reg, u64 val, 5611 u8 opcode, bool is_jmp32) 5612 { 5613 s64 sval; 5614 5615 /* If the dst_reg is a pointer, we can't learn anything about its 5616 * variable offset from the compare (unless src_reg were a pointer into 5617 * the same object, but we don't bother with that. 5618 * Since false_reg and true_reg have the same type by construction, we 5619 * only need to check one of them for pointerness. 5620 */ 5621 if (__is_pointer_value(false, false_reg)) 5622 return; 5623 5624 val = is_jmp32 ? (u32)val : val; 5625 sval = is_jmp32 ? 
(s64)(s32)val : (s64)val; 5626 5627 switch (opcode) { 5628 case BPF_JEQ: 5629 case BPF_JNE: 5630 { 5631 struct bpf_reg_state *reg = 5632 opcode == BPF_JEQ ? true_reg : false_reg; 5633 5634 /* For BPF_JEQ, if this is false we know nothing Jon Snow, but 5635 * if it is true we know the value for sure. Likewise for 5636 * BPF_JNE. 5637 */ 5638 if (is_jmp32) { 5639 u64 old_v = reg->var_off.value; 5640 u64 hi_mask = ~0xffffffffULL; 5641 5642 reg->var_off.value = (old_v & hi_mask) | val; 5643 reg->var_off.mask &= hi_mask; 5644 } else { 5645 __mark_reg_known(reg, val); 5646 } 5647 break; 5648 } 5649 case BPF_JSET: 5650 false_reg->var_off = tnum_and(false_reg->var_off, 5651 tnum_const(~val)); 5652 if (is_power_of_2(val)) 5653 true_reg->var_off = tnum_or(true_reg->var_off, 5654 tnum_const(val)); 5655 break; 5656 case BPF_JGE: 5657 case BPF_JGT: 5658 { 5659 u64 false_umax = opcode == BPF_JGT ? val : val - 1; 5660 u64 true_umin = opcode == BPF_JGT ? val + 1 : val; 5661 5662 if (is_jmp32) { 5663 false_umax += gen_hi_max(false_reg->var_off); 5664 true_umin += gen_hi_min(true_reg->var_off); 5665 } 5666 false_reg->umax_value = min(false_reg->umax_value, false_umax); 5667 true_reg->umin_value = max(true_reg->umin_value, true_umin); 5668 break; 5669 } 5670 case BPF_JSGE: 5671 case BPF_JSGT: 5672 { 5673 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; 5674 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; 5675 5676 /* If the full s64 was not sign-extended from s32 then don't 5677 * deduct further info. 5678 */ 5679 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) 5680 break; 5681 false_reg->smax_value = min(false_reg->smax_value, false_smax); 5682 true_reg->smin_value = max(true_reg->smin_value, true_smin); 5683 break; 5684 } 5685 case BPF_JLE: 5686 case BPF_JLT: 5687 { 5688 u64 false_umin = opcode == BPF_JLT ? val : val + 1; 5689 u64 true_umax = opcode == BPF_JLT ? val - 1 : val; 5690 5691 if (is_jmp32) { 5692 false_umin += gen_hi_min(false_reg->var_off); 5693 true_umax += gen_hi_max(true_reg->var_off); 5694 } 5695 false_reg->umin_value = max(false_reg->umin_value, false_umin); 5696 true_reg->umax_value = min(true_reg->umax_value, true_umax); 5697 break; 5698 } 5699 case BPF_JSLE: 5700 case BPF_JSLT: 5701 { 5702 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; 5703 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; 5704 5705 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) 5706 break; 5707 false_reg->smin_value = max(false_reg->smin_value, false_smin); 5708 true_reg->smax_value = min(true_reg->smax_value, true_smax); 5709 break; 5710 } 5711 default: 5712 break; 5713 } 5714 5715 __reg_deduce_bounds(false_reg); 5716 __reg_deduce_bounds(true_reg); 5717 /* We might have learned some bits from the bounds. */ 5718 __reg_bound_offset(false_reg); 5719 __reg_bound_offset(true_reg); 5720 if (is_jmp32) { 5721 __reg_bound_offset32(false_reg); 5722 __reg_bound_offset32(true_reg); 5723 } 5724 /* Intersecting with the old var_off might have improved our bounds 5725 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 5726 * then new var_off is (0; 0x7f...fc) which improves our umax. 5727 */ 5728 __update_reg_bounds(false_reg); 5729 __update_reg_bounds(true_reg); 5730 } 5731 5732 /* Same as above, but for the case that dst_reg holds a constant and src_reg is 5733 * the variable reg. 
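*
* For illustration (a hypothetical snippet, not from a real program):
*   r1 = 5;
*   if (r1 > r2) goto L;
* here dst_reg (r1) holds the constant and src_reg (r2) is the variable,
* so in the taken branch r2's umax_value can be tightened to 4.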
5734 */ 5735 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, 5736 struct bpf_reg_state *false_reg, u64 val, 5737 u8 opcode, bool is_jmp32) 5738 { 5739 s64 sval; 5740 5741 if (__is_pointer_value(false, false_reg)) 5742 return; 5743 5744 val = is_jmp32 ? (u32)val : val; 5745 sval = is_jmp32 ? (s64)(s32)val : (s64)val; 5746 5747 switch (opcode) { 5748 case BPF_JEQ: 5749 case BPF_JNE: 5750 { 5751 struct bpf_reg_state *reg = 5752 opcode == BPF_JEQ ? true_reg : false_reg; 5753 5754 if (is_jmp32) { 5755 u64 old_v = reg->var_off.value; 5756 u64 hi_mask = ~0xffffffffULL; 5757 5758 reg->var_off.value = (old_v & hi_mask) | val; 5759 reg->var_off.mask &= hi_mask; 5760 } else { 5761 __mark_reg_known(reg, val); 5762 } 5763 break; 5764 } 5765 case BPF_JSET: 5766 false_reg->var_off = tnum_and(false_reg->var_off, 5767 tnum_const(~val)); 5768 if (is_power_of_2(val)) 5769 true_reg->var_off = tnum_or(true_reg->var_off, 5770 tnum_const(val)); 5771 break; 5772 case BPF_JGE: 5773 case BPF_JGT: 5774 { 5775 u64 false_umin = opcode == BPF_JGT ? val : val + 1; 5776 u64 true_umax = opcode == BPF_JGT ? val - 1 : val; 5777 5778 if (is_jmp32) { 5779 false_umin += gen_hi_min(false_reg->var_off); 5780 true_umax += gen_hi_max(true_reg->var_off); 5781 } 5782 false_reg->umin_value = max(false_reg->umin_value, false_umin); 5783 true_reg->umax_value = min(true_reg->umax_value, true_umax); 5784 break; 5785 } 5786 case BPF_JSGE: 5787 case BPF_JSGT: 5788 { 5789 s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1; 5790 s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval; 5791 5792 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) 5793 break; 5794 false_reg->smin_value = max(false_reg->smin_value, false_smin); 5795 true_reg->smax_value = min(true_reg->smax_value, true_smax); 5796 break; 5797 } 5798 case BPF_JLE: 5799 case BPF_JLT: 5800 { 5801 u64 false_umax = opcode == BPF_JLT ? val : val - 1; 5802 u64 true_umin = opcode == BPF_JLT ? val + 1 : val; 5803 5804 if (is_jmp32) { 5805 false_umax += gen_hi_max(false_reg->var_off); 5806 true_umin += gen_hi_min(true_reg->var_off); 5807 } 5808 false_reg->umax_value = min(false_reg->umax_value, false_umax); 5809 true_reg->umin_value = max(true_reg->umin_value, true_umin); 5810 break; 5811 } 5812 case BPF_JSLE: 5813 case BPF_JSLT: 5814 { 5815 s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1; 5816 s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval; 5817 5818 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) 5819 break; 5820 false_reg->smax_value = min(false_reg->smax_value, false_smax); 5821 true_reg->smin_value = max(true_reg->smin_value, true_smin); 5822 break; 5823 } 5824 default: 5825 break; 5826 } 5827 5828 __reg_deduce_bounds(false_reg); 5829 __reg_deduce_bounds(true_reg); 5830 /* We might have learned some bits from the bounds. */ 5831 __reg_bound_offset(false_reg); 5832 __reg_bound_offset(true_reg); 5833 if (is_jmp32) { 5834 __reg_bound_offset32(false_reg); 5835 __reg_bound_offset32(true_reg); 5836 } 5837 /* Intersecting with the old var_off might have improved our bounds 5838 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 5839 * then new var_off is (0; 0x7f...fc) which improves our umax. 
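* (As a reading aid: in the (v; m) tnum notation used above, bits set in
* m are unknown and the remaining bits are known to equal the
* corresponding bits of v.)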
5840 */ 5841 __update_reg_bounds(false_reg); 5842 __update_reg_bounds(true_reg); 5843 } 5844 5845 /* Regs are known to be equal, so intersect their min/max/var_off */ 5846 static void __reg_combine_min_max(struct bpf_reg_state *src_reg, 5847 struct bpf_reg_state *dst_reg) 5848 { 5849 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value, 5850 dst_reg->umin_value); 5851 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value, 5852 dst_reg->umax_value); 5853 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value, 5854 dst_reg->smin_value); 5855 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value, 5856 dst_reg->smax_value); 5857 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, 5858 dst_reg->var_off); 5859 /* We might have learned new bounds from the var_off. */ 5860 __update_reg_bounds(src_reg); 5861 __update_reg_bounds(dst_reg); 5862 /* We might have learned something about the sign bit. */ 5863 __reg_deduce_bounds(src_reg); 5864 __reg_deduce_bounds(dst_reg); 5865 /* We might have learned some bits from the bounds. */ 5866 __reg_bound_offset(src_reg); 5867 __reg_bound_offset(dst_reg); 5868 /* Intersecting with the old var_off might have improved our bounds 5869 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 5870 * then new var_off is (0; 0x7f...fc) which improves our umax. 5871 */ 5872 __update_reg_bounds(src_reg); 5873 __update_reg_bounds(dst_reg); 5874 } 5875 5876 static void reg_combine_min_max(struct bpf_reg_state *true_src, 5877 struct bpf_reg_state *true_dst, 5878 struct bpf_reg_state *false_src, 5879 struct bpf_reg_state *false_dst, 5880 u8 opcode) 5881 { 5882 switch (opcode) { 5883 case BPF_JEQ: 5884 __reg_combine_min_max(true_src, true_dst); 5885 break; 5886 case BPF_JNE: 5887 __reg_combine_min_max(false_src, false_dst); 5888 break; 5889 } 5890 } 5891 5892 static void mark_ptr_or_null_reg(struct bpf_func_state *state, 5893 struct bpf_reg_state *reg, u32 id, 5894 bool is_null) 5895 { 5896 if (reg_type_may_be_null(reg->type) && reg->id == id) { 5897 /* Old offset (both fixed and variable parts) should 5898 * have been known-zero, because we don't allow pointer 5899 * arithmetic on pointers that might be NULL. 5900 */ 5901 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || 5902 !tnum_equals_const(reg->var_off, 0) || 5903 reg->off)) { 5904 __mark_reg_known_zero(reg); 5905 reg->off = 0; 5906 } 5907 if (is_null) { 5908 reg->type = SCALAR_VALUE; 5909 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) { 5910 if (reg->map_ptr->inner_map_meta) { 5911 reg->type = CONST_PTR_TO_MAP; 5912 reg->map_ptr = reg->map_ptr->inner_map_meta; 5913 } else if (reg->map_ptr->map_type == 5914 BPF_MAP_TYPE_XSKMAP) { 5915 reg->type = PTR_TO_XDP_SOCK; 5916 } else { 5917 reg->type = PTR_TO_MAP_VALUE; 5918 } 5919 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { 5920 reg->type = PTR_TO_SOCKET; 5921 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { 5922 reg->type = PTR_TO_SOCK_COMMON; 5923 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { 5924 reg->type = PTR_TO_TCP_SOCK; 5925 } 5926 if (is_null) { 5927 /* We don't need id and ref_obj_id from this point 5928 * onwards anymore, thus we should better reset it, 5929 * so that state pruning has chances to take effect. 5930 */ 5931 reg->id = 0; 5932 reg->ref_obj_id = 0; 5933 } else if (!reg_may_point_to_spin_lock(reg)) { 5934 /* For not-NULL ptr, reg->ref_obj_id will be reset 5935 * in release_reg_references(). 5936 * 5937 * reg->id is still used by spin_lock ptr. 
Other 5938 * than spin_lock ptr type, reg->id can be reset. 5939 */ 5940 reg->id = 0; 5941 } 5942 } 5943 } 5944 5945 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, 5946 bool is_null) 5947 { 5948 struct bpf_reg_state *reg; 5949 int i; 5950 5951 for (i = 0; i < MAX_BPF_REG; i++) 5952 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); 5953 5954 bpf_for_each_spilled_reg(i, state, reg) { 5955 if (!reg) 5956 continue; 5957 mark_ptr_or_null_reg(state, reg, id, is_null); 5958 } 5959 } 5960 5961 /* The logic is similar to find_good_pkt_pointers(), both could eventually 5962 * be folded together at some point. 5963 */ 5964 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, 5965 bool is_null) 5966 { 5967 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 5968 struct bpf_reg_state *regs = state->regs; 5969 u32 ref_obj_id = regs[regno].ref_obj_id; 5970 u32 id = regs[regno].id; 5971 int i; 5972 5973 if (ref_obj_id && ref_obj_id == id && is_null) 5974 /* regs[regno] is in the " == NULL" branch. 5975 * No one could have freed the reference state before 5976 * doing the NULL check. 5977 */ 5978 WARN_ON_ONCE(release_reference_state(state, id)); 5979 5980 for (i = 0; i <= vstate->curframe; i++) 5981 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); 5982 } 5983 5984 static bool try_match_pkt_pointers(const struct bpf_insn *insn, 5985 struct bpf_reg_state *dst_reg, 5986 struct bpf_reg_state *src_reg, 5987 struct bpf_verifier_state *this_branch, 5988 struct bpf_verifier_state *other_branch) 5989 { 5990 if (BPF_SRC(insn->code) != BPF_X) 5991 return false; 5992 5993 /* Pointers are always 64-bit. */ 5994 if (BPF_CLASS(insn->code) == BPF_JMP32) 5995 return false; 5996 5997 switch (BPF_OP(insn->code)) { 5998 case BPF_JGT: 5999 if ((dst_reg->type == PTR_TO_PACKET && 6000 src_reg->type == PTR_TO_PACKET_END) || 6001 (dst_reg->type == PTR_TO_PACKET_META && 6002 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 6003 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ 6004 find_good_pkt_pointers(this_branch, dst_reg, 6005 dst_reg->type, false); 6006 } else if ((dst_reg->type == PTR_TO_PACKET_END && 6007 src_reg->type == PTR_TO_PACKET) || 6008 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 6009 src_reg->type == PTR_TO_PACKET_META)) { 6010 /* pkt_end > pkt_data', pkt_data > pkt_meta' */ 6011 find_good_pkt_pointers(other_branch, src_reg, 6012 src_reg->type, true); 6013 } else { 6014 return false; 6015 } 6016 break; 6017 case BPF_JLT: 6018 if ((dst_reg->type == PTR_TO_PACKET && 6019 src_reg->type == PTR_TO_PACKET_END) || 6020 (dst_reg->type == PTR_TO_PACKET_META && 6021 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 6022 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ 6023 find_good_pkt_pointers(other_branch, dst_reg, 6024 dst_reg->type, true); 6025 } else if ((dst_reg->type == PTR_TO_PACKET_END && 6026 src_reg->type == PTR_TO_PACKET) || 6027 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 6028 src_reg->type == PTR_TO_PACKET_META)) { 6029 /* pkt_end < pkt_data', pkt_data > pkt_meta' */ 6030 find_good_pkt_pointers(this_branch, src_reg, 6031 src_reg->type, false); 6032 } else { 6033 return false; 6034 } 6035 break; 6036 case BPF_JGE: 6037 if ((dst_reg->type == PTR_TO_PACKET && 6038 src_reg->type == PTR_TO_PACKET_END) || 6039 (dst_reg->type == PTR_TO_PACKET_META && 6040 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 6041 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ 6042 find_good_pkt_pointers(this_branch, dst_reg, 6043 
dst_reg->type, true);
6044 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
6045 src_reg->type == PTR_TO_PACKET) ||
6046 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
6047 src_reg->type == PTR_TO_PACKET_META)) {
6048 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
6049 find_good_pkt_pointers(other_branch, src_reg,
6050 src_reg->type, false);
6051 } else {
6052 return false;
6053 }
6054 break;
6055 case BPF_JLE:
6056 if ((dst_reg->type == PTR_TO_PACKET &&
6057 src_reg->type == PTR_TO_PACKET_END) ||
6058 (dst_reg->type == PTR_TO_PACKET_META &&
6059 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
6060 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
6061 find_good_pkt_pointers(other_branch, dst_reg,
6062 dst_reg->type, false);
6063 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
6064 src_reg->type == PTR_TO_PACKET) ||
6065 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
6066 src_reg->type == PTR_TO_PACKET_META)) {
6067 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
6068 find_good_pkt_pointers(this_branch, src_reg,
6069 src_reg->type, true);
6070 } else {
6071 return false;
6072 }
6073 break;
6074 default:
6075 return false;
6076 }
6077
6078 return true;
6079 }
6080
6081 static int check_cond_jmp_op(struct bpf_verifier_env *env,
6082 struct bpf_insn *insn, int *insn_idx)
6083 {
6084 struct bpf_verifier_state *this_branch = env->cur_state;
6085 struct bpf_verifier_state *other_branch;
6086 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
6087 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
6088 u8 opcode = BPF_OP(insn->code);
6089 bool is_jmp32;
6090 int pred = -1;
6091 int err;
6092
6093 /* Only conditional jumps are expected to reach here. */
6094 if (opcode == BPF_JA || opcode > BPF_JSLE) {
6095 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
6096 return -EINVAL;
6097 }
6098
6099 if (BPF_SRC(insn->code) == BPF_X) {
6100 if (insn->imm != 0) {
6101 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
6102 return -EINVAL;
6103 }
6104
6105 /* check src1 operand */
6106 err = check_reg_arg(env, insn->src_reg, SRC_OP);
6107 if (err)
6108 return err;
6109
6110 if (is_pointer_value(env, insn->src_reg)) {
6111 verbose(env, "R%d pointer comparison prohibited\n",
6112 insn->src_reg);
6113 return -EACCES;
6114 }
6115 src_reg = &regs[insn->src_reg];
6116 } else {
6117 if (insn->src_reg != BPF_REG_0) {
6118 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
6119 return -EINVAL;
6120 }
6121 }
6122
6123 /* check src2 operand */
6124 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6125 if (err)
6126 return err;
6127
6128 dst_reg = &regs[insn->dst_reg];
6129 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
6130
6131 if (BPF_SRC(insn->code) == BPF_K)
6132 pred = is_branch_taken(dst_reg, insn->imm,
6133 opcode, is_jmp32);
6134 else if (src_reg->type == SCALAR_VALUE &&
6135 tnum_is_const(src_reg->var_off))
6136 pred = is_branch_taken(dst_reg, src_reg->var_off.value,
6137 opcode, is_jmp32);
6138 if (pred >= 0) {
6139 err = mark_chain_precision(env, insn->dst_reg);
6140 if (BPF_SRC(insn->code) == BPF_X && !err)
6141 err = mark_chain_precision(env, insn->src_reg);
6142 if (err)
6143 return err;
6144 }
6145 if (pred == 1) {
6146 /* only follow the goto, ignore fall-through */
6147 *insn_idx += insn->off;
6148 return 0;
6149 } else if (pred == 0) {
6150 /* only follow fall-through branch, since
6151 * that's where the program will go
6152 */
6153 return 0;
6154 }
6155
6156 other_branch = push_stack(env, *insn_idx + insn->off + 1,
*insn_idx,
6157 false);
6158 if (!other_branch)
6159 return -EFAULT;
6160 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
6161
6162 /* detect if we are comparing against a constant value so we can adjust
6163 * our min/max values for our dst register.
6164 * this is only legit if both are scalars (or pointers to the same
6165 * object, I suppose, but we don't support that right now), because
6166 * otherwise the different base pointers mean the offsets aren't
6167 * comparable.
6168 */
6169 if (BPF_SRC(insn->code) == BPF_X) {
6170 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
6171 struct bpf_reg_state lo_reg0 = *dst_reg;
6172 struct bpf_reg_state lo_reg1 = *src_reg;
6173 struct bpf_reg_state *src_lo, *dst_lo;
6174
6175 dst_lo = &lo_reg0;
6176 src_lo = &lo_reg1;
6177 coerce_reg_to_size(dst_lo, 4);
6178 coerce_reg_to_size(src_lo, 4);
6179
6180 if (dst_reg->type == SCALAR_VALUE &&
6181 src_reg->type == SCALAR_VALUE) {
6182 if (tnum_is_const(src_reg->var_off) ||
6183 (is_jmp32 && tnum_is_const(src_lo->var_off)))
6184 reg_set_min_max(&other_branch_regs[insn->dst_reg],
6185 dst_reg,
6186 is_jmp32
6187 ? src_lo->var_off.value
6188 : src_reg->var_off.value,
6189 opcode, is_jmp32);
6190 else if (tnum_is_const(dst_reg->var_off) ||
6191 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
6192 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
6193 src_reg,
6194 is_jmp32
6195 ? dst_lo->var_off.value
6196 : dst_reg->var_off.value,
6197 opcode, is_jmp32);
6198 else if (!is_jmp32 &&
6199 (opcode == BPF_JEQ || opcode == BPF_JNE))
6200 /* Comparing for equality, we can combine knowledge */
6201 reg_combine_min_max(&other_branch_regs[insn->src_reg],
6202 &other_branch_regs[insn->dst_reg],
6203 src_reg, dst_reg, opcode);
6204 }
6205 } else if (dst_reg->type == SCALAR_VALUE) {
6206 reg_set_min_max(&other_branch_regs[insn->dst_reg],
6207 dst_reg, insn->imm, opcode, is_jmp32);
6208 }
6209
6210 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
6211 * NOTE: these optimizations below are related to pointer comparison
6212 * which will never be JMP32.
6213 */
6214 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
6215 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
6216 reg_type_may_be_null(dst_reg->type)) {
6217 /* Mark all identical registers in each branch as either
6219 */ 6220 mark_ptr_or_null_regs(this_branch, insn->dst_reg, 6221 opcode == BPF_JNE); 6222 mark_ptr_or_null_regs(other_branch, insn->dst_reg, 6223 opcode == BPF_JEQ); 6224 } else if (!try_match_pkt_pointers(insn, dst_reg, ®s[insn->src_reg], 6225 this_branch, other_branch) && 6226 is_pointer_value(env, insn->dst_reg)) { 6227 verbose(env, "R%d pointer comparison prohibited\n", 6228 insn->dst_reg); 6229 return -EACCES; 6230 } 6231 if (env->log.level & BPF_LOG_LEVEL) 6232 print_verifier_state(env, this_branch->frame[this_branch->curframe]); 6233 return 0; 6234 } 6235 6236 /* verify BPF_LD_IMM64 instruction */ 6237 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) 6238 { 6239 struct bpf_insn_aux_data *aux = cur_aux(env); 6240 struct bpf_reg_state *regs = cur_regs(env); 6241 struct bpf_map *map; 6242 int err; 6243 6244 if (BPF_SIZE(insn->code) != BPF_DW) { 6245 verbose(env, "invalid BPF_LD_IMM insn\n"); 6246 return -EINVAL; 6247 } 6248 if (insn->off != 0) { 6249 verbose(env, "BPF_LD_IMM64 uses reserved fields\n"); 6250 return -EINVAL; 6251 } 6252 6253 err = check_reg_arg(env, insn->dst_reg, DST_OP); 6254 if (err) 6255 return err; 6256 6257 if (insn->src_reg == 0) { 6258 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; 6259 6260 regs[insn->dst_reg].type = SCALAR_VALUE; 6261 __mark_reg_known(®s[insn->dst_reg], imm); 6262 return 0; 6263 } 6264 6265 map = env->used_maps[aux->map_index]; 6266 mark_reg_known_zero(env, regs, insn->dst_reg); 6267 regs[insn->dst_reg].map_ptr = map; 6268 6269 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { 6270 regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; 6271 regs[insn->dst_reg].off = aux->map_off; 6272 if (map_value_has_spin_lock(map)) 6273 regs[insn->dst_reg].id = ++env->id_gen; 6274 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { 6275 regs[insn->dst_reg].type = CONST_PTR_TO_MAP; 6276 } else { 6277 verbose(env, "bpf verifier is misconfigured\n"); 6278 return -EINVAL; 6279 } 6280 6281 return 0; 6282 } 6283 6284 static bool may_access_skb(enum bpf_prog_type type) 6285 { 6286 switch (type) { 6287 case BPF_PROG_TYPE_SOCKET_FILTER: 6288 case BPF_PROG_TYPE_SCHED_CLS: 6289 case BPF_PROG_TYPE_SCHED_ACT: 6290 return true; 6291 default: 6292 return false; 6293 } 6294 } 6295 6296 /* verify safety of LD_ABS|LD_IND instructions: 6297 * - they can only appear in the programs where ctx == skb 6298 * - since they are wrappers of function calls, they scratch R1-R5 registers, 6299 * preserve R6-R9, and store return value into R0 6300 * 6301 * Implicit input: 6302 * ctx == skb == R6 == CTX 6303 * 6304 * Explicit input: 6305 * SRC == any register 6306 * IMM == 32-bit immediate 6307 * 6308 * Output: 6309 * R0 - 8/16/32-bit skb data converted to cpu endianness 6310 */ 6311 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) 6312 { 6313 struct bpf_reg_state *regs = cur_regs(env); 6314 static const int ctx_reg = BPF_REG_6; 6315 u8 mode = BPF_MODE(insn->code); 6316 int i, err; 6317 6318 if (!may_access_skb(env->prog->type)) { 6319 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); 6320 return -EINVAL; 6321 } 6322 6323 if (!env->ops->gen_ld_abs) { 6324 verbose(env, "bpf verifier is misconfigured\n"); 6325 return -EINVAL; 6326 } 6327 6328 if (env->subprog_cnt > 1) { 6329 /* when program has LD_ABS insn JITs and interpreter assume 6330 * that r1 == ctx == skb which is not the case for callees 6331 * that can have arbitrary arguments. 
It's problematic
6332 * for main prog as well since JITs would need to analyze
6333 * all functions in order to make proper register save/restore
6334 * decisions in the main prog. Hence disallow LD_ABS with calls.
6335 */
6336 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
6337 return -EINVAL;
6338 }
6339
6340 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
6341 BPF_SIZE(insn->code) == BPF_DW ||
6342 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
6343 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
6344 return -EINVAL;
6345 }
6346
6347 /* check whether implicit source operand (register R6) is readable */
6348 err = check_reg_arg(env, ctx_reg, SRC_OP);
6349 if (err)
6350 return err;
6351
6352 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
6353 * gen_ld_abs() may terminate the program at runtime, leading to
6354 * reference leak.
6355 */
6356 err = check_reference_leak(env);
6357 if (err) {
6358 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
6359 return err;
6360 }
6361
6362 if (env->cur_state->active_spin_lock) {
6363 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
6364 return -EINVAL;
6365 }
6366
6367 if (regs[ctx_reg].type != PTR_TO_CTX) {
6368 verbose(env,
6369 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
6370 return -EINVAL;
6371 }
6372
6373 if (mode == BPF_IND) {
6374 /* check explicit source operand */
6375 err = check_reg_arg(env, insn->src_reg, SRC_OP);
6376 if (err)
6377 return err;
6378 }
6379
6380 err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
6381 if (err < 0)
6382 return err;
6383
6384 /* reset caller saved regs to unreadable */
6385 for (i = 0; i < CALLER_SAVED_REGS; i++) {
6386 mark_reg_not_init(env, regs, caller_saved[i]);
6387 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6388 }
6389
6390 /* mark destination R0 register as readable, since it contains
6391 * the value fetched from the packet.
6392 * Already marked as written above.
6393 */
6394 mark_reg_unknown(env, regs, BPF_REG_0);
6395 /* ld_abs loads up to 32 bits of skb data. */
6396 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
6397 return 0;
6398 }
6399
6400 static int check_return_code(struct bpf_verifier_env *env)
6401 {
6402 struct tnum enforce_attach_type_range = tnum_unknown;
6403 const struct bpf_prog *prog = env->prog;
6404 struct bpf_reg_state *reg;
6405 struct tnum range = tnum_range(0, 1);
6406 int err;
6407
6408 /* The struct_ops func-ptr's return type could be "void" */
6409 if (env->prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
6410 !prog->aux->attach_func_proto->type)
6411 return 0;
6412
6413 /* eBPF calling convention is such that R0 is used
6414 * to return the value from eBPF program.
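* (For example, a program commonly ends with something like
*   BPF_MOV64_IMM(BPF_REG_0, 0),
*   BPF_EXIT_INSN(),
* which is what makes R0 readable at the exit.)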
6415 * Make sure that it's readable at this time 6416 * of bpf_exit, which means that program wrote 6417 * something into it earlier 6418 */ 6419 err = check_reg_arg(env, BPF_REG_0, SRC_OP); 6420 if (err) 6421 return err; 6422 6423 if (is_pointer_value(env, BPF_REG_0)) { 6424 verbose(env, "R0 leaks addr as return value\n"); 6425 return -EACCES; 6426 } 6427 6428 switch (env->prog->type) { 6429 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 6430 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || 6431 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG) 6432 range = tnum_range(1, 1); 6433 break; 6434 case BPF_PROG_TYPE_CGROUP_SKB: 6435 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { 6436 range = tnum_range(0, 3); 6437 enforce_attach_type_range = tnum_range(2, 3); 6438 } 6439 break; 6440 case BPF_PROG_TYPE_CGROUP_SOCK: 6441 case BPF_PROG_TYPE_SOCK_OPS: 6442 case BPF_PROG_TYPE_CGROUP_DEVICE: 6443 case BPF_PROG_TYPE_CGROUP_SYSCTL: 6444 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 6445 break; 6446 case BPF_PROG_TYPE_RAW_TRACEPOINT: 6447 if (!env->prog->aux->attach_btf_id) 6448 return 0; 6449 range = tnum_const(0); 6450 break; 6451 default: 6452 return 0; 6453 } 6454 6455 reg = cur_regs(env) + BPF_REG_0; 6456 if (reg->type != SCALAR_VALUE) { 6457 verbose(env, "At program exit the register R0 is not a known value (%s)\n", 6458 reg_type_str[reg->type]); 6459 return -EINVAL; 6460 } 6461 6462 if (!tnum_in(range, reg->var_off)) { 6463 char tn_buf[48]; 6464 6465 verbose(env, "At program exit the register R0 "); 6466 if (!tnum_is_unknown(reg->var_off)) { 6467 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 6468 verbose(env, "has value %s", tn_buf); 6469 } else { 6470 verbose(env, "has unknown scalar value"); 6471 } 6472 tnum_strn(tn_buf, sizeof(tn_buf), range); 6473 verbose(env, " should have been in %s\n", tn_buf); 6474 return -EINVAL; 6475 } 6476 6477 if (!tnum_is_unknown(enforce_attach_type_range) && 6478 tnum_in(enforce_attach_type_range, reg->var_off)) 6479 env->prog->enforce_expected_attach_type = 1; 6480 return 0; 6481 } 6482 6483 /* non-recursive DFS pseudo code 6484 * 1 procedure DFS-iterative(G,v): 6485 * 2 label v as discovered 6486 * 3 let S be a stack 6487 * 4 S.push(v) 6488 * 5 while S is not empty 6489 * 6 t <- S.pop() 6490 * 7 if t is what we're looking for: 6491 * 8 return t 6492 * 9 for all edges e in G.adjacentEdges(t) do 6493 * 10 if edge e is already labelled 6494 * 11 continue with the next edge 6495 * 12 w <- G.adjacentVertex(t,e) 6496 * 13 if vertex w is not discovered and not explored 6497 * 14 label e as tree-edge 6498 * 15 label w as discovered 6499 * 16 S.push(w) 6500 * 17 continue at 5 6501 * 18 else if vertex w is discovered 6502 * 19 label e as back-edge 6503 * 20 else 6504 * 21 // vertex w is explored 6505 * 22 label e as forward- or cross-edge 6506 * 23 label t as explored 6507 * 24 S.pop() 6508 * 6509 * convention: 6510 * 0x10 - discovered 6511 * 0x11 - discovered and fall-through edge labelled 6512 * 0x12 - discovered and fall-through and branch edges labelled 6513 * 0x20 - explored 6514 */ 6515 6516 enum { 6517 DISCOVERED = 0x10, 6518 EXPLORED = 0x20, 6519 FALLTHROUGH = 1, 6520 BRANCH = 2, 6521 }; 6522 6523 static u32 state_htab_size(struct bpf_verifier_env *env) 6524 { 6525 return env->prog->len; 6526 } 6527 6528 static struct bpf_verifier_state_list **explored_state( 6529 struct bpf_verifier_env *env, 6530 int idx) 6531 { 6532 struct bpf_verifier_state *cur = env->cur_state; 6533 struct bpf_func_state *state = cur->frame[cur->curframe]; 6534 6535 
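/* Bucket choice, illustrated with made-up numbers: in a 1000-insn program,
 * insn 20 reached from callsite 7 hashes to bucket (20 ^ 7) % 1000 == 19;
 * mixing in the callsite helps keep states reached via different call
 * chains in separate lists.
 */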
return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; 6536 } 6537 6538 static void init_explored_state(struct bpf_verifier_env *env, int idx) 6539 { 6540 env->insn_aux_data[idx].prune_point = true; 6541 } 6542 6543 /* t, w, e - match pseudo-code above: 6544 * t - index of current instruction 6545 * w - next instruction 6546 * e - edge 6547 */ 6548 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, 6549 bool loop_ok) 6550 { 6551 int *insn_stack = env->cfg.insn_stack; 6552 int *insn_state = env->cfg.insn_state; 6553 6554 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) 6555 return 0; 6556 6557 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH)) 6558 return 0; 6559 6560 if (w < 0 || w >= env->prog->len) { 6561 verbose_linfo(env, t, "%d: ", t); 6562 verbose(env, "jump out of range from insn %d to %d\n", t, w); 6563 return -EINVAL; 6564 } 6565 6566 if (e == BRANCH) 6567 /* mark branch target for state pruning */ 6568 init_explored_state(env, w); 6569 6570 if (insn_state[w] == 0) { 6571 /* tree-edge */ 6572 insn_state[t] = DISCOVERED | e; 6573 insn_state[w] = DISCOVERED; 6574 if (env->cfg.cur_stack >= env->prog->len) 6575 return -E2BIG; 6576 insn_stack[env->cfg.cur_stack++] = w; 6577 return 1; 6578 } else if ((insn_state[w] & 0xF0) == DISCOVERED) { 6579 if (loop_ok && env->allow_ptr_leaks) 6580 return 0; 6581 verbose_linfo(env, t, "%d: ", t); 6582 verbose_linfo(env, w, "%d: ", w); 6583 verbose(env, "back-edge from insn %d to %d\n", t, w); 6584 return -EINVAL; 6585 } else if (insn_state[w] == EXPLORED) { 6586 /* forward- or cross-edge */ 6587 insn_state[t] = DISCOVERED | e; 6588 } else { 6589 verbose(env, "insn state internal bug\n"); 6590 return -EFAULT; 6591 } 6592 return 0; 6593 } 6594 6595 /* non-recursive depth-first-search to detect loops in BPF program 6596 * loop == back-edge in directed graph 6597 */ 6598 static int check_cfg(struct bpf_verifier_env *env) 6599 { 6600 struct bpf_insn *insns = env->prog->insnsi; 6601 int insn_cnt = env->prog->len; 6602 int *insn_stack, *insn_state; 6603 int ret = 0; 6604 int i, t; 6605 6606 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 6607 if (!insn_state) 6608 return -ENOMEM; 6609 6610 insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 6611 if (!insn_stack) { 6612 kvfree(insn_state); 6613 return -ENOMEM; 6614 } 6615 6616 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ 6617 insn_stack[0] = 0; /* 0 is the first instruction */ 6618 env->cfg.cur_stack = 1; 6619 6620 peek_stack: 6621 if (env->cfg.cur_stack == 0) 6622 goto check_state; 6623 t = insn_stack[env->cfg.cur_stack - 1]; 6624 6625 if (BPF_CLASS(insns[t].code) == BPF_JMP || 6626 BPF_CLASS(insns[t].code) == BPF_JMP32) { 6627 u8 opcode = BPF_OP(insns[t].code); 6628 6629 if (opcode == BPF_EXIT) { 6630 goto mark_explored; 6631 } else if (opcode == BPF_CALL) { 6632 ret = push_insn(t, t + 1, FALLTHROUGH, env, false); 6633 if (ret == 1) 6634 goto peek_stack; 6635 else if (ret < 0) 6636 goto err_free; 6637 if (t + 1 < insn_cnt) 6638 init_explored_state(env, t + 1); 6639 if (insns[t].src_reg == BPF_PSEUDO_CALL) { 6640 init_explored_state(env, t); 6641 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, 6642 env, false); 6643 if (ret == 1) 6644 goto peek_stack; 6645 else if (ret < 0) 6646 goto err_free; 6647 } 6648 } else if (opcode == BPF_JA) { 6649 if (BPF_SRC(insns[t].code) != BPF_K) { 6650 ret = -EINVAL; 6651 goto err_free; 6652 } 6653 /* unconditional jump with 
single edge */ 6654 ret = push_insn(t, t + insns[t].off + 1, 6655 FALLTHROUGH, env, true); 6656 if (ret == 1) 6657 goto peek_stack; 6658 else if (ret < 0) 6659 goto err_free; 6660 /* unconditional jmp is not a good pruning point, 6661 * but it's marked, since backtracking needs 6662 * to record jmp history in is_state_visited(). 6663 */ 6664 init_explored_state(env, t + insns[t].off + 1); 6665 /* tell verifier to check for equivalent states 6666 * after every call and jump 6667 */ 6668 if (t + 1 < insn_cnt) 6669 init_explored_state(env, t + 1); 6670 } else { 6671 /* conditional jump with two edges */ 6672 init_explored_state(env, t); 6673 ret = push_insn(t, t + 1, FALLTHROUGH, env, true); 6674 if (ret == 1) 6675 goto peek_stack; 6676 else if (ret < 0) 6677 goto err_free; 6678 6679 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true); 6680 if (ret == 1) 6681 goto peek_stack; 6682 else if (ret < 0) 6683 goto err_free; 6684 } 6685 } else { 6686 /* all other non-branch instructions with single 6687 * fall-through edge 6688 */ 6689 ret = push_insn(t, t + 1, FALLTHROUGH, env, false); 6690 if (ret == 1) 6691 goto peek_stack; 6692 else if (ret < 0) 6693 goto err_free; 6694 } 6695 6696 mark_explored: 6697 insn_state[t] = EXPLORED; 6698 if (env->cfg.cur_stack-- <= 0) { 6699 verbose(env, "pop stack internal bug\n"); 6700 ret = -EFAULT; 6701 goto err_free; 6702 } 6703 goto peek_stack; 6704 6705 check_state: 6706 for (i = 0; i < insn_cnt; i++) { 6707 if (insn_state[i] != EXPLORED) { 6708 verbose(env, "unreachable insn %d\n", i); 6709 ret = -EINVAL; 6710 goto err_free; 6711 } 6712 } 6713 ret = 0; /* cfg looks good */ 6714 6715 err_free: 6716 kvfree(insn_state); 6717 kvfree(insn_stack); 6718 env->cfg.insn_state = env->cfg.insn_stack = NULL; 6719 return ret; 6720 } 6721 6722 /* The minimum supported BTF func info size */ 6723 #define MIN_BPF_FUNCINFO_SIZE 8 6724 #define MAX_FUNCINFO_REC_SIZE 252 6725 6726 static int check_btf_func(struct bpf_verifier_env *env, 6727 const union bpf_attr *attr, 6728 union bpf_attr __user *uattr) 6729 { 6730 u32 i, nfuncs, urec_size, min_size; 6731 u32 krec_size = sizeof(struct bpf_func_info); 6732 struct bpf_func_info *krecord; 6733 struct bpf_func_info_aux *info_aux = NULL; 6734 const struct btf_type *type; 6735 struct bpf_prog *prog; 6736 const struct btf *btf; 6737 void __user *urecord; 6738 u32 prev_offset = 0; 6739 int ret = 0; 6740 6741 nfuncs = attr->func_info_cnt; 6742 if (!nfuncs) 6743 return 0; 6744 6745 if (nfuncs != env->subprog_cnt) { 6746 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); 6747 return -EINVAL; 6748 } 6749 6750 urec_size = attr->func_info_rec_size; 6751 if (urec_size < MIN_BPF_FUNCINFO_SIZE || 6752 urec_size > MAX_FUNCINFO_REC_SIZE || 6753 urec_size % sizeof(u32)) { 6754 verbose(env, "invalid func info rec size %u\n", urec_size); 6755 return -EINVAL; 6756 } 6757 6758 prog = env->prog; 6759 btf = prog->aux->btf; 6760 6761 urecord = u64_to_user_ptr(attr->func_info); 6762 min_size = min_t(u32, krec_size, urec_size); 6763 6764 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); 6765 if (!krecord) 6766 return -ENOMEM; 6767 info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN); 6768 if (!info_aux) 6769 goto err_free; 6770 6771 for (i = 0; i < nfuncs; i++) { 6772 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); 6773 if (ret) { 6774 if (ret == -E2BIG) { 6775 verbose(env, "nonzero tailing record in func info"); 6776 /* set the size kernel expects so loader can zero 6777 * 
out the rest of the record. 6778 */ 6779 if (put_user(min_size, &uattr->func_info_rec_size)) 6780 ret = -EFAULT; 6781 } 6782 goto err_free; 6783 } 6784 6785 if (copy_from_user(&krecord[i], urecord, min_size)) { 6786 ret = -EFAULT; 6787 goto err_free; 6788 } 6789 6790 /* check insn_off */ 6791 if (i == 0) { 6792 if (krecord[i].insn_off) { 6793 verbose(env, 6794 "nonzero insn_off %u for the first func info record", 6795 krecord[i].insn_off); 6796 ret = -EINVAL; 6797 goto err_free; 6798 } 6799 } else if (krecord[i].insn_off <= prev_offset) { 6800 verbose(env, 6801 "same or smaller insn offset (%u) than previous func info record (%u)", 6802 krecord[i].insn_off, prev_offset); 6803 ret = -EINVAL; 6804 goto err_free; 6805 } 6806 6807 if (env->subprog_info[i].start != krecord[i].insn_off) { 6808 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); 6809 ret = -EINVAL; 6810 goto err_free; 6811 } 6812 6813 /* check type_id */ 6814 type = btf_type_by_id(btf, krecord[i].type_id); 6815 if (!type || !btf_type_is_func(type)) { 6816 verbose(env, "invalid type id %d in func info", 6817 krecord[i].type_id); 6818 ret = -EINVAL; 6819 goto err_free; 6820 } 6821 info_aux[i].linkage = BTF_INFO_VLEN(type->info); 6822 prev_offset = krecord[i].insn_off; 6823 urecord += urec_size; 6824 } 6825 6826 prog->aux->func_info = krecord; 6827 prog->aux->func_info_cnt = nfuncs; 6828 prog->aux->func_info_aux = info_aux; 6829 return 0; 6830 6831 err_free: 6832 kvfree(krecord); 6833 kfree(info_aux); 6834 return ret; 6835 } 6836 6837 static void adjust_btf_func(struct bpf_verifier_env *env) 6838 { 6839 struct bpf_prog_aux *aux = env->prog->aux; 6840 int i; 6841 6842 if (!aux->func_info) 6843 return; 6844 6845 for (i = 0; i < env->subprog_cnt; i++) 6846 aux->func_info[i].insn_off = env->subprog_info[i].start; 6847 } 6848 6849 #define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \ 6850 sizeof(((struct bpf_line_info *)(0))->line_col)) 6851 #define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE 6852 6853 static int check_btf_line(struct bpf_verifier_env *env, 6854 const union bpf_attr *attr, 6855 union bpf_attr __user *uattr) 6856 { 6857 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0; 6858 struct bpf_subprog_info *sub; 6859 struct bpf_line_info *linfo; 6860 struct bpf_prog *prog; 6861 const struct btf *btf; 6862 void __user *ulinfo; 6863 int err; 6864 6865 nr_linfo = attr->line_info_cnt; 6866 if (!nr_linfo) 6867 return 0; 6868 6869 rec_size = attr->line_info_rec_size; 6870 if (rec_size < MIN_BPF_LINEINFO_SIZE || 6871 rec_size > MAX_LINEINFO_REC_SIZE || 6872 rec_size & (sizeof(u32) - 1)) 6873 return -EINVAL; 6874 6875 /* Need to zero it in case the userspace may 6876 * pass in a smaller bpf_line_info object. 
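* (i.e. if a future kernel grows struct bpf_line_info while userspace
* still passes today's smaller record, only rec_size bytes per record
* are copied below and the new tail fields are left zeroed by kvcalloc()).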
6877 */ 6878 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info), 6879 GFP_KERNEL | __GFP_NOWARN); 6880 if (!linfo) 6881 return -ENOMEM; 6882 6883 prog = env->prog; 6884 btf = prog->aux->btf; 6885 6886 s = 0; 6887 sub = env->subprog_info; 6888 ulinfo = u64_to_user_ptr(attr->line_info); 6889 expected_size = sizeof(struct bpf_line_info); 6890 ncopy = min_t(u32, expected_size, rec_size); 6891 for (i = 0; i < nr_linfo; i++) { 6892 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size); 6893 if (err) { 6894 if (err == -E2BIG) { 6895 verbose(env, "nonzero tailing record in line_info"); 6896 if (put_user(expected_size, 6897 &uattr->line_info_rec_size)) 6898 err = -EFAULT; 6899 } 6900 goto err_free; 6901 } 6902 6903 if (copy_from_user(&linfo[i], ulinfo, ncopy)) { 6904 err = -EFAULT; 6905 goto err_free; 6906 } 6907 6908 /* 6909 * Check insn_off to ensure 6910 * 1) strictly increasing AND 6911 * 2) bounded by prog->len 6912 * 6913 * The linfo[0].insn_off == 0 check logically falls into 6914 * the later "missing bpf_line_info for func..." case 6915 * because the first linfo[0].insn_off must be the 6916 * first sub also and the first sub must have 6917 * subprog_info[0].start == 0. 6918 */ 6919 if ((i && linfo[i].insn_off <= prev_offset) || 6920 linfo[i].insn_off >= prog->len) { 6921 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n", 6922 i, linfo[i].insn_off, prev_offset, 6923 prog->len); 6924 err = -EINVAL; 6925 goto err_free; 6926 } 6927 6928 if (!prog->insnsi[linfo[i].insn_off].code) { 6929 verbose(env, 6930 "Invalid insn code at line_info[%u].insn_off\n", 6931 i); 6932 err = -EINVAL; 6933 goto err_free; 6934 } 6935 6936 if (!btf_name_by_offset(btf, linfo[i].line_off) || 6937 !btf_name_by_offset(btf, linfo[i].file_name_off)) { 6938 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i); 6939 err = -EINVAL; 6940 goto err_free; 6941 } 6942 6943 if (s != env->subprog_cnt) { 6944 if (linfo[i].insn_off == sub[s].start) { 6945 sub[s].linfo_idx = i; 6946 s++; 6947 } else if (sub[s].start < linfo[i].insn_off) { 6948 verbose(env, "missing bpf_line_info for func#%u\n", s); 6949 err = -EINVAL; 6950 goto err_free; 6951 } 6952 } 6953 6954 prev_offset = linfo[i].insn_off; 6955 ulinfo += rec_size; 6956 } 6957 6958 if (s != env->subprog_cnt) { 6959 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n", 6960 env->subprog_cnt - s, s); 6961 err = -EINVAL; 6962 goto err_free; 6963 } 6964 6965 prog->aux->linfo = linfo; 6966 prog->aux->nr_linfo = nr_linfo; 6967 6968 return 0; 6969 6970 err_free: 6971 kvfree(linfo); 6972 return err; 6973 } 6974 6975 static int check_btf_info(struct bpf_verifier_env *env, 6976 const union bpf_attr *attr, 6977 union bpf_attr __user *uattr) 6978 { 6979 struct btf *btf; 6980 int err; 6981 6982 if (!attr->func_info_cnt && !attr->line_info_cnt) 6983 return 0; 6984 6985 btf = btf_get_by_fd(attr->prog_btf_fd); 6986 if (IS_ERR(btf)) 6987 return PTR_ERR(btf); 6988 env->prog->aux->btf = btf; 6989 6990 err = check_btf_func(env, attr, uattr); 6991 if (err) 6992 return err; 6993 6994 err = check_btf_line(env, attr, uattr); 6995 if (err) 6996 return err; 6997 6998 return 0; 6999 } 7000 7001 /* check %cur's range satisfies %old's */ 7002 static bool range_within(struct bpf_reg_state *old, 7003 struct bpf_reg_state *cur) 7004 { 7005 return old->umin_value <= cur->umin_value && 7006 old->umax_value >= cur->umax_value && 7007 old->smin_value <= cur->smin_value && 7008 old->smax_value >= cur->smax_value; 7009 } 7010 7011 /* 
Maximum number of register states that can exist at once */
7012 #define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
7013 struct idpair {
7014 u32 old;
7015 u32 cur;
7016 };
7017
7018 /* If in the old state two registers had the same id, then they need to have
7019 * the same id in the new state as well. But that id could be different from
7020 * the old state, so we need to track the mapping from old to new ids.
7021 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
7022 * regs with old id 5 must also have new id 9 for the new state to be safe. But
7023 * regs with a different old id could still have new id 9, we don't care about
7024 * that.
7025 * So we look through our idmap to see if this old id has been seen before. If
7026 * so, we require the new id to match; otherwise, we add the id pair to the map.
7027 */
7028 static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
7029 {
7030 unsigned int i;
7031
7032 for (i = 0; i < ID_MAP_SIZE; i++) {
7033 if (!idmap[i].old) {
7034 /* Reached an empty slot; haven't seen this id before */
7035 idmap[i].old = old_id;
7036 idmap[i].cur = cur_id;
7037 return true;
7038 }
7039 if (idmap[i].old == old_id)
7040 return idmap[i].cur == cur_id;
7041 }
7042 /* We ran out of idmap slots, which should be impossible */
7043 WARN_ON_ONCE(1);
7044 return false;
7045 }
7046
7047 static void clean_func_state(struct bpf_verifier_env *env,
7048 struct bpf_func_state *st)
7049 {
7050 enum bpf_reg_liveness live;
7051 int i, j;
7052
7053 for (i = 0; i < BPF_REG_FP; i++) {
7054 live = st->regs[i].live;
7055 /* liveness must not touch this register anymore */
7056 st->regs[i].live |= REG_LIVE_DONE;
7057 if (!(live & REG_LIVE_READ))
7058 /* since the register is unused, clear its state
7059 * to make further comparison simpler
7060 */
7061 __mark_reg_not_init(env, &st->regs[i]);
7062 }
7063
7064 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
7065 live = st->stack[i].spilled_ptr.live;
7066 /* liveness must not touch this stack slot anymore */
7067 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
7068 if (!(live & REG_LIVE_READ)) {
7069 __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
7070 for (j = 0; j < BPF_REG_SIZE; j++)
7071 st->stack[i].slot_type[j] = STACK_INVALID;
7072 }
7073 }
7074 }
7075
7076 static void clean_verifier_state(struct bpf_verifier_env *env,
7077 struct bpf_verifier_state *st)
7078 {
7079 int i;
7080
7081 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
7082 /* all regs in this state in all frames were already marked */
7083 return;
7084
7085 for (i = 0; i <= st->curframe; i++)
7086 clean_func_state(env, st->frame[i]);
7087 }
7088
7089 /* the parentage chains form a tree.
7090 * the verifier states are added to state lists at given insn and
7091 * pushed into state stack for future exploration.
7092 * when the verifier reaches bpf_exit insn some of the verifier states
7093 * stored in the state lists have their final liveness state already,
7094 * but a lot of states will get revised from liveness point of view when
7095 * the verifier explores other branches.
7096 * Example:
7097 * 1: r0 = 1
7098 * 2: if r1 == 100 goto pc+1
7099 * 3: r0 = 2
7100 * 4: exit
7101 * when the verifier reaches exit insn the register r0 in the state list of
7102 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
7103 * of insn 2 and goes exploring further. At insn 4 it will walk the
7104 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
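* (The read mark matters for pruning: when some other path later reaches
* insn 2, regsafe() only needs to check registers that carry
* REG_LIVE_READ in the stored state; r0 is one of them here, while a
* register that is never read again is simply ignored.)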
7105 *
7106 * Since the verifier pushes the branch states as it sees them while exploring
7107 * the program, the condition of walking the branch instruction for the second
7108 * time means that all states below this branch were already explored and
7109 * their final liveness marks are already propagated.
7110 * Hence when the verifier completes the search of state list in is_state_visited()
7111 * we can call this clean_live_states() function to mark all liveness states
7112 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
7113 * will not be used.
7114 * This function also clears the registers and stack for states that are !READ
7115 * to simplify state merging.
7116 *
7117 * An important note here is that walking the same branch instruction in the callee
7118 * doesn't mean that the states are DONE. The verifier has to compare
7119 * the callsites.
7120 */
7121 static void clean_live_states(struct bpf_verifier_env *env, int insn,
7122 struct bpf_verifier_state *cur)
7123 {
7124 struct bpf_verifier_state_list *sl;
7125 int i;
7126
7127 sl = *explored_state(env, insn);
7128 while (sl) {
7129 if (sl->state.branches)
7130 goto next;
7131 if (sl->state.insn_idx != insn ||
7132 sl->state.curframe != cur->curframe)
7133 goto next;
7134 for (i = 0; i <= cur->curframe; i++)
7135 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
7136 goto next;
7137 clean_verifier_state(env, &sl->state);
7138 next:
7139 sl = sl->next;
7140 }
7141 }
7142
7143 /* Returns true if (rold safe implies rcur safe) */
7144 static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
7145 struct idpair *idmap)
7146 {
7147 bool equal;
7148
7149 if (!(rold->live & REG_LIVE_READ))
7150 /* explored state didn't use this */
7151 return true;
7152
7153 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
7154
7155 if (rold->type == PTR_TO_STACK)
7156 /* two stack pointers are equal only if they're pointing to
7157 * the same stack frame, since fp-8 in foo != fp-8 in bar
7158 */
7159 return equal && rold->frameno == rcur->frameno;
7160
7161 if (equal)
7162 return true;
7163
7164 if (rold->type == NOT_INIT)
7165 /* explored state can't have used this */
7166 return true;
7167 if (rcur->type == NOT_INIT)
7168 return false;
7169 switch (rold->type) {
7170 case SCALAR_VALUE:
7171 if (rcur->type == SCALAR_VALUE) {
7172 if (!rold->precise && !rcur->precise)
7173 return true;
7174 /* new val must satisfy old val knowledge */
7175 return range_within(rold, rcur) &&
7176 tnum_in(rold->var_off, rcur->var_off);
7177 } else {
7178 /* We're trying to use a pointer in place of a scalar.
7179 * Even if the scalar was unbounded, this could lead to
7180 * pointer leaks because scalars are allowed to leak
7181 * while pointers are not. We could make this safe in
7182 * special cases if root is calling us, but it's
7183 * probably not worth the hassle.
7184 */
7185 return false;
7186 }
7187 case PTR_TO_MAP_VALUE:
7188 /* If the new min/max/var_off satisfy the old ones and
7189 * everything else matches, we are OK.
7190 * 'id' is not compared, since it's only used for maps with 7191 * bpf_spin_lock inside map element and in such cases if 7192 * the rest of the prog is valid for one map element then 7193 * it's valid for all map elements regardless of the key 7194 * used in bpf_map_lookup() 7195 */ 7196 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 7197 range_within(rold, rcur) && 7198 tnum_in(rold->var_off, rcur->var_off); 7199 case PTR_TO_MAP_VALUE_OR_NULL: 7200 /* a PTR_TO_MAP_VALUE could be safe to use as a 7201 * PTR_TO_MAP_VALUE_OR_NULL into the same map. 7202 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- 7203 * checked, doing so could have affected others with the same 7204 * id, and we can't check for that because we lost the id when 7205 * we converted to a PTR_TO_MAP_VALUE. 7206 */ 7207 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) 7208 return false; 7209 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) 7210 return false; 7211 /* Check our ids match any regs they're supposed to */ 7212 return check_ids(rold->id, rcur->id, idmap); 7213 case PTR_TO_PACKET_META: 7214 case PTR_TO_PACKET: 7215 if (rcur->type != rold->type) 7216 return false; 7217 /* We must have at least as much range as the old ptr 7218 * did, so that any accesses which were safe before are 7219 * still safe. This is true even if old range < old off, 7220 * since someone could have accessed through (ptr - k), or 7221 * even done ptr -= k in a register, to get a safe access. 7222 */ 7223 if (rold->range > rcur->range) 7224 return false; 7225 /* If the offsets don't match, we can't trust our alignment; 7226 * nor can we be sure that we won't fall out of range. 7227 */ 7228 if (rold->off != rcur->off) 7229 return false; 7230 /* id relations must be preserved */ 7231 if (rold->id && !check_ids(rold->id, rcur->id, idmap)) 7232 return false; 7233 /* new val must satisfy old val knowledge */ 7234 return range_within(rold, rcur) && 7235 tnum_in(rold->var_off, rcur->var_off); 7236 case PTR_TO_CTX: 7237 case CONST_PTR_TO_MAP: 7238 case PTR_TO_PACKET_END: 7239 case PTR_TO_FLOW_KEYS: 7240 case PTR_TO_SOCKET: 7241 case PTR_TO_SOCKET_OR_NULL: 7242 case PTR_TO_SOCK_COMMON: 7243 case PTR_TO_SOCK_COMMON_OR_NULL: 7244 case PTR_TO_TCP_SOCK: 7245 case PTR_TO_TCP_SOCK_OR_NULL: 7246 case PTR_TO_XDP_SOCK: 7247 /* Only valid matches are exact, which memcmp() above 7248 * would have accepted 7249 */ 7250 default: 7251 /* Don't know what's going on, just say it's not safe */ 7252 return false; 7253 } 7254 7255 /* Shouldn't get here; if we do, say it's not safe */ 7256 WARN_ON_ONCE(1); 7257 return false; 7258 } 7259 7260 static bool stacksafe(struct bpf_func_state *old, 7261 struct bpf_func_state *cur, 7262 struct idpair *idmap) 7263 { 7264 int i, spi; 7265 7266 /* walk slots of the explored stack and ignore any additional 7267 * slots in the current stack, since explored(safe) state 7268 * didn't use them 7269 */ 7270 for (i = 0; i < old->allocated_stack; i++) { 7271 spi = i / BPF_REG_SIZE; 7272 7273 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { 7274 i += BPF_REG_SIZE - 1; 7275 /* explored state didn't use this */ 7276 continue; 7277 } 7278 7279 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) 7280 continue; 7281 7282 /* explored stack has more populated slots than current stack 7283 * and these slots were used 7284 */ 7285 if (i >= cur->allocated_stack) 7286 return false; 7287 7288 /* if old state was safe with misc data in the stack 7289 * it will be safe with 
zero-initialized stack.
7290 	 * The opposite is not true
7291 	 */
7292 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
7293 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
7294 			continue;
7295 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
7296 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
7297 			/* Ex: old explored (safe) state has STACK_SPILL in
7298 			 * this stack slot, but current has STACK_MISC ->
7299 			 * these verifier states are not equivalent,
7300 			 * return false to continue verification of this path
7301 			 */
7302 			return false;
7303 		if (i % BPF_REG_SIZE)
7304 			continue;
7305 		if (old->stack[spi].slot_type[0] != STACK_SPILL)
7306 			continue;
7307 		if (!regsafe(&old->stack[spi].spilled_ptr,
7308 			     &cur->stack[spi].spilled_ptr,
7309 			     idmap))
7310 			/* when explored and current stack slot are both storing
7311 			 * spilled registers, check that stored pointer types
7312 			 * are the same as well.
7313 			 * Ex: explored safe path could have stored
7314 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
7315 			 * but current path has stored:
7316 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
7317 			 * such verifier states are not equivalent.
7318 			 * return false to continue verification of this path
7319 			 */
7320 			return false;
7321 	}
7322 	return true;
7323 }
7324
7325 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
7326 {
7327 	if (old->acquired_refs != cur->acquired_refs)
7328 		return false;
7329 	return !memcmp(old->refs, cur->refs,
7330 		       sizeof(*old->refs) * old->acquired_refs);
7331 }
7332
7333 /* compare two verifier states
7334  *
7335  * all states stored in state_list are known to be valid, since
7336  * verifier reached 'bpf_exit' instruction through them
7337  *
7338  * this function is called while the verifier is exploring different branches
7339  * of execution popped from the state stack. If it sees an old state that has
7340  * more strict register state and more strict stack state, then this execution
7341  * branch doesn't need to be explored further, since verifier already
7342  * concluded that more strict state leads to valid finish.
7343  *
7344  * Therefore two states are equivalent if register state is more conservative
7345  * and explored stack state is more conservative than the current one.
7346  * Example:
7347  *       explored                   current
7348  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
7349  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
7350  *
7351  * In other words if current stack state (one being explored) has more
7352  * valid slots than old one that already passed validation, it means
7353  * the verifier can stop exploring and conclude that current state is valid too
7354  *
7355  * Similarly with registers.
If explored state has register type as invalid 7356 * whereas register type in current state is meaningful, it means that 7357 * the current state will reach 'bpf_exit' instruction safely 7358 */ 7359 static bool func_states_equal(struct bpf_func_state *old, 7360 struct bpf_func_state *cur) 7361 { 7362 struct idpair *idmap; 7363 bool ret = false; 7364 int i; 7365 7366 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); 7367 /* If we failed to allocate the idmap, just say it's not safe */ 7368 if (!idmap) 7369 return false; 7370 7371 for (i = 0; i < MAX_BPF_REG; i++) { 7372 if (!regsafe(&old->regs[i], &cur->regs[i], idmap)) 7373 goto out_free; 7374 } 7375 7376 if (!stacksafe(old, cur, idmap)) 7377 goto out_free; 7378 7379 if (!refsafe(old, cur)) 7380 goto out_free; 7381 ret = true; 7382 out_free: 7383 kfree(idmap); 7384 return ret; 7385 } 7386 7387 static bool states_equal(struct bpf_verifier_env *env, 7388 struct bpf_verifier_state *old, 7389 struct bpf_verifier_state *cur) 7390 { 7391 int i; 7392 7393 if (old->curframe != cur->curframe) 7394 return false; 7395 7396 /* Verification state from speculative execution simulation 7397 * must never prune a non-speculative execution one. 7398 */ 7399 if (old->speculative && !cur->speculative) 7400 return false; 7401 7402 if (old->active_spin_lock != cur->active_spin_lock) 7403 return false; 7404 7405 /* for states to be equal callsites have to be the same 7406 * and all frame states need to be equivalent 7407 */ 7408 for (i = 0; i <= old->curframe; i++) { 7409 if (old->frame[i]->callsite != cur->frame[i]->callsite) 7410 return false; 7411 if (!func_states_equal(old->frame[i], cur->frame[i])) 7412 return false; 7413 } 7414 return true; 7415 } 7416 7417 /* Return 0 if no propagation happened. Return negative error code if error 7418 * happened. Otherwise, return the propagated bit. 7419 */ 7420 static int propagate_liveness_reg(struct bpf_verifier_env *env, 7421 struct bpf_reg_state *reg, 7422 struct bpf_reg_state *parent_reg) 7423 { 7424 u8 parent_flag = parent_reg->live & REG_LIVE_READ; 7425 u8 flag = reg->live & REG_LIVE_READ; 7426 int err; 7427 7428 /* When comes here, read flags of PARENT_REG or REG could be any of 7429 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need 7430 * of propagation if PARENT_REG has strongest REG_LIVE_READ64. 7431 */ 7432 if (parent_flag == REG_LIVE_READ64 || 7433 /* Or if there is no read flag from REG. */ 7434 !flag || 7435 /* Or if the read flag from REG is the same as PARENT_REG. */ 7436 parent_flag == flag) 7437 return 0; 7438 7439 err = mark_reg_read(env, reg, parent_reg, flag); 7440 if (err) 7441 return err; 7442 7443 return flag; 7444 } 7445 7446 /* A write screens off any subsequent reads; but write marks come from the 7447 * straight-line code between a state and its parent. When we arrive at an 7448 * equivalent state (jump target or such) we didn't arrive by the straight-line 7449 * code, so read marks in the state must propagate to the parent regardless 7450 * of the state's write marks. That's what 'parent == state->parent' comparison 7451 * in mark_reg_read() is for. 
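 *
 * An illustrative example (added note, not part of the original comment): if
 * the pruned continuation contained
 *   r0 = r6
 * then r6 carries a read mark in the explored state. The current state never
 * executed that instruction, but because it relies on the explored state's
 * safety it must report r6 as read to its own parent as well, even if the
 * straight-line code leading up to it wrote r6 and would normally screen that
 * read off.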
7452 */ 7453 static int propagate_liveness(struct bpf_verifier_env *env, 7454 const struct bpf_verifier_state *vstate, 7455 struct bpf_verifier_state *vparent) 7456 { 7457 struct bpf_reg_state *state_reg, *parent_reg; 7458 struct bpf_func_state *state, *parent; 7459 int i, frame, err = 0; 7460 7461 if (vparent->curframe != vstate->curframe) { 7462 WARN(1, "propagate_live: parent frame %d current frame %d\n", 7463 vparent->curframe, vstate->curframe); 7464 return -EFAULT; 7465 } 7466 /* Propagate read liveness of registers... */ 7467 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); 7468 for (frame = 0; frame <= vstate->curframe; frame++) { 7469 parent = vparent->frame[frame]; 7470 state = vstate->frame[frame]; 7471 parent_reg = parent->regs; 7472 state_reg = state->regs; 7473 /* We don't need to worry about FP liveness, it's read-only */ 7474 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { 7475 err = propagate_liveness_reg(env, &state_reg[i], 7476 &parent_reg[i]); 7477 if (err < 0) 7478 return err; 7479 if (err == REG_LIVE_READ64) 7480 mark_insn_zext(env, &parent_reg[i]); 7481 } 7482 7483 /* Propagate stack slots. */ 7484 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && 7485 i < parent->allocated_stack / BPF_REG_SIZE; i++) { 7486 parent_reg = &parent->stack[i].spilled_ptr; 7487 state_reg = &state->stack[i].spilled_ptr; 7488 err = propagate_liveness_reg(env, state_reg, 7489 parent_reg); 7490 if (err < 0) 7491 return err; 7492 } 7493 } 7494 return 0; 7495 } 7496 7497 /* find precise scalars in the previous equivalent state and 7498 * propagate them into the current state 7499 */ 7500 static int propagate_precision(struct bpf_verifier_env *env, 7501 const struct bpf_verifier_state *old) 7502 { 7503 struct bpf_reg_state *state_reg; 7504 struct bpf_func_state *state; 7505 int i, err = 0; 7506 7507 state = old->frame[old->curframe]; 7508 state_reg = state->regs; 7509 for (i = 0; i < BPF_REG_FP; i++, state_reg++) { 7510 if (state_reg->type != SCALAR_VALUE || 7511 !state_reg->precise) 7512 continue; 7513 if (env->log.level & BPF_LOG_LEVEL2) 7514 verbose(env, "propagating r%d\n", i); 7515 err = mark_chain_precision(env, i); 7516 if (err < 0) 7517 return err; 7518 } 7519 7520 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 7521 if (state->stack[i].slot_type[0] != STACK_SPILL) 7522 continue; 7523 state_reg = &state->stack[i].spilled_ptr; 7524 if (state_reg->type != SCALAR_VALUE || 7525 !state_reg->precise) 7526 continue; 7527 if (env->log.level & BPF_LOG_LEVEL2) 7528 verbose(env, "propagating fp%d\n", 7529 (-i - 1) * BPF_REG_SIZE); 7530 err = mark_chain_precision_stack(env, i); 7531 if (err < 0) 7532 return err; 7533 } 7534 return 0; 7535 } 7536 7537 static bool states_maybe_looping(struct bpf_verifier_state *old, 7538 struct bpf_verifier_state *cur) 7539 { 7540 struct bpf_func_state *fold, *fcur; 7541 int i, fr = cur->curframe; 7542 7543 if (old->curframe != fr) 7544 return false; 7545 7546 fold = old->frame[fr]; 7547 fcur = cur->frame[fr]; 7548 for (i = 0; i < MAX_BPF_REG; i++) 7549 if (memcmp(&fold->regs[i], &fcur->regs[i], 7550 offsetof(struct bpf_reg_state, parent))) 7551 return false; 7552 return true; 7553 } 7554 7555 7556 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) 7557 { 7558 struct bpf_verifier_state_list *new_sl; 7559 struct bpf_verifier_state_list *sl, **pprev; 7560 struct bpf_verifier_state *cur = env->cur_state, *new; 7561 int i, j, err, states_cnt = 0; 7562 bool add_new_state = env->test_state_freq ? 
true : false;
7563
7564 	cur->last_insn_idx = env->prev_insn_idx;
7565 	if (!env->insn_aux_data[insn_idx].prune_point)
7566 		/* this 'insn_idx' instruction wasn't marked, so we will not
7567 		 * be doing state search here
7568 		 */
7569 		return 0;
7570
7571 	/* bpf progs typically have a pruning point every 4 instructions
7572 	 * http://vger.kernel.org/bpfconf2019.html#session-1
7573 	 * Do not add new state for future pruning if the verifier hasn't seen
7574 	 * at least 2 jumps and at least 8 instructions.
7575 	 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
7576 	 * In tests that amounts to up to 50% reduction in total verifier
7577 	 * memory consumption and 20% verifier time speedup.
7578 	 */
7579 	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
7580 	    env->insn_processed - env->prev_insn_processed >= 8)
7581 		add_new_state = true;
7582
7583 	pprev = explored_state(env, insn_idx);
7584 	sl = *pprev;
7585
7586 	clean_live_states(env, insn_idx, cur);
7587
7588 	while (sl) {
7589 		states_cnt++;
7590 		if (sl->state.insn_idx != insn_idx)
7591 			goto next;
7592 		if (sl->state.branches) {
7593 			if (states_maybe_looping(&sl->state, cur) &&
7594 			    states_equal(env, &sl->state, cur)) {
7595 				verbose_linfo(env, insn_idx, "; ");
7596 				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
7597 				return -EINVAL;
7598 			}
7599 			/* if the verifier is processing a loop, avoid adding new state
7600 			 * too often, since different loop iterations have distinct
7601 			 * states and may not help future pruning.
7602 			 * This threshold shouldn't be too low to make sure that
7603 			 * a loop with a large bound will be rejected quickly.
7604 			 * The most abusive loop will be:
7605 			 * r1 += 1
7606 			 * if r1 < 1000000 goto pc-2
7607 			 * 1M insn_processed limit / 100 == 10k peak states.
7608 			 * This threshold shouldn't be too high either, since states
7609 			 * at the end of the loop are likely to be useful in pruning.
7610 			 */
7611 			if (env->jmps_processed - env->prev_jmps_processed < 20 &&
7612 			    env->insn_processed - env->prev_insn_processed < 100)
7613 				add_new_state = false;
7614 			goto miss;
7615 		}
7616 		if (states_equal(env, &sl->state, cur)) {
7617 			sl->hit_cnt++;
7618 			/* reached equivalent register/stack state,
7619 			 * prune the search.
7620 			 * Registers read by the continuation are read by us.
7621 			 * If we have any write marks in env->cur_state, they
7622 			 * will prevent corresponding reads in the continuation
7623 			 * from reaching our parent (an explored_state). Our
7624 			 * own state will get the read marks recorded, but
7625 			 * they'll be immediately forgotten as we're pruning
7626 			 * this state and will pop a new one.
7627 			 */
7628 			err = propagate_liveness(env, &sl->state, cur);
7629
7630 			/* if previous state reached the exit with precision and
7631 			 * current state is equivalent to it (except precision marks)
7632 			 * the precision needs to be propagated back in
7633 			 * the current state.
7634 			 */
7635 			err = err ? : push_jmp_history(env, cur);
7636 			err = err ? : propagate_precision(env, &sl->state);
7637 			if (err)
7638 				return err;
7639 			return 1;
7640 		}
7641 miss:
7642 		/* when a new state is not going to be added, do not increase the miss count.
7643 		 * Otherwise several loop iterations will remove the state
7644 		 * recorded earlier. The goal of these heuristics is to have
7645 		 * states from some iterations of the loop (some in the beginning
7646 		 * and some at the end) to help pruning.
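		 *
		 * A worked illustration of the eviction check below (added
		 * note, not from the original comment): with
		 * 'miss_cnt > hit_cnt * 3 + 3', a state that never pruned
		 * anything (hit_cnt == 0) is dropped after its 4th miss,
		 * while a state that pruned two paths (hit_cnt == 2) survives
		 * up to 9 misses before being dropped.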
7647 */ 7648 if (add_new_state) 7649 sl->miss_cnt++; 7650 /* heuristic to determine whether this state is beneficial 7651 * to keep checking from state equivalence point of view. 7652 * Higher numbers increase max_states_per_insn and verification time, 7653 * but do not meaningfully decrease insn_processed. 7654 */ 7655 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { 7656 /* the state is unlikely to be useful. Remove it to 7657 * speed up verification 7658 */ 7659 *pprev = sl->next; 7660 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { 7661 u32 br = sl->state.branches; 7662 7663 WARN_ONCE(br, 7664 "BUG live_done but branches_to_explore %d\n", 7665 br); 7666 free_verifier_state(&sl->state, false); 7667 kfree(sl); 7668 env->peak_states--; 7669 } else { 7670 /* cannot free this state, since parentage chain may 7671 * walk it later. Add it for free_list instead to 7672 * be freed at the end of verification 7673 */ 7674 sl->next = env->free_list; 7675 env->free_list = sl; 7676 } 7677 sl = *pprev; 7678 continue; 7679 } 7680 next: 7681 pprev = &sl->next; 7682 sl = *pprev; 7683 } 7684 7685 if (env->max_states_per_insn < states_cnt) 7686 env->max_states_per_insn = states_cnt; 7687 7688 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) 7689 return push_jmp_history(env, cur); 7690 7691 if (!add_new_state) 7692 return push_jmp_history(env, cur); 7693 7694 /* There were no equivalent states, remember the current one. 7695 * Technically the current state is not proven to be safe yet, 7696 * but it will either reach outer most bpf_exit (which means it's safe) 7697 * or it will be rejected. When there are no loops the verifier won't be 7698 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) 7699 * again on the way to bpf_exit. 7700 * When looping the sl->state.branches will be > 0 and this state 7701 * will not be considered for equivalence until branches == 0. 7702 */ 7703 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); 7704 if (!new_sl) 7705 return -ENOMEM; 7706 env->total_states++; 7707 env->peak_states++; 7708 env->prev_jmps_processed = env->jmps_processed; 7709 env->prev_insn_processed = env->insn_processed; 7710 7711 /* add new state to the head of linked list */ 7712 new = &new_sl->state; 7713 err = copy_verifier_state(new, cur); 7714 if (err) { 7715 free_verifier_state(new, false); 7716 kfree(new_sl); 7717 return err; 7718 } 7719 new->insn_idx = insn_idx; 7720 WARN_ONCE(new->branches != 1, 7721 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); 7722 7723 cur->parent = new; 7724 cur->first_insn_idx = insn_idx; 7725 clear_jmp_history(cur); 7726 new_sl->next = *explored_state(env, insn_idx); 7727 *explored_state(env, insn_idx) = new_sl; 7728 /* connect new state to parentage chain. Current frame needs all 7729 * registers connected. Only r6 - r9 of the callers are alive (pushed 7730 * to the stack implicitly by JITs) so in callers' frames connect just 7731 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to 7732 * the state of the call instruction (with WRITTEN set), and r0 comes 7733 * from callee with its full parentage chain, anyway. 7734 */ 7735 /* clear write marks in current state: the writes we did are not writes 7736 * our child did, so they don't screen off its reads from us. 7737 * (There are no read marks in current state, because reads always mark 7738 * their parent and current state never has children yet. Only 7739 * explored_states can get read marks.) 
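	 *
	 * Sketch of the resulting links (descriptive note, not from the
	 * original comment): after the loops below, for registers r0-r9 of
	 * the current frame
	 *   cur->frame[f]->regs[i].parent == &new->frame[f]->regs[i]
	 *   cur->frame[f]->regs[i].live   == REG_LIVE_NONE
	 * so any read performed later on 'cur' propagates up into the state
	 * that was just added to the explored_states list.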
7740 */ 7741 for (j = 0; j <= cur->curframe; j++) { 7742 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) 7743 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; 7744 for (i = 0; i < BPF_REG_FP; i++) 7745 cur->frame[j]->regs[i].live = REG_LIVE_NONE; 7746 } 7747 7748 /* all stack frames are accessible from callee, clear them all */ 7749 for (j = 0; j <= cur->curframe; j++) { 7750 struct bpf_func_state *frame = cur->frame[j]; 7751 struct bpf_func_state *newframe = new->frame[j]; 7752 7753 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) { 7754 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE; 7755 frame->stack[i].spilled_ptr.parent = 7756 &newframe->stack[i].spilled_ptr; 7757 } 7758 } 7759 return 0; 7760 } 7761 7762 /* Return true if it's OK to have the same insn return a different type. */ 7763 static bool reg_type_mismatch_ok(enum bpf_reg_type type) 7764 { 7765 switch (type) { 7766 case PTR_TO_CTX: 7767 case PTR_TO_SOCKET: 7768 case PTR_TO_SOCKET_OR_NULL: 7769 case PTR_TO_SOCK_COMMON: 7770 case PTR_TO_SOCK_COMMON_OR_NULL: 7771 case PTR_TO_TCP_SOCK: 7772 case PTR_TO_TCP_SOCK_OR_NULL: 7773 case PTR_TO_XDP_SOCK: 7774 case PTR_TO_BTF_ID: 7775 return false; 7776 default: 7777 return true; 7778 } 7779 } 7780 7781 /* If an instruction was previously used with particular pointer types, then we 7782 * need to be careful to avoid cases such as the below, where it may be ok 7783 * for one branch accessing the pointer, but not ok for the other branch: 7784 * 7785 * R1 = sock_ptr 7786 * goto X; 7787 * ... 7788 * R1 = some_other_valid_ptr; 7789 * goto X; 7790 * ... 7791 * R2 = *(u32 *)(R1 + 0); 7792 */ 7793 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) 7794 { 7795 return src != prev && (!reg_type_mismatch_ok(src) || 7796 !reg_type_mismatch_ok(prev)); 7797 } 7798 7799 static int do_check(struct bpf_verifier_env *env) 7800 { 7801 struct bpf_verifier_state *state = env->cur_state; 7802 struct bpf_insn *insns = env->prog->insnsi; 7803 struct bpf_reg_state *regs; 7804 int insn_cnt = env->prog->len; 7805 bool do_print_state = false; 7806 int prev_insn_idx = -1; 7807 7808 for (;;) { 7809 struct bpf_insn *insn; 7810 u8 class; 7811 int err; 7812 7813 env->prev_insn_idx = prev_insn_idx; 7814 if (env->insn_idx >= insn_cnt) { 7815 verbose(env, "invalid insn idx %d insn_cnt %d\n", 7816 env->insn_idx, insn_cnt); 7817 return -EFAULT; 7818 } 7819 7820 insn = &insns[env->insn_idx]; 7821 class = BPF_CLASS(insn->code); 7822 7823 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { 7824 verbose(env, 7825 "BPF program is too large. Processed %d insn\n", 7826 env->insn_processed); 7827 return -E2BIG; 7828 } 7829 7830 err = is_state_visited(env, env->insn_idx); 7831 if (err < 0) 7832 return err; 7833 if (err == 1) { 7834 /* found equivalent state, can prune the search */ 7835 if (env->log.level & BPF_LOG_LEVEL) { 7836 if (do_print_state) 7837 verbose(env, "\nfrom %d to %d%s: safe\n", 7838 env->prev_insn_idx, env->insn_idx, 7839 env->cur_state->speculative ? 
7840 " (speculative execution)" : ""); 7841 else 7842 verbose(env, "%d: safe\n", env->insn_idx); 7843 } 7844 goto process_bpf_exit; 7845 } 7846 7847 if (signal_pending(current)) 7848 return -EAGAIN; 7849 7850 if (need_resched()) 7851 cond_resched(); 7852 7853 if (env->log.level & BPF_LOG_LEVEL2 || 7854 (env->log.level & BPF_LOG_LEVEL && do_print_state)) { 7855 if (env->log.level & BPF_LOG_LEVEL2) 7856 verbose(env, "%d:", env->insn_idx); 7857 else 7858 verbose(env, "\nfrom %d to %d%s:", 7859 env->prev_insn_idx, env->insn_idx, 7860 env->cur_state->speculative ? 7861 " (speculative execution)" : ""); 7862 print_verifier_state(env, state->frame[state->curframe]); 7863 do_print_state = false; 7864 } 7865 7866 if (env->log.level & BPF_LOG_LEVEL) { 7867 const struct bpf_insn_cbs cbs = { 7868 .cb_print = verbose, 7869 .private_data = env, 7870 }; 7871 7872 verbose_linfo(env, env->insn_idx, "; "); 7873 verbose(env, "%d: ", env->insn_idx); 7874 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 7875 } 7876 7877 if (bpf_prog_is_dev_bound(env->prog->aux)) { 7878 err = bpf_prog_offload_verify_insn(env, env->insn_idx, 7879 env->prev_insn_idx); 7880 if (err) 7881 return err; 7882 } 7883 7884 regs = cur_regs(env); 7885 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 7886 prev_insn_idx = env->insn_idx; 7887 7888 if (class == BPF_ALU || class == BPF_ALU64) { 7889 err = check_alu_op(env, insn); 7890 if (err) 7891 return err; 7892 7893 } else if (class == BPF_LDX) { 7894 enum bpf_reg_type *prev_src_type, src_reg_type; 7895 7896 /* check for reserved fields is already done */ 7897 7898 /* check src operand */ 7899 err = check_reg_arg(env, insn->src_reg, SRC_OP); 7900 if (err) 7901 return err; 7902 7903 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 7904 if (err) 7905 return err; 7906 7907 src_reg_type = regs[insn->src_reg].type; 7908 7909 /* check that memory (src_reg + off) is readable, 7910 * the state of dst_reg will be updated by this func 7911 */ 7912 err = check_mem_access(env, env->insn_idx, insn->src_reg, 7913 insn->off, BPF_SIZE(insn->code), 7914 BPF_READ, insn->dst_reg, false); 7915 if (err) 7916 return err; 7917 7918 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; 7919 7920 if (*prev_src_type == NOT_INIT) { 7921 /* saw a valid insn 7922 * dst_reg = *(u32 *)(src_reg + off) 7923 * save type to validate intersecting paths 7924 */ 7925 *prev_src_type = src_reg_type; 7926 7927 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { 7928 /* ABuser program is trying to use the same insn 7929 * dst_reg = *(u32*) (src_reg + off) 7930 * with different pointer types: 7931 * src_reg == ctx in one branch and 7932 * src_reg == stack|map in some other branch. 7933 * Reject it. 
7934 */ 7935 verbose(env, "same insn cannot be used with different pointers\n"); 7936 return -EINVAL; 7937 } 7938 7939 } else if (class == BPF_STX) { 7940 enum bpf_reg_type *prev_dst_type, dst_reg_type; 7941 7942 if (BPF_MODE(insn->code) == BPF_XADD) { 7943 err = check_xadd(env, env->insn_idx, insn); 7944 if (err) 7945 return err; 7946 env->insn_idx++; 7947 continue; 7948 } 7949 7950 /* check src1 operand */ 7951 err = check_reg_arg(env, insn->src_reg, SRC_OP); 7952 if (err) 7953 return err; 7954 /* check src2 operand */ 7955 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 7956 if (err) 7957 return err; 7958 7959 dst_reg_type = regs[insn->dst_reg].type; 7960 7961 /* check that memory (dst_reg + off) is writeable */ 7962 err = check_mem_access(env, env->insn_idx, insn->dst_reg, 7963 insn->off, BPF_SIZE(insn->code), 7964 BPF_WRITE, insn->src_reg, false); 7965 if (err) 7966 return err; 7967 7968 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; 7969 7970 if (*prev_dst_type == NOT_INIT) { 7971 *prev_dst_type = dst_reg_type; 7972 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) { 7973 verbose(env, "same insn cannot be used with different pointers\n"); 7974 return -EINVAL; 7975 } 7976 7977 } else if (class == BPF_ST) { 7978 if (BPF_MODE(insn->code) != BPF_MEM || 7979 insn->src_reg != BPF_REG_0) { 7980 verbose(env, "BPF_ST uses reserved fields\n"); 7981 return -EINVAL; 7982 } 7983 /* check src operand */ 7984 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 7985 if (err) 7986 return err; 7987 7988 if (is_ctx_reg(env, insn->dst_reg)) { 7989 verbose(env, "BPF_ST stores into R%d %s is not allowed\n", 7990 insn->dst_reg, 7991 reg_type_str[reg_state(env, insn->dst_reg)->type]); 7992 return -EACCES; 7993 } 7994 7995 /* check that memory (dst_reg + off) is writeable */ 7996 err = check_mem_access(env, env->insn_idx, insn->dst_reg, 7997 insn->off, BPF_SIZE(insn->code), 7998 BPF_WRITE, -1, false); 7999 if (err) 8000 return err; 8001 8002 } else if (class == BPF_JMP || class == BPF_JMP32) { 8003 u8 opcode = BPF_OP(insn->code); 8004 8005 env->jmps_processed++; 8006 if (opcode == BPF_CALL) { 8007 if (BPF_SRC(insn->code) != BPF_K || 8008 insn->off != 0 || 8009 (insn->src_reg != BPF_REG_0 && 8010 insn->src_reg != BPF_PSEUDO_CALL) || 8011 insn->dst_reg != BPF_REG_0 || 8012 class == BPF_JMP32) { 8013 verbose(env, "BPF_CALL uses reserved fields\n"); 8014 return -EINVAL; 8015 } 8016 8017 if (env->cur_state->active_spin_lock && 8018 (insn->src_reg == BPF_PSEUDO_CALL || 8019 insn->imm != BPF_FUNC_spin_unlock)) { 8020 verbose(env, "function calls are not allowed while holding a lock\n"); 8021 return -EINVAL; 8022 } 8023 if (insn->src_reg == BPF_PSEUDO_CALL) 8024 err = check_func_call(env, insn, &env->insn_idx); 8025 else 8026 err = check_helper_call(env, insn->imm, env->insn_idx); 8027 if (err) 8028 return err; 8029 8030 } else if (opcode == BPF_JA) { 8031 if (BPF_SRC(insn->code) != BPF_K || 8032 insn->imm != 0 || 8033 insn->src_reg != BPF_REG_0 || 8034 insn->dst_reg != BPF_REG_0 || 8035 class == BPF_JMP32) { 8036 verbose(env, "BPF_JA uses reserved fields\n"); 8037 return -EINVAL; 8038 } 8039 8040 env->insn_idx += insn->off + 1; 8041 continue; 8042 8043 } else if (opcode == BPF_EXIT) { 8044 if (BPF_SRC(insn->code) != BPF_K || 8045 insn->imm != 0 || 8046 insn->src_reg != BPF_REG_0 || 8047 insn->dst_reg != BPF_REG_0 || 8048 class == BPF_JMP32) { 8049 verbose(env, "BPF_EXIT uses reserved fields\n"); 8050 return -EINVAL; 8051 } 8052 8053 if (env->cur_state->active_spin_lock) { 8054 verbose(env, 
"bpf_spin_unlock is missing\n"); 8055 return -EINVAL; 8056 } 8057 8058 if (state->curframe) { 8059 /* exit from nested function */ 8060 err = prepare_func_exit(env, &env->insn_idx); 8061 if (err) 8062 return err; 8063 do_print_state = true; 8064 continue; 8065 } 8066 8067 err = check_reference_leak(env); 8068 if (err) 8069 return err; 8070 8071 err = check_return_code(env); 8072 if (err) 8073 return err; 8074 process_bpf_exit: 8075 update_branch_counts(env, env->cur_state); 8076 err = pop_stack(env, &prev_insn_idx, 8077 &env->insn_idx); 8078 if (err < 0) { 8079 if (err != -ENOENT) 8080 return err; 8081 break; 8082 } else { 8083 do_print_state = true; 8084 continue; 8085 } 8086 } else { 8087 err = check_cond_jmp_op(env, insn, &env->insn_idx); 8088 if (err) 8089 return err; 8090 } 8091 } else if (class == BPF_LD) { 8092 u8 mode = BPF_MODE(insn->code); 8093 8094 if (mode == BPF_ABS || mode == BPF_IND) { 8095 err = check_ld_abs(env, insn); 8096 if (err) 8097 return err; 8098 8099 } else if (mode == BPF_IMM) { 8100 err = check_ld_imm(env, insn); 8101 if (err) 8102 return err; 8103 8104 env->insn_idx++; 8105 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 8106 } else { 8107 verbose(env, "invalid BPF_LD mode\n"); 8108 return -EINVAL; 8109 } 8110 } else { 8111 verbose(env, "unknown insn class %d\n", class); 8112 return -EINVAL; 8113 } 8114 8115 env->insn_idx++; 8116 } 8117 8118 return 0; 8119 } 8120 8121 static int check_map_prealloc(struct bpf_map *map) 8122 { 8123 return (map->map_type != BPF_MAP_TYPE_HASH && 8124 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 8125 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) || 8126 !(map->map_flags & BPF_F_NO_PREALLOC); 8127 } 8128 8129 static bool is_tracing_prog_type(enum bpf_prog_type type) 8130 { 8131 switch (type) { 8132 case BPF_PROG_TYPE_KPROBE: 8133 case BPF_PROG_TYPE_TRACEPOINT: 8134 case BPF_PROG_TYPE_PERF_EVENT: 8135 case BPF_PROG_TYPE_RAW_TRACEPOINT: 8136 return true; 8137 default: 8138 return false; 8139 } 8140 } 8141 8142 static int check_map_prog_compatibility(struct bpf_verifier_env *env, 8143 struct bpf_map *map, 8144 struct bpf_prog *prog) 8145 8146 { 8147 /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use 8148 * preallocated hash maps, since doing memory allocation 8149 * in overflow_handler can crash depending on where nmi got 8150 * triggered. 
8151 */ 8152 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) { 8153 if (!check_map_prealloc(map)) { 8154 verbose(env, "perf_event programs can only use preallocated hash map\n"); 8155 return -EINVAL; 8156 } 8157 if (map->inner_map_meta && 8158 !check_map_prealloc(map->inner_map_meta)) { 8159 verbose(env, "perf_event programs can only use preallocated inner hash map\n"); 8160 return -EINVAL; 8161 } 8162 } 8163 8164 if ((is_tracing_prog_type(prog->type) || 8165 prog->type == BPF_PROG_TYPE_SOCKET_FILTER) && 8166 map_value_has_spin_lock(map)) { 8167 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); 8168 return -EINVAL; 8169 } 8170 8171 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && 8172 !bpf_offload_prog_map_match(prog, map)) { 8173 verbose(env, "offload device mismatch between prog and map\n"); 8174 return -EINVAL; 8175 } 8176 8177 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 8178 verbose(env, "bpf_struct_ops map cannot be used in prog\n"); 8179 return -EINVAL; 8180 } 8181 8182 return 0; 8183 } 8184 8185 static bool bpf_map_is_cgroup_storage(struct bpf_map *map) 8186 { 8187 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || 8188 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); 8189 } 8190 8191 /* look for pseudo eBPF instructions that access map FDs and 8192 * replace them with actual map pointers 8193 */ 8194 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) 8195 { 8196 struct bpf_insn *insn = env->prog->insnsi; 8197 int insn_cnt = env->prog->len; 8198 int i, j, err; 8199 8200 err = bpf_prog_calc_tag(env->prog); 8201 if (err) 8202 return err; 8203 8204 for (i = 0; i < insn_cnt; i++, insn++) { 8205 if (BPF_CLASS(insn->code) == BPF_LDX && 8206 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) { 8207 verbose(env, "BPF_LDX uses reserved fields\n"); 8208 return -EINVAL; 8209 } 8210 8211 if (BPF_CLASS(insn->code) == BPF_STX && 8212 ((BPF_MODE(insn->code) != BPF_MEM && 8213 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) { 8214 verbose(env, "BPF_STX uses reserved fields\n"); 8215 return -EINVAL; 8216 } 8217 8218 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { 8219 struct bpf_insn_aux_data *aux; 8220 struct bpf_map *map; 8221 struct fd f; 8222 u64 addr; 8223 8224 if (i == insn_cnt - 1 || insn[1].code != 0 || 8225 insn[1].dst_reg != 0 || insn[1].src_reg != 0 || 8226 insn[1].off != 0) { 8227 verbose(env, "invalid bpf_ld_imm64 insn\n"); 8228 return -EINVAL; 8229 } 8230 8231 if (insn[0].src_reg == 0) 8232 /* valid generic load 64-bit imm */ 8233 goto next_insn; 8234 8235 /* In final convert_pseudo_ld_imm64() step, this is 8236 * converted into regular 64-bit imm load insn. 
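			 *
			 * For reference (descriptive note, not from the
			 * original comment), the pseudo form handled here
			 * spans two insns:
			 *   insn[0]: BPF_LD | BPF_DW | BPF_IMM,
			 *            src_reg = BPF_PSEUDO_MAP_FD or
			 *            BPF_PSEUDO_MAP_VALUE, imm = map fd
			 *   insn[1]: imm = 0 for BPF_PSEUDO_MAP_FD, or the
			 *            offset into the map value for
			 *            BPF_PSEUDO_MAP_VALUE
			 * and is rewritten below so that insn[0].imm and
			 * insn[1].imm carry the low and high 32 bits of the
			 * resulting kernel address.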
8237 */ 8238 if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && 8239 insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || 8240 (insn[0].src_reg == BPF_PSEUDO_MAP_FD && 8241 insn[1].imm != 0)) { 8242 verbose(env, 8243 "unrecognized bpf_ld_imm64 insn\n"); 8244 return -EINVAL; 8245 } 8246 8247 f = fdget(insn[0].imm); 8248 map = __bpf_map_get(f); 8249 if (IS_ERR(map)) { 8250 verbose(env, "fd %d is not pointing to valid bpf_map\n", 8251 insn[0].imm); 8252 return PTR_ERR(map); 8253 } 8254 8255 err = check_map_prog_compatibility(env, map, env->prog); 8256 if (err) { 8257 fdput(f); 8258 return err; 8259 } 8260 8261 aux = &env->insn_aux_data[i]; 8262 if (insn->src_reg == BPF_PSEUDO_MAP_FD) { 8263 addr = (unsigned long)map; 8264 } else { 8265 u32 off = insn[1].imm; 8266 8267 if (off >= BPF_MAX_VAR_OFF) { 8268 verbose(env, "direct value offset of %u is not allowed\n", off); 8269 fdput(f); 8270 return -EINVAL; 8271 } 8272 8273 if (!map->ops->map_direct_value_addr) { 8274 verbose(env, "no direct value access support for this map type\n"); 8275 fdput(f); 8276 return -EINVAL; 8277 } 8278 8279 err = map->ops->map_direct_value_addr(map, &addr, off); 8280 if (err) { 8281 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", 8282 map->value_size, off); 8283 fdput(f); 8284 return err; 8285 } 8286 8287 aux->map_off = off; 8288 addr += off; 8289 } 8290 8291 insn[0].imm = (u32)addr; 8292 insn[1].imm = addr >> 32; 8293 8294 /* check whether we recorded this map already */ 8295 for (j = 0; j < env->used_map_cnt; j++) { 8296 if (env->used_maps[j] == map) { 8297 aux->map_index = j; 8298 fdput(f); 8299 goto next_insn; 8300 } 8301 } 8302 8303 if (env->used_map_cnt >= MAX_USED_MAPS) { 8304 fdput(f); 8305 return -E2BIG; 8306 } 8307 8308 /* hold the map. If the program is rejected by verifier, 8309 * the map will be released by release_maps() or it 8310 * will be used by the valid program until it's unloaded 8311 * and all maps are released in free_used_maps() 8312 */ 8313 bpf_map_inc(map); 8314 8315 aux->map_index = env->used_map_cnt; 8316 env->used_maps[env->used_map_cnt++] = map; 8317 8318 if (bpf_map_is_cgroup_storage(map) && 8319 bpf_cgroup_storage_assign(env->prog->aux, map)) { 8320 verbose(env, "only one cgroup storage of each type is allowed\n"); 8321 fdput(f); 8322 return -EBUSY; 8323 } 8324 8325 fdput(f); 8326 next_insn: 8327 insn++; 8328 i++; 8329 continue; 8330 } 8331 8332 /* Basic sanity check before we invest more work here. */ 8333 if (!bpf_opcode_in_insntable(insn->code)) { 8334 verbose(env, "unknown opcode %02x\n", insn->code); 8335 return -EINVAL; 8336 } 8337 } 8338 8339 /* now all pseudo BPF_LD_IMM64 instructions load valid 8340 * 'struct bpf_map *' into a register instead of user map_fd. 8341 * These pointers will be used later by verifier to validate map access. 8342 */ 8343 return 0; 8344 } 8345 8346 /* drop refcnt of maps used by the rejected program */ 8347 static void release_maps(struct bpf_verifier_env *env) 8348 { 8349 __bpf_free_used_maps(env->prog->aux, env->used_maps, 8350 env->used_map_cnt); 8351 } 8352 8353 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ 8354 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) 8355 { 8356 struct bpf_insn *insn = env->prog->insnsi; 8357 int insn_cnt = env->prog->len; 8358 int i; 8359 8360 for (i = 0; i < insn_cnt; i++, insn++) 8361 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) 8362 insn->src_reg = 0; 8363 } 8364 8365 /* single env->prog->insni[off] instruction was replaced with the range 8366 * insni[off, off + cnt). 
Adjust corresponding insn_aux_data by copying 8367 * [0, off) and [off, end) to new locations, so the patched range stays zero 8368 */ 8369 static int adjust_insn_aux_data(struct bpf_verifier_env *env, 8370 struct bpf_prog *new_prog, u32 off, u32 cnt) 8371 { 8372 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; 8373 struct bpf_insn *insn = new_prog->insnsi; 8374 u32 prog_len; 8375 int i; 8376 8377 /* aux info at OFF always needs adjustment, no matter fast path 8378 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the 8379 * original insn at old prog. 8380 */ 8381 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1); 8382 8383 if (cnt == 1) 8384 return 0; 8385 prog_len = new_prog->len; 8386 new_data = vzalloc(array_size(prog_len, 8387 sizeof(struct bpf_insn_aux_data))); 8388 if (!new_data) 8389 return -ENOMEM; 8390 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); 8391 memcpy(new_data + off + cnt - 1, old_data + off, 8392 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); 8393 for (i = off; i < off + cnt - 1; i++) { 8394 new_data[i].seen = env->pass_cnt; 8395 new_data[i].zext_dst = insn_has_def32(env, insn + i); 8396 } 8397 env->insn_aux_data = new_data; 8398 vfree(old_data); 8399 return 0; 8400 } 8401 8402 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) 8403 { 8404 int i; 8405 8406 if (len == 1) 8407 return; 8408 /* NOTE: fake 'exit' subprog should be updated as well. */ 8409 for (i = 0; i <= env->subprog_cnt; i++) { 8410 if (env->subprog_info[i].start <= off) 8411 continue; 8412 env->subprog_info[i].start += len - 1; 8413 } 8414 } 8415 8416 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, 8417 const struct bpf_insn *patch, u32 len) 8418 { 8419 struct bpf_prog *new_prog; 8420 8421 new_prog = bpf_patch_insn_single(env->prog, off, patch, len); 8422 if (IS_ERR(new_prog)) { 8423 if (PTR_ERR(new_prog) == -ERANGE) 8424 verbose(env, 8425 "insn %d cannot be patched due to 16-bit range\n", 8426 env->insn_aux_data[off].orig_idx); 8427 return NULL; 8428 } 8429 if (adjust_insn_aux_data(env, new_prog, off, len)) 8430 return NULL; 8431 adjust_subprog_starts(env, off, len); 8432 return new_prog; 8433 } 8434 8435 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, 8436 u32 off, u32 cnt) 8437 { 8438 int i, j; 8439 8440 /* find first prog starting at or after off (first to remove) */ 8441 for (i = 0; i < env->subprog_cnt; i++) 8442 if (env->subprog_info[i].start >= off) 8443 break; 8444 /* find first prog starting at or after off + cnt (first to stay) */ 8445 for (j = i; j < env->subprog_cnt; j++) 8446 if (env->subprog_info[j].start >= off + cnt) 8447 break; 8448 /* if j doesn't start exactly at off + cnt, we are just removing 8449 * the front of previous prog 8450 */ 8451 if (env->subprog_info[j].start != off + cnt) 8452 j--; 8453 8454 if (j > i) { 8455 struct bpf_prog_aux *aux = env->prog->aux; 8456 int move; 8457 8458 /* move fake 'exit' subprog as well */ 8459 move = env->subprog_cnt + 1 - j; 8460 8461 memmove(env->subprog_info + i, 8462 env->subprog_info + j, 8463 sizeof(*env->subprog_info) * move); 8464 env->subprog_cnt -= j - i; 8465 8466 /* remove func_info */ 8467 if (aux->func_info) { 8468 move = aux->func_info_cnt - j; 8469 8470 memmove(aux->func_info + i, 8471 aux->func_info + j, 8472 sizeof(*aux->func_info) * move); 8473 aux->func_info_cnt -= j - i; 8474 /* func_info->insn_off is set after all code rewrites, 8475 * in 
adjust_btf_func() - no need to adjust 8476 */ 8477 } 8478 } else { 8479 /* convert i from "first prog to remove" to "first to adjust" */ 8480 if (env->subprog_info[i].start == off) 8481 i++; 8482 } 8483 8484 /* update fake 'exit' subprog as well */ 8485 for (; i <= env->subprog_cnt; i++) 8486 env->subprog_info[i].start -= cnt; 8487 8488 return 0; 8489 } 8490 8491 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, 8492 u32 cnt) 8493 { 8494 struct bpf_prog *prog = env->prog; 8495 u32 i, l_off, l_cnt, nr_linfo; 8496 struct bpf_line_info *linfo; 8497 8498 nr_linfo = prog->aux->nr_linfo; 8499 if (!nr_linfo) 8500 return 0; 8501 8502 linfo = prog->aux->linfo; 8503 8504 /* find first line info to remove, count lines to be removed */ 8505 for (i = 0; i < nr_linfo; i++) 8506 if (linfo[i].insn_off >= off) 8507 break; 8508 8509 l_off = i; 8510 l_cnt = 0; 8511 for (; i < nr_linfo; i++) 8512 if (linfo[i].insn_off < off + cnt) 8513 l_cnt++; 8514 else 8515 break; 8516 8517 /* First live insn doesn't match first live linfo, it needs to "inherit" 8518 * last removed linfo. prog is already modified, so prog->len == off 8519 * means no live instructions after (tail of the program was removed). 8520 */ 8521 if (prog->len != off && l_cnt && 8522 (i == nr_linfo || linfo[i].insn_off != off + cnt)) { 8523 l_cnt--; 8524 linfo[--i].insn_off = off + cnt; 8525 } 8526 8527 /* remove the line info which refer to the removed instructions */ 8528 if (l_cnt) { 8529 memmove(linfo + l_off, linfo + i, 8530 sizeof(*linfo) * (nr_linfo - i)); 8531 8532 prog->aux->nr_linfo -= l_cnt; 8533 nr_linfo = prog->aux->nr_linfo; 8534 } 8535 8536 /* pull all linfo[i].insn_off >= off + cnt in by cnt */ 8537 for (i = l_off; i < nr_linfo; i++) 8538 linfo[i].insn_off -= cnt; 8539 8540 /* fix up all subprogs (incl. 'exit') which start >= off */ 8541 for (i = 0; i <= env->subprog_cnt; i++) 8542 if (env->subprog_info[i].linfo_idx > l_off) { 8543 /* program may have started in the removed region but 8544 * may not be fully removed 8545 */ 8546 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) 8547 env->subprog_info[i].linfo_idx -= l_cnt; 8548 else 8549 env->subprog_info[i].linfo_idx = l_off; 8550 } 8551 8552 return 0; 8553 } 8554 8555 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) 8556 { 8557 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 8558 unsigned int orig_prog_len = env->prog->len; 8559 int err; 8560 8561 if (bpf_prog_is_dev_bound(env->prog->aux)) 8562 bpf_prog_offload_remove_insns(env, off, cnt); 8563 8564 err = bpf_remove_insns(env->prog, off, cnt); 8565 if (err) 8566 return err; 8567 8568 err = adjust_subprog_starts_after_remove(env, off, cnt); 8569 if (err) 8570 return err; 8571 8572 err = bpf_adj_linfo_after_remove(env, off, cnt); 8573 if (err) 8574 return err; 8575 8576 memmove(aux_data + off, aux_data + off + cnt, 8577 sizeof(*aux_data) * (orig_prog_len - off - cnt)); 8578 8579 return 0; 8580 } 8581 8582 /* The verifier does more data flow analysis than llvm and will not 8583 * explore branches that are dead at run time. Malicious programs can 8584 * have dead code too. Therefore replace all dead at-run-time code 8585 * with 'ja -1'. 8586 * 8587 * Just nops are not optimal, e.g. if they would sit at the end of the 8588 * program and through another bug we would manage to jump there, then 8589 * we'd execute beyond program memory otherwise. Returning exception 8590 * code also wouldn't work since we can have subprogs where the dead 8591 * code could be located. 
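 *
 * Illustration (added example, not from the original comment): in a
 * program like
 *   if r1 > 10 goto +2     // verifier proved r1 <= 10 on every path
 *   r0 = 0
 *   exit
 *   r0 = 42                // only reachable via the dead branch
 *   exit
 * the last two insns are never marked as seen and are overwritten with
 * 'ja -1', i.e. a jump to itself, so even a buggy transfer into that
 * region cannot run past the end of the program.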
8592 */ 8593 static void sanitize_dead_code(struct bpf_verifier_env *env) 8594 { 8595 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 8596 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1); 8597 struct bpf_insn *insn = env->prog->insnsi; 8598 const int insn_cnt = env->prog->len; 8599 int i; 8600 8601 for (i = 0; i < insn_cnt; i++) { 8602 if (aux_data[i].seen) 8603 continue; 8604 memcpy(insn + i, &trap, sizeof(trap)); 8605 } 8606 } 8607 8608 static bool insn_is_cond_jump(u8 code) 8609 { 8610 u8 op; 8611 8612 if (BPF_CLASS(code) == BPF_JMP32) 8613 return true; 8614 8615 if (BPF_CLASS(code) != BPF_JMP) 8616 return false; 8617 8618 op = BPF_OP(code); 8619 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; 8620 } 8621 8622 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) 8623 { 8624 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 8625 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); 8626 struct bpf_insn *insn = env->prog->insnsi; 8627 const int insn_cnt = env->prog->len; 8628 int i; 8629 8630 for (i = 0; i < insn_cnt; i++, insn++) { 8631 if (!insn_is_cond_jump(insn->code)) 8632 continue; 8633 8634 if (!aux_data[i + 1].seen) 8635 ja.off = insn->off; 8636 else if (!aux_data[i + 1 + insn->off].seen) 8637 ja.off = 0; 8638 else 8639 continue; 8640 8641 if (bpf_prog_is_dev_bound(env->prog->aux)) 8642 bpf_prog_offload_replace_insn(env, i, &ja); 8643 8644 memcpy(insn, &ja, sizeof(ja)); 8645 } 8646 } 8647 8648 static int opt_remove_dead_code(struct bpf_verifier_env *env) 8649 { 8650 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 8651 int insn_cnt = env->prog->len; 8652 int i, err; 8653 8654 for (i = 0; i < insn_cnt; i++) { 8655 int j; 8656 8657 j = 0; 8658 while (i + j < insn_cnt && !aux_data[i + j].seen) 8659 j++; 8660 if (!j) 8661 continue; 8662 8663 err = verifier_remove_insns(env, i, j); 8664 if (err) 8665 return err; 8666 insn_cnt = env->prog->len; 8667 } 8668 8669 return 0; 8670 } 8671 8672 static int opt_remove_nops(struct bpf_verifier_env *env) 8673 { 8674 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); 8675 struct bpf_insn *insn = env->prog->insnsi; 8676 int insn_cnt = env->prog->len; 8677 int i, err; 8678 8679 for (i = 0; i < insn_cnt; i++) { 8680 if (memcmp(&insn[i], &ja, sizeof(ja))) 8681 continue; 8682 8683 err = verifier_remove_insns(env, i, 1); 8684 if (err) 8685 return err; 8686 insn_cnt--; 8687 i--; 8688 } 8689 8690 return 0; 8691 } 8692 8693 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, 8694 const union bpf_attr *attr) 8695 { 8696 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4]; 8697 struct bpf_insn_aux_data *aux = env->insn_aux_data; 8698 int i, patch_len, delta = 0, len = env->prog->len; 8699 struct bpf_insn *insns = env->prog->insnsi; 8700 struct bpf_prog *new_prog; 8701 bool rnd_hi32; 8702 8703 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32; 8704 zext_patch[1] = BPF_ZEXT_REG(0); 8705 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0); 8706 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); 8707 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX); 8708 for (i = 0; i < len; i++) { 8709 int adj_idx = i + delta; 8710 struct bpf_insn insn; 8711 8712 insn = insns[adj_idx]; 8713 if (!aux[adj_idx].zext_dst) { 8714 u8 code, class; 8715 u32 imm_rnd; 8716 8717 if (!rnd_hi32) 8718 continue; 8719 8720 code = insn.code; 8721 class = BPF_CLASS(code); 8722 if (insn_no_def(&insn)) 8723 continue; 8724 8725 /* NOTE: arg "reg" (the fourth one) is only used for 8726 * 
BPF_STX which has been ruled out in above 8727 * check, it is safe to pass NULL here. 8728 */ 8729 if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) { 8730 if (class == BPF_LD && 8731 BPF_MODE(code) == BPF_IMM) 8732 i++; 8733 continue; 8734 } 8735 8736 /* ctx load could be transformed into wider load. */ 8737 if (class == BPF_LDX && 8738 aux[adj_idx].ptr_type == PTR_TO_CTX) 8739 continue; 8740 8741 imm_rnd = get_random_int(); 8742 rnd_hi32_patch[0] = insn; 8743 rnd_hi32_patch[1].imm = imm_rnd; 8744 rnd_hi32_patch[3].dst_reg = insn.dst_reg; 8745 patch = rnd_hi32_patch; 8746 patch_len = 4; 8747 goto apply_patch_buffer; 8748 } 8749 8750 if (!bpf_jit_needs_zext()) 8751 continue; 8752 8753 zext_patch[0] = insn; 8754 zext_patch[1].dst_reg = insn.dst_reg; 8755 zext_patch[1].src_reg = insn.dst_reg; 8756 patch = zext_patch; 8757 patch_len = 2; 8758 apply_patch_buffer: 8759 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len); 8760 if (!new_prog) 8761 return -ENOMEM; 8762 env->prog = new_prog; 8763 insns = new_prog->insnsi; 8764 aux = env->insn_aux_data; 8765 delta += patch_len - 1; 8766 } 8767 8768 return 0; 8769 } 8770 8771 /* convert load instructions that access fields of a context type into a 8772 * sequence of instructions that access fields of the underlying structure: 8773 * struct __sk_buff -> struct sk_buff 8774 * struct bpf_sock_ops -> struct sock 8775 */ 8776 static int convert_ctx_accesses(struct bpf_verifier_env *env) 8777 { 8778 const struct bpf_verifier_ops *ops = env->ops; 8779 int i, cnt, size, ctx_field_size, delta = 0; 8780 const int insn_cnt = env->prog->len; 8781 struct bpf_insn insn_buf[16], *insn; 8782 u32 target_size, size_default, off; 8783 struct bpf_prog *new_prog; 8784 enum bpf_access_type type; 8785 bool is_narrower_load; 8786 8787 if (ops->gen_prologue || env->seen_direct_write) { 8788 if (!ops->gen_prologue) { 8789 verbose(env, "bpf verifier is misconfigured\n"); 8790 return -EINVAL; 8791 } 8792 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, 8793 env->prog); 8794 if (cnt >= ARRAY_SIZE(insn_buf)) { 8795 verbose(env, "bpf verifier is misconfigured\n"); 8796 return -EINVAL; 8797 } else if (cnt) { 8798 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); 8799 if (!new_prog) 8800 return -ENOMEM; 8801 8802 env->prog = new_prog; 8803 delta += cnt - 1; 8804 } 8805 } 8806 8807 if (bpf_prog_is_dev_bound(env->prog->aux)) 8808 return 0; 8809 8810 insn = env->prog->insnsi + delta; 8811 8812 for (i = 0; i < insn_cnt; i++, insn++) { 8813 bpf_convert_ctx_access_t convert_ctx_access; 8814 8815 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || 8816 insn->code == (BPF_LDX | BPF_MEM | BPF_H) || 8817 insn->code == (BPF_LDX | BPF_MEM | BPF_W) || 8818 insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) 8819 type = BPF_READ; 8820 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || 8821 insn->code == (BPF_STX | BPF_MEM | BPF_H) || 8822 insn->code == (BPF_STX | BPF_MEM | BPF_W) || 8823 insn->code == (BPF_STX | BPF_MEM | BPF_DW)) 8824 type = BPF_WRITE; 8825 else 8826 continue; 8827 8828 if (type == BPF_WRITE && 8829 env->insn_aux_data[i + delta].sanitize_stack_off) { 8830 struct bpf_insn patch[] = { 8831 /* Sanitize suspicious stack slot with zero. 
8832 			 * There are no memory dependencies for this store,
8833 			 * since it's only using frame pointer and immediate
8834 			 * constant of zero
8835 			 */
8836 				BPF_ST_MEM(BPF_DW, BPF_REG_FP,
8837 					   env->insn_aux_data[i + delta].sanitize_stack_off,
8838 					   0),
8839 				/* the original STX instruction will immediately
8840 				 * overwrite the same stack slot with appropriate value
8841 				 */
8842 				*insn,
8843 			};
8844
8845 			cnt = ARRAY_SIZE(patch);
8846 			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
8847 			if (!new_prog)
8848 				return -ENOMEM;
8849
8850 			delta += cnt - 1;
8851 			env->prog = new_prog;
8852 			insn = new_prog->insnsi + i + delta;
8853 			continue;
8854 		}
8855
8856 		switch (env->insn_aux_data[i + delta].ptr_type) {
8857 		case PTR_TO_CTX:
8858 			if (!ops->convert_ctx_access)
8859 				continue;
8860 			convert_ctx_access = ops->convert_ctx_access;
8861 			break;
8862 		case PTR_TO_SOCKET:
8863 		case PTR_TO_SOCK_COMMON:
8864 			convert_ctx_access = bpf_sock_convert_ctx_access;
8865 			break;
8866 		case PTR_TO_TCP_SOCK:
8867 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
8868 			break;
8869 		case PTR_TO_XDP_SOCK:
8870 			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
8871 			break;
8872 		case PTR_TO_BTF_ID:
8873 			if (type == BPF_READ) {
8874 				insn->code = BPF_LDX | BPF_PROBE_MEM |
8875 					BPF_SIZE((insn)->code);
8876 				env->prog->aux->num_exentries++;
8877 			} else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
8878 				verbose(env, "Writes through BTF pointers are not allowed\n");
8879 				return -EINVAL;
8880 			}
8881 			continue;
8882 		default:
8883 			continue;
8884 		}
8885
8886 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
8887 		size = BPF_LDST_BYTES(insn);
8888
8889 		/* If the read access is a narrower load of the field,
8890 		 * convert to a 4/8-byte load, to minimize program type specific
8891 		 * convert_ctx_access changes. If conversion is successful,
8892 		 * we will apply proper mask to the result.
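		 *
		 * Worked example (added illustration, assuming a
		 * little-endian host): a 1-byte read at byte offset 2 of a
		 * 4-byte context field is rewritten below into a 4-byte load
		 * from the aligned offset 0; the fix-up code then shifts the
		 * result right by 16 bits and masks it with 0xff, so the
		 * program still sees only the byte it originally asked for.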
8893 */ 8894 is_narrower_load = size < ctx_field_size; 8895 size_default = bpf_ctx_off_adjust_machine(ctx_field_size); 8896 off = insn->off; 8897 if (is_narrower_load) { 8898 u8 size_code; 8899 8900 if (type == BPF_WRITE) { 8901 verbose(env, "bpf verifier narrow ctx access misconfigured\n"); 8902 return -EINVAL; 8903 } 8904 8905 size_code = BPF_H; 8906 if (ctx_field_size == 4) 8907 size_code = BPF_W; 8908 else if (ctx_field_size == 8) 8909 size_code = BPF_DW; 8910 8911 insn->off = off & ~(size_default - 1); 8912 insn->code = BPF_LDX | BPF_MEM | size_code; 8913 } 8914 8915 target_size = 0; 8916 cnt = convert_ctx_access(type, insn, insn_buf, env->prog, 8917 &target_size); 8918 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || 8919 (ctx_field_size && !target_size)) { 8920 verbose(env, "bpf verifier is misconfigured\n"); 8921 return -EINVAL; 8922 } 8923 8924 if (is_narrower_load && size < target_size) { 8925 u8 shift = bpf_ctx_narrow_access_offset( 8926 off, size, size_default) * 8; 8927 if (ctx_field_size <= 4) { 8928 if (shift) 8929 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, 8930 insn->dst_reg, 8931 shift); 8932 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, 8933 (1 << size * 8) - 1); 8934 } else { 8935 if (shift) 8936 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, 8937 insn->dst_reg, 8938 shift); 8939 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, 8940 (1ULL << size * 8) - 1); 8941 } 8942 } 8943 8944 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 8945 if (!new_prog) 8946 return -ENOMEM; 8947 8948 delta += cnt - 1; 8949 8950 /* keep walking new program and skip insns we just inserted */ 8951 env->prog = new_prog; 8952 insn = new_prog->insnsi + i + delta; 8953 } 8954 8955 return 0; 8956 } 8957 8958 static int jit_subprogs(struct bpf_verifier_env *env) 8959 { 8960 struct bpf_prog *prog = env->prog, **func, *tmp; 8961 int i, j, subprog_start, subprog_end = 0, len, subprog; 8962 struct bpf_insn *insn; 8963 void *old_bpf_func; 8964 int err; 8965 8966 if (env->subprog_cnt <= 1) 8967 return 0; 8968 8969 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 8970 if (insn->code != (BPF_JMP | BPF_CALL) || 8971 insn->src_reg != BPF_PSEUDO_CALL) 8972 continue; 8973 /* Upon error here we cannot fall back to interpreter but 8974 * need a hard reject of the program. Thus -EFAULT is 8975 * propagated in any case. 8976 */ 8977 subprog = find_subprog(env, i + insn->imm + 1); 8978 if (subprog < 0) { 8979 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", 8980 i + insn->imm + 1); 8981 return -EFAULT; 8982 } 8983 /* temporarily remember subprog id inside insn instead of 8984 * aux_data, since next loop will split up all insns into funcs 8985 */ 8986 insn->off = subprog; 8987 /* remember original imm in case JIT fails and fallback 8988 * to interpreter will be needed 8989 */ 8990 env->insn_aux_data[i].call_imm = insn->imm; 8991 /* point imm to __bpf_call_base+1 from JITs point of view */ 8992 insn->imm = 1; 8993 } 8994 8995 err = bpf_prog_alloc_jited_linfo(prog); 8996 if (err) 8997 goto out_undo_insn; 8998 8999 err = -ENOMEM; 9000 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL); 9001 if (!func) 9002 goto out_undo_insn; 9003 9004 for (i = 0; i < env->subprog_cnt; i++) { 9005 subprog_start = subprog_end; 9006 subprog_end = env->subprog_info[i + 1].start; 9007 9008 len = subprog_end - subprog_start; 9009 /* BPF_PROG_RUN doesn't call subprogs directly, 9010 * hence main prog stats include the runtime of subprogs. 
9011 	 * subprogs don't have IDs and are not reachable via prog_get_next_id
9012 	 * func[i]->aux->stats will never be accessed and stays NULL
9013 	 */
9014 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
9015 		if (!func[i])
9016 			goto out_free;
9017 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
9018 		       len * sizeof(struct bpf_insn));
9019 		func[i]->type = prog->type;
9020 		func[i]->len = len;
9021 		if (bpf_prog_calc_tag(func[i]))
9022 			goto out_free;
9023 		func[i]->is_func = 1;
9024 		func[i]->aux->func_idx = i;
9025 		/* the btf and func_info will be freed only at prog->aux */
9026 		func[i]->aux->btf = prog->aux->btf;
9027 		func[i]->aux->func_info = prog->aux->func_info;
9028
9029 		/* Use bpf_prog_F_tag to indicate functions in stack traces.
9030 		 * Long term, we would need debug info to populate names
9031 		 */
9032 		func[i]->aux->name[0] = 'F';
9033 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
9034 		func[i]->jit_requested = 1;
9035 		func[i]->aux->linfo = prog->aux->linfo;
9036 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
9037 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
9038 		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
9039 		func[i] = bpf_int_jit_compile(func[i]);
9040 		if (!func[i]->jited) {
9041 			err = -ENOTSUPP;
9042 			goto out_free;
9043 		}
9044 		cond_resched();
9045 	}
9046 	/* at this point all bpf functions were successfully JITed.
9047 	 * Now populate all bpf_calls with correct addresses and
9048 	 * run last pass of JIT
9049 	 */
9050 	for (i = 0; i < env->subprog_cnt; i++) {
9051 		insn = func[i]->insnsi;
9052 		for (j = 0; j < func[i]->len; j++, insn++) {
9053 			if (insn->code != (BPF_JMP | BPF_CALL) ||
9054 			    insn->src_reg != BPF_PSEUDO_CALL)
9055 				continue;
9056 			subprog = insn->off;
9057 			insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
9058 				    __bpf_call_base;
9059 		}
9060
9061 		/* we use the aux data to keep a list of the start addresses
9062 		 * of the JITed images for each function in the program
9063 		 *
9064 		 * for some architectures, such as powerpc64, the imm field
9065 		 * might not be large enough to hold the offset of the start
9066 		 * address of the callee's JITed image from __bpf_call_base
9067 		 *
9068 		 * in such cases, we can look up the start address of a callee
9069 		 * by using its subprog id, available from the off field of
9070 		 * the call instruction, as an index for this list
9071 		 */
9072 		func[i]->aux->func = func;
9073 		func[i]->aux->func_cnt = env->subprog_cnt;
9074 	}
9075 	for (i = 0; i < env->subprog_cnt; i++) {
9076 		old_bpf_func = func[i]->bpf_func;
9077 		tmp = bpf_int_jit_compile(func[i]);
9078 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
9079 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
9080 			err = -ENOTSUPP;
9081 			goto out_free;
9082 		}
9083 		cond_resched();
9084 	}
9085
9086 	/* finally lock prog and jit images for all functions and
9087 	 * populate kallsyms
9088 	 */
9089 	for (i = 0; i < env->subprog_cnt; i++) {
9090 		bpf_prog_lock_ro(func[i]);
9091 		bpf_prog_kallsyms_add(func[i]);
9092 	}
9093
9094 	/* Last step: make now unused interpreter insns from main
9095 	 * prog consistent for later dump requests, so they can
9096 	 * later look the same as if they were interpreted only.
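	 *
	 * Descriptive note (added, not from the original comment):
	 * concretely, the loop below moves the original relative call offset
	 * saved in insn_aux_data[].call_imm into insn->off and stores the
	 * callee's subprog index in insn->imm, which is the form expected
	 * when the now-JITed main program is dumped.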
9097 */ 9098 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 9099 if (insn->code != (BPF_JMP | BPF_CALL) || 9100 insn->src_reg != BPF_PSEUDO_CALL) 9101 continue; 9102 insn->off = env->insn_aux_data[i].call_imm; 9103 subprog = find_subprog(env, i + insn->off + 1); 9104 insn->imm = subprog; 9105 } 9106 9107 prog->jited = 1; 9108 prog->bpf_func = func[0]->bpf_func; 9109 prog->aux->func = func; 9110 prog->aux->func_cnt = env->subprog_cnt; 9111 bpf_prog_free_unused_jited_linfo(prog); 9112 return 0; 9113 out_free: 9114 for (i = 0; i < env->subprog_cnt; i++) 9115 if (func[i]) 9116 bpf_jit_free(func[i]); 9117 kfree(func); 9118 out_undo_insn: 9119 /* cleanup main prog to be interpreted */ 9120 prog->jit_requested = 0; 9121 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 9122 if (insn->code != (BPF_JMP | BPF_CALL) || 9123 insn->src_reg != BPF_PSEUDO_CALL) 9124 continue; 9125 insn->off = 0; 9126 insn->imm = env->insn_aux_data[i].call_imm; 9127 } 9128 bpf_prog_free_jited_linfo(prog); 9129 return err; 9130 } 9131 9132 static int fixup_call_args(struct bpf_verifier_env *env) 9133 { 9134 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 9135 struct bpf_prog *prog = env->prog; 9136 struct bpf_insn *insn = prog->insnsi; 9137 int i, depth; 9138 #endif 9139 int err = 0; 9140 9141 if (env->prog->jit_requested && 9142 !bpf_prog_is_dev_bound(env->prog->aux)) { 9143 err = jit_subprogs(env); 9144 if (err == 0) 9145 return 0; 9146 if (err == -EFAULT) 9147 return err; 9148 } 9149 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 9150 for (i = 0; i < prog->len; i++, insn++) { 9151 if (insn->code != (BPF_JMP | BPF_CALL) || 9152 insn->src_reg != BPF_PSEUDO_CALL) 9153 continue; 9154 depth = get_callee_stack_depth(env, insn, i); 9155 if (depth < 0) 9156 return depth; 9157 bpf_patch_call_args(insn, depth); 9158 } 9159 err = 0; 9160 #endif 9161 return err; 9162 } 9163 9164 /* fixup insn->imm field of bpf_call instructions 9165 * and inline eligible helpers as explicit sequence of BPF instructions 9166 * 9167 * this function is called after eBPF program passed verification 9168 */ 9169 static int fixup_bpf_calls(struct bpf_verifier_env *env) 9170 { 9171 struct bpf_prog *prog = env->prog; 9172 bool expect_blinding = bpf_jit_blinding_enabled(prog); 9173 struct bpf_insn *insn = prog->insnsi; 9174 const struct bpf_func_proto *fn; 9175 const int insn_cnt = prog->len; 9176 const struct bpf_map_ops *ops; 9177 struct bpf_insn_aux_data *aux; 9178 struct bpf_insn insn_buf[16]; 9179 struct bpf_prog *new_prog; 9180 struct bpf_map *map_ptr; 9181 int i, ret, cnt, delta = 0; 9182 9183 for (i = 0; i < insn_cnt; i++, insn++) { 9184 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || 9185 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || 9186 insn->code == (BPF_ALU | BPF_MOD | BPF_X) || 9187 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { 9188 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 9189 struct bpf_insn mask_and_div[] = { 9190 BPF_MOV32_REG(insn->src_reg, insn->src_reg), 9191 /* Rx div 0 -> 0 */ 9192 BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2), 9193 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg), 9194 BPF_JMP_IMM(BPF_JA, 0, 0, 1), 9195 *insn, 9196 }; 9197 struct bpf_insn mask_and_mod[] = { 9198 BPF_MOV32_REG(insn->src_reg, insn->src_reg), 9199 /* Rx mod 0 -> Rx */ 9200 BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1), 9201 *insn, 9202 }; 9203 struct bpf_insn *patchlet; 9204 9205 if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || 9206 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { 9207 patchlet = mask_and_div + (is64 ? 
1 : 0); 9208 cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0); 9209 } else { 9210 patchlet = mask_and_mod + (is64 ? 1 : 0); 9211 cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0); 9212 } 9213 9214 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); 9215 if (!new_prog) 9216 return -ENOMEM; 9217 9218 delta += cnt - 1; 9219 env->prog = prog = new_prog; 9220 insn = new_prog->insnsi + i + delta; 9221 continue; 9222 } 9223 9224 if (BPF_CLASS(insn->code) == BPF_LD && 9225 (BPF_MODE(insn->code) == BPF_ABS || 9226 BPF_MODE(insn->code) == BPF_IND)) { 9227 cnt = env->ops->gen_ld_abs(insn, insn_buf); 9228 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { 9229 verbose(env, "bpf verifier is misconfigured\n"); 9230 return -EINVAL; 9231 } 9232 9233 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 9234 if (!new_prog) 9235 return -ENOMEM; 9236 9237 delta += cnt - 1; 9238 env->prog = prog = new_prog; 9239 insn = new_prog->insnsi + i + delta; 9240 continue; 9241 } 9242 9243 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || 9244 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { 9245 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; 9246 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; 9247 struct bpf_insn insn_buf[16]; 9248 struct bpf_insn *patch = &insn_buf[0]; 9249 bool issrc, isneg; 9250 u32 off_reg; 9251 9252 aux = &env->insn_aux_data[i + delta]; 9253 if (!aux->alu_state || 9254 aux->alu_state == BPF_ALU_NON_POINTER) 9255 continue; 9256 9257 isneg = aux->alu_state & BPF_ALU_NEG_VALUE; 9258 issrc = (aux->alu_state & BPF_ALU_SANITIZE) == 9259 BPF_ALU_SANITIZE_SRC; 9260 9261 off_reg = issrc ? insn->src_reg : insn->dst_reg; 9262 if (isneg) 9263 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 9264 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); 9265 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); 9266 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); 9267 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); 9268 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); 9269 if (issrc) { 9270 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, 9271 off_reg); 9272 insn->src_reg = BPF_REG_AX; 9273 } else { 9274 *patch++ = BPF_ALU64_REG(BPF_AND, off_reg, 9275 BPF_REG_AX); 9276 } 9277 if (isneg) 9278 insn->code = insn->code == code_add ? 9279 code_sub : code_add; 9280 *patch++ = *insn; 9281 if (issrc && isneg) 9282 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 9283 cnt = patch - insn_buf; 9284 9285 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 9286 if (!new_prog) 9287 return -ENOMEM; 9288 9289 delta += cnt - 1; 9290 env->prog = prog = new_prog; 9291 insn = new_prog->insnsi + i + delta; 9292 continue; 9293 } 9294 9295 if (insn->code != (BPF_JMP | BPF_CALL)) 9296 continue; 9297 if (insn->src_reg == BPF_PSEUDO_CALL) 9298 continue; 9299 9300 if (insn->imm == BPF_FUNC_get_route_realm) 9301 prog->dst_needed = 1; 9302 if (insn->imm == BPF_FUNC_get_prandom_u32) 9303 bpf_user_rnd_init_once(); 9304 if (insn->imm == BPF_FUNC_override_return) 9305 prog->kprobe_override = 1; 9306 if (insn->imm == BPF_FUNC_tail_call) { 9307 /* If we tail call into other programs, we 9308 * cannot make any assumptions since they can 9309 * be replaced dynamically during runtime in 9310 * the program array. 
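* That is why stack_depth and max_pkt_offset are pessimistically raised
* to MAX_BPF_STACK and MAX_PACKET_OFF right below.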
9311 */
9312 prog->cb_access = 1;
9313 env->prog->aux->stack_depth = MAX_BPF_STACK;
9314 env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
9315
9316 /* mark bpf_tail_call as a different opcode to avoid
9317 * a conditional branch in the interpreter for every normal
9318 * call and to prevent accidental JITing by a JIT compiler
9319 * that doesn't support bpf_tail_call yet
9320 */
9321 insn->imm = 0;
9322 insn->code = BPF_JMP | BPF_TAIL_CALL;
9323
9324 aux = &env->insn_aux_data[i + delta];
9325 if (env->allow_ptr_leaks && !expect_blinding &&
9326 prog->jit_requested &&
9327 !bpf_map_key_poisoned(aux) &&
9328 !bpf_map_ptr_poisoned(aux) &&
9329 !bpf_map_ptr_unpriv(aux)) {
9330 struct bpf_jit_poke_descriptor desc = {
9331 .reason = BPF_POKE_REASON_TAIL_CALL,
9332 .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
9333 .tail_call.key = bpf_map_key_immediate(aux),
9334 };
9335
9336 ret = bpf_jit_add_poke_descriptor(prog, &desc);
9337 if (ret < 0) {
9338 verbose(env, "adding tail call poke descriptor failed\n");
9339 return ret;
9340 }
9341
9342 insn->imm = ret + 1;
9343 continue;
9344 }
9345
9346 if (!bpf_map_ptr_unpriv(aux))
9347 continue;
9348
9349 /* instead of changing every JIT dealing with tail_call
9350 * emit two extra insns:
9351 * if (index >= max_entries) goto out;
9352 * index &= array->index_mask;
9353 * to avoid out-of-bounds cpu speculation
9354 */
9355 if (bpf_map_ptr_poisoned(aux)) {
9356 verbose(env, "tail_call abusing map_ptr\n");
9357 return -EINVAL;
9358 }
9359
9360 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
9361 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
9362 map_ptr->max_entries, 2);
9363 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
9364 container_of(map_ptr,
9365 struct bpf_array,
9366 map)->index_mask);
9367 insn_buf[2] = *insn;
9368 cnt = 3;
9369 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9370 if (!new_prog)
9371 return -ENOMEM;
9372
9373 delta += cnt - 1;
9374 env->prog = prog = new_prog;
9375 insn = new_prog->insnsi + i + delta;
9376 continue;
9377 }
9378
9379 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
9380 * and other inlining handlers are currently limited to 64-bit
9381 * only.
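*
* As a rough illustration (not the exact emitted sequence), an array
* map's map_gen_lookup typically replaces the helper call with an
* inline bounds check on the key followed by direct pointer arithmetic
* into the map's value area.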
9382 */ 9383 if (prog->jit_requested && BITS_PER_LONG == 64 && 9384 (insn->imm == BPF_FUNC_map_lookup_elem || 9385 insn->imm == BPF_FUNC_map_update_elem || 9386 insn->imm == BPF_FUNC_map_delete_elem || 9387 insn->imm == BPF_FUNC_map_push_elem || 9388 insn->imm == BPF_FUNC_map_pop_elem || 9389 insn->imm == BPF_FUNC_map_peek_elem)) { 9390 aux = &env->insn_aux_data[i + delta]; 9391 if (bpf_map_ptr_poisoned(aux)) 9392 goto patch_call_imm; 9393 9394 map_ptr = BPF_MAP_PTR(aux->map_ptr_state); 9395 ops = map_ptr->ops; 9396 if (insn->imm == BPF_FUNC_map_lookup_elem && 9397 ops->map_gen_lookup) { 9398 cnt = ops->map_gen_lookup(map_ptr, insn_buf); 9399 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { 9400 verbose(env, "bpf verifier is misconfigured\n"); 9401 return -EINVAL; 9402 } 9403 9404 new_prog = bpf_patch_insn_data(env, i + delta, 9405 insn_buf, cnt); 9406 if (!new_prog) 9407 return -ENOMEM; 9408 9409 delta += cnt - 1; 9410 env->prog = prog = new_prog; 9411 insn = new_prog->insnsi + i + delta; 9412 continue; 9413 } 9414 9415 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, 9416 (void *(*)(struct bpf_map *map, void *key))NULL)); 9417 BUILD_BUG_ON(!__same_type(ops->map_delete_elem, 9418 (int (*)(struct bpf_map *map, void *key))NULL)); 9419 BUILD_BUG_ON(!__same_type(ops->map_update_elem, 9420 (int (*)(struct bpf_map *map, void *key, void *value, 9421 u64 flags))NULL)); 9422 BUILD_BUG_ON(!__same_type(ops->map_push_elem, 9423 (int (*)(struct bpf_map *map, void *value, 9424 u64 flags))NULL)); 9425 BUILD_BUG_ON(!__same_type(ops->map_pop_elem, 9426 (int (*)(struct bpf_map *map, void *value))NULL)); 9427 BUILD_BUG_ON(!__same_type(ops->map_peek_elem, 9428 (int (*)(struct bpf_map *map, void *value))NULL)); 9429 9430 switch (insn->imm) { 9431 case BPF_FUNC_map_lookup_elem: 9432 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - 9433 __bpf_call_base; 9434 continue; 9435 case BPF_FUNC_map_update_elem: 9436 insn->imm = BPF_CAST_CALL(ops->map_update_elem) - 9437 __bpf_call_base; 9438 continue; 9439 case BPF_FUNC_map_delete_elem: 9440 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - 9441 __bpf_call_base; 9442 continue; 9443 case BPF_FUNC_map_push_elem: 9444 insn->imm = BPF_CAST_CALL(ops->map_push_elem) - 9445 __bpf_call_base; 9446 continue; 9447 case BPF_FUNC_map_pop_elem: 9448 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - 9449 __bpf_call_base; 9450 continue; 9451 case BPF_FUNC_map_peek_elem: 9452 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - 9453 __bpf_call_base; 9454 continue; 9455 } 9456 9457 goto patch_call_imm; 9458 } 9459 9460 if (prog->jit_requested && BITS_PER_LONG == 64 && 9461 insn->imm == BPF_FUNC_jiffies64) { 9462 struct bpf_insn ld_jiffies_addr[2] = { 9463 BPF_LD_IMM64(BPF_REG_0, 9464 (unsigned long)&jiffies), 9465 }; 9466 9467 insn_buf[0] = ld_jiffies_addr[0]; 9468 insn_buf[1] = ld_jiffies_addr[1]; 9469 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, 9470 BPF_REG_0, 0); 9471 cnt = 3; 9472 9473 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 9474 cnt); 9475 if (!new_prog) 9476 return -ENOMEM; 9477 9478 delta += cnt - 1; 9479 env->prog = prog = new_prog; 9480 insn = new_prog->insnsi + i + delta; 9481 continue; 9482 } 9483 9484 patch_call_imm: 9485 fn = env->ops->get_func_proto(insn->imm, env->prog); 9486 /* all functions that have prototype and verifier allowed 9487 * programs to call them, must be real in-kernel functions 9488 */ 9489 if (!fn->func) { 9490 verbose(env, 9491 "kernel subsystem misconfigured func %s#%d\n", 9492 func_id_name(insn->imm), insn->imm); 9493 return -EFAULT; 9494 } 
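/* store the helper's address as an offset relative to __bpf_call_base;
* the interpreter/JIT adds the base back to recover the real kernel
* function address at call time
*/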
9495 insn->imm = fn->func - __bpf_call_base;
9496 }
9497
9498 /* Since the poke tab is now finalized, publish aux to tracker. */
9499 for (i = 0; i < prog->aux->size_poke_tab; i++) {
9500 map_ptr = prog->aux->poke_tab[i].tail_call.map;
9501 if (!map_ptr->ops->map_poke_track ||
9502 !map_ptr->ops->map_poke_untrack ||
9503 !map_ptr->ops->map_poke_run) {
9504 verbose(env, "bpf verifier is misconfigured\n");
9505 return -EINVAL;
9506 }
9507
9508 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
9509 if (ret < 0) {
9510 verbose(env, "tracking tail call prog failed\n");
9511 return ret;
9512 }
9513 }
9514
9515 return 0;
9516 }
9517
9518 static void free_states(struct bpf_verifier_env *env)
9519 {
9520 struct bpf_verifier_state_list *sl, *sln;
9521 int i;
9522
9523 sl = env->free_list;
9524 while (sl) {
9525 sln = sl->next;
9526 free_verifier_state(&sl->state, false);
9527 kfree(sl);
9528 sl = sln;
9529 }
9530 env->free_list = NULL;
9531
9532 if (!env->explored_states)
9533 return;
9534
9535 for (i = 0; i < state_htab_size(env); i++) {
9536 sl = env->explored_states[i];
9537
9538 while (sl) {
9539 sln = sl->next;
9540 free_verifier_state(&sl->state, false);
9541 kfree(sl);
9542 sl = sln;
9543 }
9544 env->explored_states[i] = NULL;
9545 }
9546 }
9547
9548 /* The verifier uses insn_aux_data[] to store temporary data during
9549 * verification and to store information for passes that run after the
9550 * verification, like dead code sanitization. do_check_common() for subprogram N
9551 * may analyze many other subprograms. sanitize_insn_aux_data() clears all
9552 * temporary data after do_check_common() finds that subprogram N cannot be
9553 * verified independently. pass_cnt counts the number of times
9554 * do_check_common() was run and insn->aux->seen tells the pass number
9555 * insn_aux_data was touched. These variables are compared to clear temporary
9556 * data from the failed pass. For testing and experiments do_check_common() can be
9557 * run multiple times even when a prior attempt to verify was unsuccessful.
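* Only the aux data of LDX/STX insns whose ->seen matches the failing
* pass_cnt is cleared, and orig_idx survives the clear; see
* sanitize_insn_aux_data() below.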
9558 */ 9559 static void sanitize_insn_aux_data(struct bpf_verifier_env *env) 9560 { 9561 struct bpf_insn *insn = env->prog->insnsi; 9562 struct bpf_insn_aux_data *aux; 9563 int i, class; 9564 9565 for (i = 0; i < env->prog->len; i++) { 9566 class = BPF_CLASS(insn[i].code); 9567 if (class != BPF_LDX && class != BPF_STX) 9568 continue; 9569 aux = &env->insn_aux_data[i]; 9570 if (aux->seen != env->pass_cnt) 9571 continue; 9572 memset(aux, 0, offsetof(typeof(*aux), orig_idx)); 9573 } 9574 } 9575 9576 static int do_check_common(struct bpf_verifier_env *env, int subprog) 9577 { 9578 struct bpf_verifier_state *state; 9579 struct bpf_reg_state *regs; 9580 int ret, i; 9581 9582 env->prev_linfo = NULL; 9583 env->pass_cnt++; 9584 9585 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); 9586 if (!state) 9587 return -ENOMEM; 9588 state->curframe = 0; 9589 state->speculative = false; 9590 state->branches = 1; 9591 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); 9592 if (!state->frame[0]) { 9593 kfree(state); 9594 return -ENOMEM; 9595 } 9596 env->cur_state = state; 9597 init_func_state(env, state->frame[0], 9598 BPF_MAIN_FUNC /* callsite */, 9599 0 /* frameno */, 9600 subprog); 9601 9602 regs = state->frame[state->curframe]->regs; 9603 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { 9604 ret = btf_prepare_func_args(env, subprog, regs); 9605 if (ret) 9606 goto out; 9607 for (i = BPF_REG_1; i <= BPF_REG_5; i++) { 9608 if (regs[i].type == PTR_TO_CTX) 9609 mark_reg_known_zero(env, regs, i); 9610 else if (regs[i].type == SCALAR_VALUE) 9611 mark_reg_unknown(env, regs, i); 9612 } 9613 } else { 9614 /* 1st arg to a function */ 9615 regs[BPF_REG_1].type = PTR_TO_CTX; 9616 mark_reg_known_zero(env, regs, BPF_REG_1); 9617 ret = btf_check_func_arg_match(env, subprog, regs); 9618 if (ret == -EFAULT) 9619 /* unlikely verifier bug. abort. 9620 * ret == 0 and ret < 0 are sadly acceptable for 9621 * main() function due to backward compatibility. 9622 * Like socket filter program may be written as: 9623 * int bpf_prog(struct pt_regs *ctx) 9624 * and never dereference that ctx in the program. 9625 * 'struct pt_regs' is a type mismatch for socket 9626 * filter that should be using 'struct __sk_buff'. 9627 */ 9628 goto out; 9629 } 9630 9631 ret = do_check(env); 9632 out: 9633 /* check for NULL is necessary, since cur_state can be freed inside 9634 * do_check() under memory pressure. 9635 */ 9636 if (env->cur_state) { 9637 free_verifier_state(env->cur_state, true); 9638 env->cur_state = NULL; 9639 } 9640 while (!pop_stack(env, NULL, NULL)); 9641 free_states(env); 9642 if (ret) 9643 /* clean aux data in case subprog was rejected */ 9644 sanitize_insn_aux_data(env); 9645 return ret; 9646 } 9647 9648 /* Verify all global functions in a BPF program one by one based on their BTF. 9649 * All global functions must pass verification. Otherwise the whole program is rejected. 9650 * Consider: 9651 * int bar(int); 9652 * int foo(int f) 9653 * { 9654 * return bar(f); 9655 * } 9656 * int bar(int b) 9657 * { 9658 * ... 9659 * } 9660 * foo() will be verified first for R1=any_scalar_value. During verification it 9661 * will be assumed that bar() already verified successfully and call to bar() 9662 * from foo() will be checked for type match only. Later bar() will be verified 9663 * independently to check that it's safe for R1=any_scalar_value. 
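* Only subprogs whose BTF linkage is BTF_FUNC_GLOBAL are verified this
* way (see do_check_subprogs() below); static functions are still
* analyzed inline as part of their callers.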
9664 */ 9665 static int do_check_subprogs(struct bpf_verifier_env *env) 9666 { 9667 struct bpf_prog_aux *aux = env->prog->aux; 9668 int i, ret; 9669 9670 if (!aux->func_info) 9671 return 0; 9672 9673 for (i = 1; i < env->subprog_cnt; i++) { 9674 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL) 9675 continue; 9676 env->insn_idx = env->subprog_info[i].start; 9677 WARN_ON_ONCE(env->insn_idx == 0); 9678 ret = do_check_common(env, i); 9679 if (ret) { 9680 return ret; 9681 } else if (env->log.level & BPF_LOG_LEVEL) { 9682 verbose(env, 9683 "Func#%d is safe for any args that match its prototype\n", 9684 i); 9685 } 9686 } 9687 return 0; 9688 } 9689 9690 static int do_check_main(struct bpf_verifier_env *env) 9691 { 9692 int ret; 9693 9694 env->insn_idx = 0; 9695 ret = do_check_common(env, 0); 9696 if (!ret) 9697 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; 9698 return ret; 9699 } 9700 9701 9702 static void print_verification_stats(struct bpf_verifier_env *env) 9703 { 9704 int i; 9705 9706 if (env->log.level & BPF_LOG_STATS) { 9707 verbose(env, "verification time %lld usec\n", 9708 div_u64(env->verification_time, 1000)); 9709 verbose(env, "stack depth "); 9710 for (i = 0; i < env->subprog_cnt; i++) { 9711 u32 depth = env->subprog_info[i].stack_depth; 9712 9713 verbose(env, "%d", depth); 9714 if (i + 1 < env->subprog_cnt) 9715 verbose(env, "+"); 9716 } 9717 verbose(env, "\n"); 9718 } 9719 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d " 9720 "total_states %d peak_states %d mark_read %d\n", 9721 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, 9722 env->max_states_per_insn, env->total_states, 9723 env->peak_states, env->longest_mark_read_walk); 9724 } 9725 9726 static int check_struct_ops_btf_id(struct bpf_verifier_env *env) 9727 { 9728 const struct btf_type *t, *func_proto; 9729 const struct bpf_struct_ops *st_ops; 9730 const struct btf_member *member; 9731 struct bpf_prog *prog = env->prog; 9732 u32 btf_id, member_idx; 9733 const char *mname; 9734 9735 btf_id = prog->aux->attach_btf_id; 9736 st_ops = bpf_struct_ops_find(btf_id); 9737 if (!st_ops) { 9738 verbose(env, "attach_btf_id %u is not a supported struct\n", 9739 btf_id); 9740 return -ENOTSUPP; 9741 } 9742 9743 t = st_ops->type; 9744 member_idx = prog->expected_attach_type; 9745 if (member_idx >= btf_type_vlen(t)) { 9746 verbose(env, "attach to invalid member idx %u of struct %s\n", 9747 member_idx, st_ops->name); 9748 return -EINVAL; 9749 } 9750 9751 member = &btf_type_member(t)[member_idx]; 9752 mname = btf_name_by_offset(btf_vmlinux, member->name_off); 9753 func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type, 9754 NULL); 9755 if (!func_proto) { 9756 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", 9757 mname, member_idx, st_ops->name); 9758 return -EINVAL; 9759 } 9760 9761 if (st_ops->check_member) { 9762 int err = st_ops->check_member(t, member); 9763 9764 if (err) { 9765 verbose(env, "attach to unsupported member %s of struct %s\n", 9766 mname, st_ops->name); 9767 return err; 9768 } 9769 } 9770 9771 prog->aux->attach_func_proto = func_proto; 9772 prog->aux->attach_func_name = mname; 9773 env->ops = st_ops->verifier_ops; 9774 9775 return 0; 9776 } 9777 9778 static int check_attach_btf_id(struct bpf_verifier_env *env) 9779 { 9780 struct bpf_prog *prog = env->prog; 9781 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; 9782 struct bpf_prog *tgt_prog = prog->aux->linked_prog; 9783 u32 btf_id = prog->aux->attach_btf_id; 9784 const char prefix[] = "btf_trace_"; 9785 
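/* raw_tp attach targets are typedefs named btf_trace_<tracepoint>; the
* BPF_TRACE_RAW_TP case below checks for and strips this prefix so that
* attach_func_name holds the bare tracepoint name
*/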
int ret = 0, subprog = -1, i; 9786 struct bpf_trampoline *tr; 9787 const struct btf_type *t; 9788 bool conservative = true; 9789 const char *tname; 9790 struct btf *btf; 9791 long addr; 9792 u64 key; 9793 9794 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) 9795 return check_struct_ops_btf_id(env); 9796 9797 if (prog->type != BPF_PROG_TYPE_TRACING && !prog_extension) 9798 return 0; 9799 9800 if (!btf_id) { 9801 verbose(env, "Tracing programs must provide btf_id\n"); 9802 return -EINVAL; 9803 } 9804 btf = bpf_prog_get_target_btf(prog); 9805 if (!btf) { 9806 verbose(env, 9807 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n"); 9808 return -EINVAL; 9809 } 9810 t = btf_type_by_id(btf, btf_id); 9811 if (!t) { 9812 verbose(env, "attach_btf_id %u is invalid\n", btf_id); 9813 return -EINVAL; 9814 } 9815 tname = btf_name_by_offset(btf, t->name_off); 9816 if (!tname) { 9817 verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id); 9818 return -EINVAL; 9819 } 9820 if (tgt_prog) { 9821 struct bpf_prog_aux *aux = tgt_prog->aux; 9822 9823 for (i = 0; i < aux->func_info_cnt; i++) 9824 if (aux->func_info[i].type_id == btf_id) { 9825 subprog = i; 9826 break; 9827 } 9828 if (subprog == -1) { 9829 verbose(env, "Subprog %s doesn't exist\n", tname); 9830 return -EINVAL; 9831 } 9832 conservative = aux->func_info_aux[subprog].unreliable; 9833 if (prog_extension) { 9834 if (conservative) { 9835 verbose(env, 9836 "Cannot replace static functions\n"); 9837 return -EINVAL; 9838 } 9839 if (!prog->jit_requested) { 9840 verbose(env, 9841 "Extension programs should be JITed\n"); 9842 return -EINVAL; 9843 } 9844 env->ops = bpf_verifier_ops[tgt_prog->type]; 9845 } 9846 if (!tgt_prog->jited) { 9847 verbose(env, "Can attach to only JITed progs\n"); 9848 return -EINVAL; 9849 } 9850 if (tgt_prog->type == prog->type) { 9851 /* Cannot fentry/fexit another fentry/fexit program. 9852 * Cannot attach program extension to another extension. 9853 * It's ok to attach fentry/fexit to extension program. 9854 */ 9855 verbose(env, "Cannot recursively attach\n"); 9856 return -EINVAL; 9857 } 9858 if (tgt_prog->type == BPF_PROG_TYPE_TRACING && 9859 prog_extension && 9860 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || 9861 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) { 9862 /* Program extensions can extend all program types 9863 * except fentry/fexit. The reason is the following. 9864 * The fentry/fexit programs are used for performance 9865 * analysis, stats and can be attached to any program 9866 * type except themselves. When extension program is 9867 * replacing XDP function it is necessary to allow 9868 * performance analysis of all functions. Both original 9869 * XDP program and its program extension. Hence 9870 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is 9871 * allowed. If extending of fentry/fexit was allowed it 9872 * would be possible to create long call chain 9873 * fentry->extension->fentry->extension beyond 9874 * reasonable stack size. Hence extending fentry is not 9875 * allowed. 
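* In short: attaching fentry/fexit to an extension program is allowed,
* but extending an fentry/fexit program is not.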
9876 */ 9877 verbose(env, "Cannot extend fentry/fexit\n"); 9878 return -EINVAL; 9879 } 9880 key = ((u64)aux->id) << 32 | btf_id; 9881 } else { 9882 if (prog_extension) { 9883 verbose(env, "Cannot replace kernel functions\n"); 9884 return -EINVAL; 9885 } 9886 key = btf_id; 9887 } 9888 9889 switch (prog->expected_attach_type) { 9890 case BPF_TRACE_RAW_TP: 9891 if (tgt_prog) { 9892 verbose(env, 9893 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n"); 9894 return -EINVAL; 9895 } 9896 if (!btf_type_is_typedef(t)) { 9897 verbose(env, "attach_btf_id %u is not a typedef\n", 9898 btf_id); 9899 return -EINVAL; 9900 } 9901 if (strncmp(prefix, tname, sizeof(prefix) - 1)) { 9902 verbose(env, "attach_btf_id %u points to wrong type name %s\n", 9903 btf_id, tname); 9904 return -EINVAL; 9905 } 9906 tname += sizeof(prefix) - 1; 9907 t = btf_type_by_id(btf, t->type); 9908 if (!btf_type_is_ptr(t)) 9909 /* should never happen in valid vmlinux build */ 9910 return -EINVAL; 9911 t = btf_type_by_id(btf, t->type); 9912 if (!btf_type_is_func_proto(t)) 9913 /* should never happen in valid vmlinux build */ 9914 return -EINVAL; 9915 9916 /* remember two read only pointers that are valid for 9917 * the life time of the kernel 9918 */ 9919 prog->aux->attach_func_name = tname; 9920 prog->aux->attach_func_proto = t; 9921 prog->aux->attach_btf_trace = true; 9922 return 0; 9923 default: 9924 if (!prog_extension) 9925 return -EINVAL; 9926 /* fallthrough */ 9927 case BPF_TRACE_FENTRY: 9928 case BPF_TRACE_FEXIT: 9929 if (!btf_type_is_func(t)) { 9930 verbose(env, "attach_btf_id %u is not a function\n", 9931 btf_id); 9932 return -EINVAL; 9933 } 9934 if (prog_extension && 9935 btf_check_type_match(env, prog, btf, t)) 9936 return -EINVAL; 9937 t = btf_type_by_id(btf, t->type); 9938 if (!btf_type_is_func_proto(t)) 9939 return -EINVAL; 9940 tr = bpf_trampoline_lookup(key); 9941 if (!tr) 9942 return -ENOMEM; 9943 prog->aux->attach_func_name = tname; 9944 /* t is either vmlinux type or another program's type */ 9945 prog->aux->attach_func_proto = t; 9946 mutex_lock(&tr->mutex); 9947 if (tr->func.addr) { 9948 prog->aux->trampoline = tr; 9949 goto out; 9950 } 9951 if (tgt_prog && conservative) { 9952 prog->aux->attach_func_proto = NULL; 9953 t = NULL; 9954 } 9955 ret = btf_distill_func_proto(&env->log, btf, t, 9956 tname, &tr->func.model); 9957 if (ret < 0) 9958 goto out; 9959 if (tgt_prog) { 9960 if (subprog == 0) 9961 addr = (long) tgt_prog->bpf_func; 9962 else 9963 addr = (long) tgt_prog->aux->func[subprog]->bpf_func; 9964 } else { 9965 addr = kallsyms_lookup_name(tname); 9966 if (!addr) { 9967 verbose(env, 9968 "The address of function %s cannot be found\n", 9969 tname); 9970 ret = -ENOENT; 9971 goto out; 9972 } 9973 } 9974 tr->func.addr = (void *)addr; 9975 prog->aux->trampoline = tr; 9976 out: 9977 mutex_unlock(&tr->mutex); 9978 if (ret) 9979 bpf_trampoline_put(tr); 9980 return ret; 9981 } 9982 } 9983 9984 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, 9985 union bpf_attr __user *uattr) 9986 { 9987 u64 start_time = ktime_get_ns(); 9988 struct bpf_verifier_env *env; 9989 struct bpf_verifier_log *log; 9990 int i, len, ret = -EINVAL; 9991 bool is_priv; 9992 9993 /* no program is valid */ 9994 if (ARRAY_SIZE(bpf_verifier_ops) == 0) 9995 return -EINVAL; 9996 9997 /* 'struct bpf_verifier_env' can be global, but since it's not small, 9998 * allocate/free it every time bpf_check() is called 9999 */ 10000 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); 10001 if (!env) 10002 return -ENOMEM; 10003 log = 
&env->log; 10004 10005 len = (*prog)->len; 10006 env->insn_aux_data = 10007 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); 10008 ret = -ENOMEM; 10009 if (!env->insn_aux_data) 10010 goto err_free_env; 10011 for (i = 0; i < len; i++) 10012 env->insn_aux_data[i].orig_idx = i; 10013 env->prog = *prog; 10014 env->ops = bpf_verifier_ops[env->prog->type]; 10015 is_priv = capable(CAP_SYS_ADMIN); 10016 10017 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { 10018 mutex_lock(&bpf_verifier_lock); 10019 if (!btf_vmlinux) 10020 btf_vmlinux = btf_parse_vmlinux(); 10021 mutex_unlock(&bpf_verifier_lock); 10022 } 10023 10024 /* grab the mutex to protect few globals used by verifier */ 10025 if (!is_priv) 10026 mutex_lock(&bpf_verifier_lock); 10027 10028 if (attr->log_level || attr->log_buf || attr->log_size) { 10029 /* user requested verbose verifier output 10030 * and supplied buffer to store the verification trace 10031 */ 10032 log->level = attr->log_level; 10033 log->ubuf = (char __user *) (unsigned long) attr->log_buf; 10034 log->len_total = attr->log_size; 10035 10036 ret = -EINVAL; 10037 /* log attributes have to be sane */ 10038 if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || 10039 !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) 10040 goto err_unlock; 10041 } 10042 10043 if (IS_ERR(btf_vmlinux)) { 10044 /* Either gcc or pahole or kernel are broken. */ 10045 verbose(env, "in-kernel BTF is malformed\n"); 10046 ret = PTR_ERR(btf_vmlinux); 10047 goto skip_full_check; 10048 } 10049 10050 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); 10051 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) 10052 env->strict_alignment = true; 10053 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) 10054 env->strict_alignment = false; 10055 10056 env->allow_ptr_leaks = is_priv; 10057 10058 if (is_priv) 10059 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; 10060 10061 ret = replace_map_fd_with_map_ptr(env); 10062 if (ret < 0) 10063 goto skip_full_check; 10064 10065 if (bpf_prog_is_dev_bound(env->prog->aux)) { 10066 ret = bpf_prog_offload_verifier_prep(env->prog); 10067 if (ret) 10068 goto skip_full_check; 10069 } 10070 10071 env->explored_states = kvcalloc(state_htab_size(env), 10072 sizeof(struct bpf_verifier_state_list *), 10073 GFP_USER); 10074 ret = -ENOMEM; 10075 if (!env->explored_states) 10076 goto skip_full_check; 10077 10078 ret = check_subprogs(env); 10079 if (ret < 0) 10080 goto skip_full_check; 10081 10082 ret = check_btf_info(env, attr, uattr); 10083 if (ret < 0) 10084 goto skip_full_check; 10085 10086 ret = check_attach_btf_id(env); 10087 if (ret) 10088 goto skip_full_check; 10089 10090 ret = check_cfg(env); 10091 if (ret < 0) 10092 goto skip_full_check; 10093 10094 ret = do_check_subprogs(env); 10095 ret = ret ?: do_check_main(env); 10096 10097 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux)) 10098 ret = bpf_prog_offload_finalize(env); 10099 10100 skip_full_check: 10101 kvfree(env->explored_states); 10102 10103 if (ret == 0) 10104 ret = check_max_stack_depth(env); 10105 10106 /* instruction rewrites happen after this point */ 10107 if (is_priv) { 10108 if (ret == 0) 10109 opt_hard_wire_dead_code_branches(env); 10110 if (ret == 0) 10111 ret = opt_remove_dead_code(env); 10112 if (ret == 0) 10113 ret = opt_remove_nops(env); 10114 } else { 10115 if (ret == 0) 10116 sanitize_dead_code(env); 10117 } 10118 10119 if (ret == 0) 10120 /* program is valid, convert *(u32*)(ctx + off) accesses */ 10121 ret = 
convert_ctx_accesses(env);
10122
10123 if (ret == 0)
10124 ret = fixup_bpf_calls(env);
10125
10126 /* do 32-bit optimization after insn patching is done so those patched
10127 * insns can be handled correctly.
10128 */
10129 if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
10130 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
10131 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
10132 : false;
10133 }
10134
10135 if (ret == 0)
10136 ret = fixup_call_args(env);
10137
10138 env->verification_time = ktime_get_ns() - start_time;
10139 print_verification_stats(env);
10140
10141 if (log->level && bpf_verifier_log_full(log))
10142 ret = -ENOSPC;
10143 if (log->level && !log->ubuf) {
10144 ret = -EFAULT;
10145 goto err_release_maps;
10146 }
10147
10148 if (ret == 0 && env->used_map_cnt) {
10149 /* if the program passed the verifier, update used_maps in bpf_prog_info */
10150 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
10151 sizeof(env->used_maps[0]),
10152 GFP_KERNEL);
10153
10154 if (!env->prog->aux->used_maps) {
10155 ret = -ENOMEM;
10156 goto err_release_maps;
10157 }
10158
10159 memcpy(env->prog->aux->used_maps, env->used_maps,
10160 sizeof(env->used_maps[0]) * env->used_map_cnt);
10161 env->prog->aux->used_map_cnt = env->used_map_cnt;
10162
10163 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
10164 * bpf_ld_imm64 instructions
10165 */
10166 convert_pseudo_ld_imm64(env);
10167 }
10168
10169 if (ret == 0)
10170 adjust_btf_func(env);
10171
10172 err_release_maps:
10173 if (!env->prog->aux->used_maps)
10174 /* if we didn't copy map pointers into bpf_prog_info, release
10175 * them now. Otherwise free_used_maps() will release them.
10176 */
10177 release_maps(env);
10178 *prog = env->prog;
10179 err_unlock:
10180 if (!is_priv)
10181 mutex_unlock(&bpf_verifier_lock);
10182 vfree(env->insn_aux_data);
10183 err_free_env:
10184 kfree(env);
10185 return ret;
10186 }
10187