/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory, so a user with
 * access to execute shaders could escalate privilege by overwriting
 * system memory (using the VPM write address register in the
 * general-purpose DMA mode) or reading system memory it shouldn't
 * (reading it as a texture, or uniform data, or vertex data).
 *
 * This walks over a shader BO, ensuring that its accesses are
 * appropriately bounded, and recording how many texture accesses are
 * made and where so that we can do relocations for them in the
 * uniform stream.
 */

#include "vc4_drv.h"
#include "vc4_qpu_defines.h"

#define LIVE_REG_COUNT (32 + 32 + 4)

struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	uint64_t *shader;

	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
	bool live_max_clamp_regs[LIVE_REG_COUNT];
	uint32_t live_immediates[LIVE_REG_COUNT];

	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and clearing the texturing/clamping state.
	 */
	unsigned long *branch_targets;

	/* Set when entering a basic block, and cleared when the uniform
	 * address update is found. This is used to make sure that we don't
	 * read uniforms when the address is undefined.
	 */
	bool needs_uniform_address_update;

	/* Set when we find a backwards branch. If the branch is backwards,
	 * the target is probably doing an address reset to read uniforms,
	 * and so we need to be sure that a uniforms address is present in the
	 * stream, even if the shader didn't need to read uniforms in later
	 * basic blocks.
	 */
	bool needs_uniform_address_for_loop;
};
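
/* Returns the index into the live-register tracking arrays for a write
 * address, or ~0 if the destination isn't a register we track. Regfile A
 * maps to [0, 31], regfile B to [32, 63], and accumulators r0-r3 to
 * [64, 67].
 */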
static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
	if (waddr < 32) {
		if (is_b)
			return 32 + waddr;
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
	}
}

static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}

static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}

static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_ERROR("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_ERROR("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
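		/* Look up which live-register slot feeds the ADD's first
		 * argument; track_live_clamps() records the clamp state for
		 * that slot, which we check below.
		 */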
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads()
	 */
	if (!is_direct) {
		if (validation_state->needs_uniform_address_update) {
			DRM_ERROR("Texturing with undefined uniform address\n");
			return false;
		}

		validated_shader->uniforms_size += 4;
	}

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}

static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
{
	uint32_t o = validated_shader->num_uniform_addr_offsets;
	uint32_t num_uniforms = validated_shader->uniforms_size / 4;

	validated_shader->uniform_addr_offsets =
		krealloc(validated_shader->uniform_addr_offsets,
			 (o + 1) *
			 sizeof(*validated_shader->uniform_addr_offsets),
			 GFP_KERNEL);
	if (!validated_shader->uniform_addr_offsets)
		return false;

	validated_shader->uniform_addr_offsets[o] = num_uniforms;
	validated_shader->num_uniform_addr_offsets++;

	return true;
}

static bool
validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
			       struct vc4_shader_validation_state *validation_state,
			       bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
	/* We want our reset to be pointing at whatever uniform follows the
	 * uniforms base address.
	 */
	u32 expected_offset = validated_shader->uniforms_size + 4;

	/* We only support absolute uniform address changes, and we
	 * require that they be in the current basic block before any
	 * of its uniform reads.
	 *
	 * One could potentially emit more efficient QPU code, by
	 * noticing that (say) an if statement does uniform control
	 * flow for all threads and that the if reads the same number
	 * of uniforms on each side. However, this scheme is easy to
	 * validate so it's all we allow for now.
	 */
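
	/* The checks below pin down the exact form of reset we accept: an
	 * unconditional ADD (no signal, no regfile-A pack) of a known
	 * immediate offset to a uniform read.
	 */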
	if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
		DRM_ERROR("uniforms address change must be "
			  "normal math\n");
		return false;
	}

	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
		DRM_ERROR("Uniform address reset must be an ADD.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
		DRM_ERROR("Uniform address reset must be unconditional.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
	    !(inst & QPU_PM)) {
		DRM_ERROR("No packing allowed on uniforms reset\n");
		return false;
	}

	if (add_lri == -1) {
		DRM_ERROR("First argument of uniform address write must be "
			  "an immediate value.\n");
		return false;
	}

	if (validation_state->live_immediates[add_lri] != expected_offset) {
		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
			  validation_state->live_immediates[add_lri],
			  expected_offset);
		return false;
	}

	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
		DRM_ERROR("Second argument of uniform address write must be "
			  "a uniform.\n");
		return false;
	}

	validation_state->needs_uniform_address_update = false;
	validation_state->needs_uniform_address_for_loop = false;
	return require_uniform_address_uniform(validated_shader);
}

static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	bool is_b = is_mul ^ ws;
	u32 lri = waddr_to_live_reg_index(waddr, is_b);

	if (lri != -1) {
		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);

		if (sig == QPU_SIG_LOAD_IMM &&
		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
			validation_state->live_immediates[lri] =
				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
		} else {
			validation_state->live_immediates[lri] = ~0;
		}
	}

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		if (is_b) {
			DRM_ERROR("relative uniforms address change "
				  "unsupported\n");
			return false;
		}

		return validate_uniform_address_write(validated_shader,
						      validation_state,
						      is_mul);

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_ERROR("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_ERROR("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}
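
/* Tracks which live registers hold values that are safe offsets for direct
 * TMU reads: a MAX(x, 0) against the small immediate 0 marks its destination
 * in live_max_clamp_regs, and a following MIN of such a value against a
 * uniform records that uniform's offset in live_min_clamp_offsets.
 */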
static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}
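
/* Validates both the ADD and MUL write destinations of an instruction, then
 * updates the live clamp/immediate tracking for it.
 */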
static bool
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(validated_shader, validation_state, false) &&
	      check_reg_write(validated_shader, validation_state, true));

	track_live_clamps(validated_shader, validation_state);

	return ok;
}

static bool
check_branch(uint64_t inst,
	     struct vc4_validated_shader_info *validated_shader,
	     struct vc4_shader_validation_state *validation_state,
	     int ip)
{
	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

	if ((int)branch_imm < 0)
		validation_state->needs_uniform_address_for_loop = true;

	/* We don't want to have to worry about validation of this, and
	 * there's no need for it.
	 */
	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
		DRM_ERROR("branch instruction at %d wrote a register.\n",
			  validation_state->ip);
		return false;
	}

	return true;
}

static bool
check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
			struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;

		if (validation_state->needs_uniform_address_update) {
			DRM_ERROR("Uniform read with undefined uniform "
				  "address\n");
			return false;
		}
	}

	return true;
}

/* Make sure that all branches use an immediate, PC-relative target that
 * points within the shader, and note their targets for later.
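 *
 * A branch takes effect after its delay slots, so the effective target is
 * computed relative to ip + 4; both that target and the fall-through path
 * must land inside the shader BO.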
 */
static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	bool found_shader_end = false;
	int ip;
	int shader_end_ip = 0;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {
		uint64_t inst = validation_state->shader[ip];
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
		uint32_t after_delay_ip = ip + 4;
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
			shader_end_ip = ip;
			found_shader_end = true;
			continue;
		}

		if (sig != QPU_SIG_BRANCH)
			continue;

		if (ip - last_branch < 4) {
			DRM_ERROR("Branch at %d during delay slots\n", ip);
			return false;
		}
		last_branch = ip;

		if (inst & QPU_BRANCH_REG) {
			DRM_ERROR("branching from register relative "
				  "not supported\n");
			return false;
		}

		if (!(inst & QPU_BRANCH_REL)) {
			DRM_ERROR("relative branching required\n");
			return false;
		}

		/* The actual branch target is the instruction after the delay
		 * slots, plus whatever byte offset is in the low 32 bits of
		 * the instruction. Make sure we're not branching beyond the
		 * end of the shader object.
		 */
		if (branch_imm % sizeof(inst) != 0) {
			DRM_ERROR("branch target not aligned\n");
			return false;
		}

		branch_target_ip = after_delay_ip + (branch_imm >> 3);
		if (branch_target_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
				  ip, branch_target_ip,
				  validation_state->max_ip);
			return false;
		}
		set_bit(branch_target_ip, validation_state->branch_targets);

		/* Make sure that the non-branching path is also not outside
		 * the shader.
		 */
		if (after_delay_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d continues past shader end "
				  "(%d/%d)\n",
				  ip, after_delay_ip, validation_state->max_ip);
			return false;
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (max_branch_target > shader_end_ip) {
		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
		return false;
	}

	return true;
}

/* Resets any known state for the shader, used when we may be branched to from
 * multiple locations in the program (or at shader start).
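 *
 * This clears the per-TMU parameter offsets and all per-register liveness
 * tracking (immediates and clamp state).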
 */
static void
reset_validation_state(struct vc4_shader_validation_state *validation_state)
{
	int i;

	for (i = 0; i < 8; i++)
		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;

	for (i = 0; i < LIVE_REG_COUNT; i++) {
		validation_state->live_min_clamp_offsets[i] = ~0;
		validation_state->live_max_clamp_regs[i] = false;
		validation_state->live_immediates[i] = ~0;
	}
}

static bool
texturing_in_progress(struct vc4_shader_validation_state *validation_state)
{
	return (validation_state->tmu_write_count[0] != 0 ||
		validation_state->tmu_write_count[1] != 0);
}

static bool
vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
{
	uint32_t ip = validation_state->ip;

	if (!test_bit(ip, validation_state->branch_targets))
		return true;

	if (texturing_in_progress(validation_state)) {
		DRM_ERROR("Branch target landed during TMU setup\n");
		return false;
	}

	/* Reset our live values tracking, since this instruction may have
	 * multiple predecessors.
	 *
	 * One could potentially do analysis to determine that, for
	 * example, all predecessors have a live max clamp in the same
	 * register, but we don't bother with that.
	 */
	reset_validation_state(validation_state);

	/* Since we've entered a basic block from potentially multiple
	 * predecessors, we need the uniforms address to be updated before any
	 * uniforms are read. We require that after any branch point, the next
	 * uniform to be loaded is a uniform address offset. That uniform's
	 * offset will be marked by the uniform address register write
	 * validation, or by a one-off check at the end of the program.
	 */
	validation_state->needs_uniform_address_update = true;

	return true;
}

struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader = NULL;
	struct vc4_shader_validation_state validation_state;

	memset(&validation_state, 0, sizeof(validation_state));
	validation_state.shader = shader_obj->vaddr;
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);

	reset_validation_state(&validation_state);

	validation_state.branch_targets =
		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
			sizeof(unsigned long), GFP_KERNEL);
	if (!validation_state.branch_targets)
		goto fail;

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		goto fail;

	if (!vc4_validate_branches(&validation_state))
		goto fail;

	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		if (!vc4_handle_branch_target(&validation_state))
			goto fail;

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(validated_shader,
						     &validation_state))
				goto fail;
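
			/* Note where the program end was signaled; its two
			 * delay slots still execute before we stop scanning.
			 */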
			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		case QPU_SIG_BRANCH:
			if (!check_branch(inst, validated_shader,
					  &validation_state, ip))
				goto fail;
			break;
		default:
			DRM_ERROR("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_ERROR("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* If we did a backwards branch and we haven't emitted a uniforms
	 * reset since then, we still need the uniforms stream to have the
	 * uniforms address available so that the backwards branch can do its
	 * uniforms reset.
	 *
	 * We could potentially prove that the backwards branch doesn't
	 * contain any uses of uniforms until program exit, but that doesn't
	 * seem to be worth the trouble.
	 */
	if (validation_state.needs_uniform_address_for_loop) {
		if (!require_uniform_address_uniform(validated_shader))
			goto fail;
		validated_shader->uniforms_size += 4;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	kfree(validation_state.branch_targets);

	return validated_shader;

fail:
	kfree(validation_state.branch_targets);
	if (validated_shader) {
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}