/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * The VC4 has no IOMMU between it and system memory, so a user with
 * access to execute shaders could escalate privilege by overwriting
 * system memory (using the VPM write address register in the
 * general-purpose DMA mode) or reading system memory it shouldn't
 * (reading it as a texture, or uniform data, or vertex data).
 *
 * This walks over a shader BO, ensuring that its accesses are
 * appropriately bounded, and recording how many texture accesses are
 * made and where so that we can do relocations for them in the
 * uniform stream.
 */

#include "vc4_drv.h"
#include "vc4_qpu_defines.h"

#define LIVE_REG_COUNT (32 + 32 + 4)

struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	uint64_t *shader;

	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
	bool live_max_clamp_regs[LIVE_REG_COUNT];
	uint32_t live_immediates[LIVE_REG_COUNT];

	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and clearing the texturing/clamping state.
	 */
	unsigned long *branch_targets;

	/* Set when entering a basic block, and cleared when the uniform
	 * address update is found.  This is used to make sure that we don't
	 * read uniforms when the address is undefined.
	 */
	bool needs_uniform_address_update;

	/* Set when we find a backwards branch.  If the branch is backwards,
	 * the target is probably doing an address reset to read uniforms,
	 * and so we need to be sure that a uniforms address is present in the
	 * stream, even if the shader didn't need to read uniforms in later
	 * basic blocks.
	 */
	bool needs_uniform_address_for_loop;
};
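
/* The live_* arrays above are indexed by a flattened live-register index:
 * 0-31 for regfile A, 32-63 for regfile B, and 64-67 for the accumulators
 * r0-r3 (hence LIVE_REG_COUNT = 32 + 32 + 4).  Anything else maps to ~0 and
 * is not tracked.  The two helpers below implement that mapping for write
 * addresses and for the ADD op's first read argument, respectively.
 */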

static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
	if (waddr < 32) {
		if (is_b)
			return 32 + waddr;
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
	}
}

static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}

static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}
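
/* A TMU lookup is set up by writing up to four parameters to the
 * QPU_W_TMUn_{S,T,R,B} registers, with the write to the S register
 * submitting the lookup.  check_tmu_write() below records, for each
 * parameter, the offset of the corresponding uniform in the uniform stream
 * so that reloc_tex() in vc4_validate.c can patch in real addresses later.
 * A "direct" access is one whose first and only TMU write is the S submit;
 * it must compute clamped_offset + uniform_base in that same instruction,
 * which is what the is_direct path enforces.
 */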

static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_ERROR("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_ERROR("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads().
	 */
	if (!is_direct) {
		if (validation_state->needs_uniform_address_update) {
			DRM_ERROR("Texturing with undefined uniform address\n");
			return false;
		}

		validated_shader->uniforms_size += 4;
	}

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}

static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
{
	uint32_t o = validated_shader->num_uniform_addr_offsets;
	uint32_t num_uniforms = validated_shader->uniforms_size / 4;

	validated_shader->uniform_addr_offsets =
		krealloc(validated_shader->uniform_addr_offsets,
			 (o + 1) *
			 sizeof(*validated_shader->uniform_addr_offsets),
			 GFP_KERNEL);
	if (!validated_shader->uniform_addr_offsets)
		return false;

	validated_shader->uniform_addr_offsets[o] = num_uniforms;
	validated_shader->num_uniform_addr_offsets++;

	return true;
}
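
/* As an illustrative sketch (QPU-style pseudocode, register names
 * arbitrary), the only uniform-address reset shape accepted below is:
 *
 *	load_imm  ra1, <byte offset of the uniform following the reset>
 *	add       unif_addr, ra1, unif	; unconditional, no pack
 *
 * The immediate has to equal uniforms_size + 4 at the point of the write,
 * so that the new address points at whatever uniform follows the base
 * address consumed by the add itself.
 */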

static bool
validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
			       struct vc4_shader_validation_state *validation_state,
			       bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
	/* We want our reset to be pointing at whatever uniform follows the
	 * uniforms base address.
	 */
	u32 expected_offset = validated_shader->uniforms_size + 4;

	/* We only support absolute uniform address changes, and we
	 * require that they be in the current basic block before any
	 * of its uniform reads.
	 *
	 * One could potentially emit more efficient QPU code, by
	 * noticing that (say) an if statement does uniform control
	 * flow for all threads and that the if reads the same number
	 * of uniforms on each side.  However, this scheme is easy to
	 * validate so it's all we allow for now.
	 */
	switch (QPU_GET_FIELD(inst, QPU_SIG)) {
	case QPU_SIG_NONE:
	case QPU_SIG_SCOREBOARD_UNLOCK:
	case QPU_SIG_COLOR_LOAD:
	case QPU_SIG_LOAD_TMU0:
	case QPU_SIG_LOAD_TMU1:
		break;
	default:
		DRM_ERROR("uniforms address change must be "
			  "normal math\n");
		return false;
	}

	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
		DRM_ERROR("Uniform address reset must be an ADD.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
		DRM_ERROR("Uniform address reset must be unconditional.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
	    !(inst & QPU_PM)) {
		DRM_ERROR("No packing allowed on uniforms reset\n");
		return false;
	}

	if (add_lri == -1) {
		DRM_ERROR("First argument of uniform address write must be "
			  "an immediate value.\n");
		return false;
	}

	if (validation_state->live_immediates[add_lri] != expected_offset) {
		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
			  validation_state->live_immediates[add_lri],
			  expected_offset);
		return false;
	}

	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
		DRM_ERROR("Second argument of uniform address write must be "
			  "a uniform.\n");
		return false;
	}

	validation_state->needs_uniform_address_update = false;
	validation_state->needs_uniform_address_for_loop = false;
	return require_uniform_address_uniform(validated_shader);
}

static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	bool is_b = is_mul ^ ws;
	u32 lri = waddr_to_live_reg_index(waddr, is_b);

	if (lri != -1) {
		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);

		if (sig == QPU_SIG_LOAD_IMM &&
		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
			validation_state->live_immediates[lri] =
				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
		} else {
			validation_state->live_immediates[lri] = ~0;
		}
	}

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		if (is_b) {
			DRM_ERROR("relative uniforms address change "
				  "unsupported\n");
			return false;
		}

		return validate_uniform_address_write(validated_shader,
						      validation_state,
						      is_mul);

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_ERROR("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_ERROR("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}
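
/* As an illustrative sketch (QPU-style pseudocode, register names
 * arbitrary), the clamp idiom tracked below and relied on by the direct-TMU
 * path in check_tmu_write() looks like:
 *
 *	max  ra2, ra_offset, 0	; live_max_clamp_regs[ra2] = true
 *	min  ra3, ra2, unif	; live_min_clamp_offsets[ra3] = offset of
 *				;   this uniform in the stream
 *	add  tmu0_s, ra3, unif	; direct TMU submit
 *
 * The 0 must come from a small immediate, and the min's other argument must
 * be a uniform read.
 */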

static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}
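
/* Each QPU instruction has two write-back slots, one for the ADD pipe and
 * one for the MUL pipe, and the WS bit swaps which register file each slot
 * targets (the is_mul ^ ws logic above).  check_instruction_writes()
 * validates both slots and then runs the live-clamp tracking, so clamps
 * produced by this instruction are visible to the next one.
 */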

static bool
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(validated_shader, validation_state, false) &&
	      check_reg_write(validated_shader, validation_state, true));

	track_live_clamps(validated_shader, validation_state);

	return ok;
}

static bool
check_branch(uint64_t inst,
	     struct vc4_validated_shader_info *validated_shader,
	     struct vc4_shader_validation_state *validation_state,
	     int ip)
{
	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

	if ((int)branch_imm < 0)
		validation_state->needs_uniform_address_for_loop = true;

	/* We don't want to have to worry about validation of this, and
	 * there's no need for it.
	 */
	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
		DRM_ERROR("branch instruction at %d wrote a register.\n",
			  validation_state->ip);
		return false;
	}

	return true;
}

static bool
check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
			struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;

		if (validation_state->needs_uniform_address_update) {
			DRM_ERROR("Uniform read with undefined uniform "
				  "address\n");
			return false;
		}
	}

	return true;
}
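
/* A worked example of the branch arithmetic used below: branches have three
 * delay slots, so a branch at instruction index ip falls through at
 * after_delay_ip = ip + 4.  The signed immediate is a byte offset from that
 * point and instructions are 8 bytes long (hence the >> 3), so a branch at
 * ip 10 with branch_imm == -32 targets (10 + 4) + (-32 / 8) == 10.
 */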

/* Make sure that all branches are PC-relative immediate branches that point
 * within the shader, and note their targets for later.
 */
static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	bool found_shader_end = false;
	int ip;
	int shader_end_ip = 0;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {
		uint64_t inst = validation_state->shader[ip];
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
		uint32_t after_delay_ip = ip + 4;
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
			shader_end_ip = ip;
			found_shader_end = true;
			continue;
		}

		if (sig != QPU_SIG_BRANCH)
			continue;

		if (ip - last_branch < 4) {
			DRM_ERROR("Branch at %d during delay slots\n", ip);
			return false;
		}
		last_branch = ip;

		if (inst & QPU_BRANCH_REG) {
			DRM_ERROR("branching from register relative "
				  "not supported\n");
			return false;
		}

		if (!(inst & QPU_BRANCH_REL)) {
			DRM_ERROR("relative branching required\n");
			return false;
		}

		/* The actual branch target is the instruction after the delay
		 * slots, plus whatever byte offset is in the low 32 bits of
		 * the instruction.  Make sure we're not branching beyond the
		 * end of the shader object.
		 */
		if (branch_imm % sizeof(inst) != 0) {
			DRM_ERROR("branch target not aligned\n");
			return false;
		}

		branch_target_ip = after_delay_ip + (branch_imm >> 3);
		if (branch_target_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
				  ip, branch_target_ip,
				  validation_state->max_ip);
			return false;
		}
		set_bit(branch_target_ip, validation_state->branch_targets);

		/* Make sure that the non-branching path is also not outside
		 * the shader.
		 */
		if (after_delay_ip >= validation_state->max_ip) {
			DRM_ERROR("Branch at %d continues past shader end "
				  "(%d/%d)\n",
				  ip, after_delay_ip, validation_state->max_ip);
			return false;
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (max_branch_target > shader_end_ip) {
		DRM_ERROR("Branch landed after QPU_SIG_PROG_END\n");
		return false;
	}

	return true;
}

/* Resets any known state for the shader, used when we may be branched to from
 * multiple locations in the program (or at shader start).
 */
static void
reset_validation_state(struct vc4_shader_validation_state *validation_state)
{
	int i;

	for (i = 0; i < 8; i++)
		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;

	for (i = 0; i < LIVE_REG_COUNT; i++) {
		validation_state->live_min_clamp_offsets[i] = ~0;
		validation_state->live_max_clamp_regs[i] = false;
		validation_state->live_immediates[i] = ~0;
	}
}

static bool
texturing_in_progress(struct vc4_shader_validation_state *validation_state)
{
	return (validation_state->tmu_write_count[0] != 0 ||
		validation_state->tmu_write_count[1] != 0);
}

static bool
vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
{
	uint32_t ip = validation_state->ip;

	if (!test_bit(ip, validation_state->branch_targets))
		return true;

	if (texturing_in_progress(validation_state)) {
		DRM_ERROR("Branch target landed during TMU setup\n");
		return false;
	}

	/* Reset our live values tracking, since this instruction may have
	 * multiple predecessors.
	 *
	 * One could potentially do analysis to determine that, for
	 * example, all predecessors have a live max clamp in the same
	 * register, but we don't bother with that.
	 */
	reset_validation_state(validation_state);

	/* Since we've entered a basic block from potentially multiple
	 * predecessors, we need the uniforms address to be updated before any
	 * uniforms are read.  We require that after any branch point, the
	 * next uniform to be loaded is a uniform address offset.  That
	 * uniform's offset will be marked by the uniform address register
	 * write validation, or by the one-off end-of-program check.
	 */
	validation_state->needs_uniform_address_update = true;

	return true;
}
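
/* Validation runs in two passes: vc4_validate_branches() first checks every
 * branch and records the set of branch-target IPs, then the main loop below
 * walks the instructions in order, validating reads and writes and resetting
 * the tracked state at each recorded target, and stops two instructions
 * after QPU_SIG_PROG_END (the delay slots that still execute).
 */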

struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader = NULL;
	struct vc4_shader_validation_state validation_state;

	memset(&validation_state, 0, sizeof(validation_state));
	validation_state.shader = shader_obj->vaddr;
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);

	reset_validation_state(&validation_state);

	validation_state.branch_targets =
		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
			sizeof(unsigned long), GFP_KERNEL);
	if (!validation_state.branch_targets)
		goto fail;

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		goto fail;

	if (!vc4_validate_branches(&validation_state))
		goto fail;

	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		if (!vc4_handle_branch_target(&validation_state))
			goto fail;

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(validated_shader,
						     &validation_state))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		case QPU_SIG_BRANCH:
			if (!check_branch(inst, validated_shader,
					  &validation_state, ip))
				goto fail;
			break;
		default:
			DRM_ERROR("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_ERROR("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* If we did a backwards branch and we haven't emitted a uniforms
	 * reset since then, we still need the uniforms stream to have the
	 * uniforms address available so that the backwards branch can do its
	 * uniforms reset.
	 *
	 * We could potentially prove that the backwards branch doesn't
	 * contain any uses of uniforms until program exit, but that doesn't
	 * seem to be worth the trouble.
	 */
	if (validation_state.needs_uniform_address_for_loop) {
		if (!require_uniform_address_uniform(validated_shader))
			goto fail;
		validated_shader->uniforms_size += 4;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	kfree(validation_state.branch_targets);

	return validated_shader;

fail:
	kfree(validation_state.branch_targets);
	if (validated_shader) {
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}