// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

/*
 * The offset tables below are a byte stream decoded by set_offsets(): NOP(x)
 * skips x dwords of register state, LRI(count, flags) emits an
 * MI_LOAD_REGISTER_IMM header for count registers (POSTED sets
 * MI_LRI_FORCE_POSTED), REG()/REG16() encode a register offset from the
 * engine's mmio_base (divided by four) as one or two 7-bit chunks with BIT(7)
 * marking a continuation byte, and END (a zero byte) terminates the table.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= BIT(0);
	}
}

static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};

static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};

static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
	/*
	 * The gen12+ lists only have the registers we program in the basic
	 * default state. We rely on the context image using relative
	 * addressing to automatically fix up the register state between the
	 * physical engines for a virtual engine.
	 */
	GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
		   !intel_engine_has_relative_mmio(engine));

	if (engine->class == RENDER_CLASS) {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
			return xehp_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 11)
			return gen11_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_rcs_offsets;
		else
			return gen8_rcs_offsets;
	} else {
		if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_xcs_offsets;
		else
			return gen8_xcs_offsets;
	}
}

static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x70;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x60;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x54;
	else if (engine->class == RENDER_CLASS)
		return 0x58;
	else
		return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x84;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x74;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x68;
	else if (engine->class == RENDER_CLASS)
		return 0xd8;
	else
		return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x12;
	else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
		return 0x18;
	else
		return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_wa_bb_per_ctx(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_indirect_ptr(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		/*
		 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
		 * simply to match the RCS context image layout.
		 */
		return 0xc6;
	else if (engine->class != RENDER_CLASS)
		return -1;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0xb6;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return 0xaa;
	else
		return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
	switch (GRAPHICS_VER(engine->i915)) {
	default:
		MISSING_CASE(GRAPHICS_VER(engine->i915));
		fallthrough;
	case 12:
		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 11:
		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 9:
		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 8:
		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	}
}

static void
lrc_setup_indirect_ctx(u32 *regs,
		       const struct intel_engine_cs *engine,
		       u32 ctx_bb_ggtt_addr,
		       u32 size)
{
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
	regs[lrc_ring_indirect_ptr(engine) + 1] =
		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
	regs[lrc_ring_indirect_offset(engine) + 1] =
		lrc_ring_indirect_offset_default(engine) << 6;
}

static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;

	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	regs[CTX_TIMESTAMP] = ce->runtime.last;
}

static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		lrc_setup_indirect_ctx(regs, engine,
				       i915_ggtt_offset(wa_ctx->vma) +
				       wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
	}
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/* 64b PPGTT (48bit canonical)
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
		 * other PDP Descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	} else {
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
	}
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		return i915_vm_to_ggtt(vm)->alias;
	else
		return i915_vm_to_ppgtt(vm);
}

static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}

static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * Must keep consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}

static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}

void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 context_size;

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		context_size += I915_GTT_PAGE_SIZE; /* for redzone */

	if (GRAPHICS_VER(engine->i915) == 12) {
		ce->wa_bb_page = context_size / PAGE_SIZE;
		context_size += PAGE_SIZE;
	}

	obj = i915_gem_object_create_lmem(engine->i915, context_size, 0);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_shmem(engine->i915, context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, ce->ring_size);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}

void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 i915_coherent_map_type(ce->engine->i915,
								ce->state->obj,
								false) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}

void lrc_unpin(struct intel_context *ce)
{
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
	if (!ce->state)
		return;

	intel_ring_put(fetch_and_zero(&ce->ring));
	i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

static u32 context_wa_bb_offset(const struct intel_context *ce)
{
	return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
	void *ptr;

	GEM_BUG_ON(!ce->wa_bb_page);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
	ptr += context_wa_bb_offset(ce);

	return ptr;
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_indirect_bb(ce);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       i915_ggtt_offset(ce->state) +
			       context_wa_bb_offset(ce),
			       (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48-53:    engine instance
 *      bit 54:        mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 *      bits 32-37:    virtual function number
 *      bit 38:        mbz, reserved for use by hardware
 *      bits 39-54:    SW context ID
 *      bits 55-57:    reserved
 *      bits 58-63:    SW counter
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
	u32 desc;

	desc = INTEL_LEGACY_32B_CONTEXT;
	if (i915_vm_is_4lvl(ce->vm))
		desc = INTEL_LEGACY_64B_CONTEXT;
	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
	if (GRAPHICS_VER(ce->vm->i915) == 8)
		desc |= GEN8_CTX_L3LLC_COHERENT;

	return i915_ggtt_offset(ce->state) | desc;
}

u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive wrt the global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, fn);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    const char *when)
{
	const struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;
	bool valid = true;
	int x;

	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_START],
		       i915_ggtt_offset(ring->vma));
		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
		valid = false;
	}

	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_CTL],
		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
		valid = false;
	}

	x = lrc_ring_mi_mode(engine);
	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
		       engine->name, regs[x + 1]);
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
		valid = false;
	}

	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
 * PIPE_CONTROL instruction.
 * This is required for the flush to happen correctly, but there is a slight
 * complication as this is applied in a WA batch where the values are only
 * initialized once, so we cannot take the register value at the beginning
 * and reuse it further; hence we save its value to memory, upload a constant
 * value with bit21 set and then restore it back with the saved value.
 * To simplify the WA, a constant value is formed by using the default value
 * of this register. This shouldn't be a problem because we are only modifying
 * it for a short period and this batch is non-preemptible. We could of course
 * use additional instructions that read the actual value of the register
 * at that time and set our bit of interest, but that makes the WA more
 * complicated.
 *
 * This WA is also required for Gen9 so extracting it as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* NB no one else is allowed to scribble over scratch + 256! */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	return batch;
}

/*
 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
 * initialized at the beginning and shared across all contexts but this field
 * helps us to have multiple batches at different offsets and select them based
 * on a criteria. At the moment this batch always starts at the beginning of the
 * page and at this point we don't have multiple wa_ctx batch buffers.
 *
 * The number of WAs applied is not known at the beginning; we use this field
 * to return the number of DWORDs written.
 *
 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END,
 * so it adds NOOPs as padding to make it cacheline aligned.
 * MI_BATCH_BUFFER_END will be added to the perctx batch and both of them
 * together make a complete batch buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in the indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}

struct lri {
	i915_reg_t reg;
	u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*batch++ = i915_mmio_reg_offset(lri->reg);
		*batch++ = lri->value;
	} while (lri++, --count);
	*batch++ = MI_NOOP;

	return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is set up along with the golden context
		 * during context initialization. This value depends on
		 * device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled, especially for 2x6
		 * devices; however, it is safe to load the default
		 * configuration of a 3x6 device instead of masking off
		 * the corresponding bits because HW ignores bits of a disabled
		 * subslice and drops down to the appropriate config. Please
		 * see render_state_setup() in i915_gem_render_state.c for
		 * possible configurations; to avoid duplication they are
		 * not shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[] = {
		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
	};
	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
	struct i915_gem_ww_ctx ww;
	void *batch, *batch_ptr;
	unsigned int i;
	int err;

	if (engine->class != RENDER_CLASS)
		return;

	switch (GRAPHICS_VER(engine->i915)) {
	case 12:
	case 11:
		return;
	case 9:
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 8:
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	default:
		MISSING_CASE(GRAPHICS_VER(engine->i915));
		return;
	}

	err = lrc_create_wa_ctx(engine);
	if (err) {
		/*
		 * We continue even if we fail to initialize the WA batch
		 * because we only expect rare glitches, nothing critical
		 * enough to prevent us from using the GPU.
		 */
		drm_err(&engine->i915->drm,
			"Ignoring context switch w/a allocation error:%d\n",
			err);
		return;
	}

	if (!engine->wa_ctx.vma)
		return;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
	if (!err)
		err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
	if (err)
		goto err;

	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_unpin;
	}

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	batch_ptr = batch;
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			err = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}
	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
	__i915_gem_object_release_map(wa_ctx->vma->obj);

	/* Verify that we can handle failure to set up the wa_ctx */
	if (!err)
		err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
	if (err)
		i915_vma_unpin(wa_ctx->vma);
err:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (err) {
		i915_vma_put(engine->wa_ctx.vma);

		/* Clear all flags to prevent further use */
		memset(wa_ctx, 0, sizeof(*wa_ctx));
	}
}

static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	ce->runtime.num_underflow++;
	ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
#endif
}

void lrc_update_runtime(struct intel_context *ce)
{
	u32 old;
	s32 dt;

	if (intel_context_is_barrier(ce))
		return;

	old = ce->runtime.last;
	ce->runtime.last = lrc_get_runtime(ce);
	dt = ce->runtime.last - old;

	if (unlikely(dt < 0)) {
		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
			 old, ce->runtime.last, dt);
		st_update_runtime_underflow(ce, dt);
		return;
	}

	ewma_runtime_add(&ce->runtime.avg, dt);
	ce->runtime.total += dt;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif