// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (INTEL_GEN(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (INTEL_GEN(engine->i915) >= 10)
			*regs |= BIT(0);
	}
}
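
/*
 * The layout tables below are a compact description of the default register
 * state that set_offsets() expands into a context image:
 *
 *  - NOP(x)          skip forward x dwords in the context image
 *  - LRI(count, f)   start an MI_LOAD_REGISTER_IMM(count) block; the POSTED
 *                    flag adds MI_LRI_FORCE_POSTED to the command header
 *  - REG(x)          a register at offset x (< 0x200) from mmio_base,
 *                    encoded in a single byte
 *  - REG16(x)        a register at offset x (< 0x10000) from mmio_base,
 *                    encoded in two bytes, BIT(7) acting as a continuation
 *                    marker
 *  - END             terminates the table
 *
 * Only the register offsets are written; the value slot of each pair is
 * left untouched and is filled in later (or rewritten by the HW on a
 * context save).
 */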

static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};

static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};

static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
	/*
	 * The gen12+ lists only have the registers we program in the basic
	 * default state. We rely on the context image using relative
	 * addressing to automatically fix up the register state between the
	 * physical engines when running on a virtual engine.
	 */
	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
		   !intel_engine_has_relative_mmio(engine));

	if (engine->class == RENDER_CLASS) {
		if (INTEL_GEN(engine->i915) >= 12)
			return gen12_rcs_offsets;
		else if (INTEL_GEN(engine->i915) >= 11)
			return gen11_rcs_offsets;
		else if (INTEL_GEN(engine->i915) >= 9)
			return gen9_rcs_offsets;
		else
			return gen8_rcs_offsets;
	} else {
		if (INTEL_GEN(engine->i915) >= 12)
			return gen12_xcs_offsets;
		else if (INTEL_GEN(engine->i915) >= 9)
			return gen9_xcs_offsets;
		else
			return gen8_xcs_offsets;
	}
}
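
/*
 * The lrc_ring_*() helpers below return the dword index of a register's
 * (offset, value) pair within the context register state, so callers use
 * regs[x + 1] to address the value. They return -1 when the register is not
 * part of the context image for this engine/generation.
 */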

static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
	if (INTEL_GEN(engine->i915) >= 12)
		return 0x60;
	else if (INTEL_GEN(engine->i915) >= 9)
		return 0x54;
	else if (engine->class == RENDER_CLASS)
		return 0x58;
	else
		return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
	if (INTEL_GEN(engine->i915) >= 12)
		return 0x74;
	else if (INTEL_GEN(engine->i915) >= 9)
		return 0x68;
	else if (engine->class == RENDER_CLASS)
		return 0xd8;
	else
		return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
	if (INTEL_GEN(engine->i915) >= 12)
		return 0x12;
	else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
		return 0x18;
	else
		return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_wa_bb_per_ctx(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_indirect_ptr(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return -1;

	if (INTEL_GEN(engine->i915) >= 12)
		return 0xb6;
	else if (INTEL_GEN(engine->i915) >= 11)
		return 0xaa;
	else
		return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
	switch (INTEL_GEN(engine->i915)) {
	default:
		MISSING_CASE(INTEL_GEN(engine->i915));
		fallthrough;
	case 12:
		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 11:
		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 10:
		return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 9:
		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 8:
		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	}
}

static void
lrc_setup_indirect_ctx(u32 *regs,
		       const struct intel_engine_cs *engine,
		       u32 ctx_bb_ggtt_addr,
		       u32 size)
{
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
	regs[lrc_ring_indirect_ptr(engine) + 1] =
		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
	regs[lrc_ring_indirect_offset(engine) + 1] =
		lrc_ring_indirect_offset_default(engine) << 6;
}
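
/*
 * The init_*_regs() helpers below fill in values for the layout that
 * set_offsets() produced: CTX_CONTEXT_CONTROL and CTX_TIMESTAMP, the
 * per-context and indirect workaround batch buffer pointers, and the
 * PPGTT PDP/PML4 descriptors.
 */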

static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;

	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (INTEL_GEN(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	regs[CTX_TIMESTAMP] = ce->runtime.last;
}

static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		lrc_setup_indirect_ctx(regs, engine,
				       i915_ggtt_offset(wa_ctx->vma) +
				       wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
	}
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/* 64b PPGTT (48bit canonical)
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
		 * other PDP Descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	} else {
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
	}
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		return i915_vm_to_ggtt(vm)->alias;
	else
		return i915_vm_to_ppgtt(vm);
}

static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}

static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * Must keep consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}

static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}
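
/*
 * Prepare a freshly allocated context image: tag the redzone, start from the
 * engine's recorded default state when one exists, clear the per-process
 * HWSP and then write the initial register state.
 */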

void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 context_size;

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		context_size += I915_GTT_PAGE_SIZE; /* for redzone */

	if (INTEL_GEN(engine->i915) == 12) {
		ce->wa_bb_page = context_size / PAGE_SIZE;
		context_size += PAGE_SIZE;
	}

	obj = i915_gem_object_create_shmem(engine->i915, context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}
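
/*
 * Allocate everything a logical ring context needs before it can be pinned:
 * the context state object, its ring buffer and a timeline (taking over a
 * caller-provided pinned timeline when there is one).
 */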

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}

void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 i915_coherent_map_type(ce->engine->i915) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}

void lrc_unpin(struct intel_context *ce)
{
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
	if (!ce->state)
		return;

	intel_ring_put(fetch_and_zero(&ce->ring));
	i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}
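
/*
 * The gen12 emitters below build the per-context indirect context workaround
 * batch: they reload CTX_TIMESTAMP and RING_CMD_BUF_CCTL from the saved
 * context image via CS_GPR0, then restore GPR0 itself from its slot in the
 * image.
 */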

static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

static u32 context_wa_bb_offset(const struct intel_context *ce)
{
	return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
	void *ptr;

	GEM_BUG_ON(!ce->wa_bb_page);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
	ptr += context_wa_bb_offset(ce);

	return ptr;
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_indirect_bb(ce);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       i915_ggtt_offset(ce->state) +
			       context_wa_bb_offset(ce),
			       (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48-53:    engine instance
 *      bit  54:       mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
	u32 desc;

	desc = INTEL_LEGACY_32B_CONTEXT;
	if (i915_vm_is_4lvl(ce->vm))
		desc = INTEL_LEGACY_64B_CONTEXT;
	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
	if (IS_GEN(ce->vm->i915, 8))
		desc |= GEN8_CTX_L3LLC_COHERENT;

	return i915_ggtt_offset(ce->state) | desc;
}

u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive with the global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, fn);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    const char *when)
{
	const struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;
	bool valid = true;
	int x;

	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_START],
		       i915_ggtt_offset(ring->vma));
		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
		valid = false;
	}

	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_CTL],
		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
		valid = false;
	}

	x = lrc_ring_mi_mode(engine);
	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
		       engine->name, regs[x + 1]);
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
		valid = false;
	}

	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
 * PIPE_CONTROL instruction. This is required for the flush to happen
 * correctly, but there is a slight complication as this is applied in a WA
 * batch where the values are only initialized once, so we cannot take the
 * register value at the beginning and reuse it further; hence we save its
 * value to memory, upload a constant value with bit21 set and then restore
 * it back with the saved value. To simplify the WA, a constant value is
 * formed by using the default value of this register. This shouldn't be a
 * problem because we are only modifying it for a short period and this
 * batch is non-preemptible. We could of course use additional instructions
 * that read the actual value of the register at that time and set our bit
 * of interest, but that would make the WA more complicated.
 *
 * This WA is also required for Gen9 so extracting it as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* NB no one else is allowed to scribble over scratch + 256! */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	return batch;
}

/*
 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
 * initialized at the beginning and shared across all contexts but this field
 * helps us to have multiple batches at different offsets and select them
 * based on a criteria. At the moment this batch always starts at the
 * beginning of the page and at this point we don't have multiple wa_ctx
 * batch buffers.
 *
 * The number of WAs applied is not known at the beginning; we use this field
 * to return the number of DWORDs written.
 *
 * Note that this batch does not contain MI_BATCH_BUFFER_END, so it instead
 * adds NOOPs as padding to make it cacheline aligned. MI_BATCH_BUFFER_END
 * will be added to the per-ctx batch and both of them together make a
 * complete batch buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}
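
/*
 * Small helper for emitting a single MI_LOAD_REGISTER_IMM block from a
 * static table of (register, value) pairs.
 */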

struct lri {
	i915_reg_t reg;
	u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*batch++ = i915_mmio_reg_offset(lri->reg);
		*batch++ = lri->value;
	} while (lri++, --count);
	*batch++ = MI_NOOP;

	return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is set up along with the golden
		 * context during context initialization. This value depends
		 * on device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled, especially for 2x6 devices.
		 * However, it is safe to load the default configuration of a
		 * 3x6 device instead of masking off the corresponding bits,
		 * because HW ignores bits of a disabled subslice and drops
		 * down to the appropriate config. Please see
		 * render_state_setup() in i915_gem_render_state.c for
		 * possible configurations; to avoid duplication they are
		 * not shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

static u32 *
gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	int i;

	/*
	 * WaPipeControlBefore3DStateSamplePattern: cnl
	 *
	 * Ensure the engine is idle prior to programming a
	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
	 */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL,
				       0);
	/*
	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
	 * confusing. Since gen8_emit_pipe_control() already advances the
	 * batch by 6 dwords, we advance the other 10 here, completing a
	 * cacheline. It's not clear if the workaround requires this padding
	 * before other commands, or if it's just the regular padding we would
	 * already have for the workaround bb, so leave it here for now.
	 */
	for (i = 0; i < 10; i++)
		*batch++ = MI_NOOP;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
	if (err)
		goto err;

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);

	/* Called on error unwind, clear all flags to prevent further use */
	memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx));
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
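
/*
 * Build the engine's shared indirect_ctx and per_ctx workaround batch
 * buffers. Only the render engine uses them, and only up to gen10; gen11 and
 * gen12 simply return. Failure is not fatal: the workaround batch is dropped
 * and the engine keeps running without it.
 */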
void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[] = {
		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
	};
	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
	void *batch, *batch_ptr;
	unsigned int i;
	int err;

	if (engine->class != RENDER_CLASS)
		return;

	switch (INTEL_GEN(engine->i915)) {
	case 12:
	case 11:
		return;
	case 10:
		wa_bb_fn[0] = gen10_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 9:
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 8:
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	default:
		MISSING_CASE(INTEL_GEN(engine->i915));
		return;
	}

	err = lrc_setup_wa_ctx(engine);
	if (err) {
		/*
		 * We continue even if we fail to initialize the WA batch
		 * because we only expect rare glitches but nothing
		 * critical enough to prevent us from using the GPU.
		 */
		drm_err(&engine->i915->drm,
			"Ignoring context switch w/a allocation error:%d\n",
			err);
		return;
	}

	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	batch_ptr = batch;
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			err = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}
	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
	__i915_gem_object_release_map(wa_ctx->vma->obj);

	/* Verify that we can handle failure to setup the wa_ctx */
	if (err || i915_inject_probe_error(engine->i915, -ENODEV))
		lrc_fini_wa_ctx(engine);
}

static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	ce->runtime.num_underflow++;
	ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
#endif
}

void lrc_update_runtime(struct intel_context *ce)
{
	u32 old;
	s32 dt;

	if (intel_context_is_barrier(ce))
		return;

	old = ce->runtime.last;
	ce->runtime.last = lrc_get_runtime(ce);
	dt = ce->runtime.last - old;

	if (unlikely(dt < 0)) {
		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
			 old, ce->runtime.last, dt);
		st_update_runtime_underflow(ce, dt);
		return;
	}

	ewma_runtime_add(&ce->runtime.avg, dt);
	ce->runtime.total += dt;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif