// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

static inline unsigned int dword_in_page(void *addr)
{
	return offset_in_page(addr) / sizeof(u32);
}

static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (INTEL_GEN(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (INTEL_GEN(engine->i915) >= 10)
			*regs |= BIT(0);
	}
}
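
/*
 * Worked example of the encoding consumed above (illustration only):
 * the table fragment
 *
 *	NOP(1),
 *	LRI(2, POSTED),
 *	REG16(0x244),
 *	REG(0x034),
 *
 * makes set_offsets() skip one dword and then emit
 *
 *	regs[1] = MI_LOAD_REGISTER_IMM(2) | MI_LRI_FORCE_POSTED
 *	regs[2] = base + 0x244	(REG16 offsets span two 7bit bytes,
 *				 as 0x244 >> 2 does not fit in one)
 *	regs[4] = base + 0x034	(REG offsets fit in a single byte)
 *
 * plus MI_LRI_LRM_CS_MMIO in the header on Gen11+. regs[3] and regs[5]
 * are left untouched: they hold the register values, which the GPU
 * fills in on the first context save.
 */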
static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};

static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	END
};
static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};

static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP
static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
	/*
	 * The gen12+ lists only have the registers we program in the basic
	 * default state. We rely on the context image using relative
	 * addressing to automatically fix up the register state between
	 * the physical engines backing a virtual engine.
	 */
	GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
		   !intel_engine_has_relative_mmio(engine));

	if (engine->class == RENDER_CLASS) {
		if (INTEL_GEN(engine->i915) >= 12)
			return gen12_rcs_offsets;
		else if (INTEL_GEN(engine->i915) >= 11)
			return gen11_rcs_offsets;
		else if (INTEL_GEN(engine->i915) >= 9)
			return gen9_rcs_offsets;
		else
			return gen8_rcs_offsets;
	} else {
		if (INTEL_GEN(engine->i915) >= 12)
			return gen12_xcs_offsets;
		else if (INTEL_GEN(engine->i915) >= 9)
			return gen9_xcs_offsets;
		else
			return gen8_xcs_offsets;
	}
}

static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
	if (INTEL_GEN(engine->i915) >= 12)
		return 0x60;
	else if (INTEL_GEN(engine->i915) >= 9)
		return 0x54;
	else if (engine->class == RENDER_CLASS)
		return 0x58;
	else
		return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
	if (INTEL_GEN(engine->i915) >= 12)
		return 0x74;
	else if (INTEL_GEN(engine->i915) >= 9)
		return 0x68;
	else if (engine->class == RENDER_CLASS)
		return 0xd8;
	else
		return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
	if (INTEL_GEN(engine->i915) >= 12)
		return 0x12;
	else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
		return 0x18;
	else
		return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_wa_bb_per_ctx(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_indirect_ptr(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return -1;

	if (INTEL_GEN(engine->i915) >= 12)
		return 0xb6;
	else if (INTEL_GEN(engine->i915) >= 11)
		return 0xaa;
	else
		return -1;
}
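
/*
 * Each lrc_ring_*() helper above returns the dword index of a register
 * *offset* within the context image; the register's value lives in the
 * following dword, which is why callers such as __reset_stop_ring()
 * below operate on regs[x + 1]. A return value of -1 means the register
 * is not part of that engine's context layout.
 */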
static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
	switch (INTEL_GEN(engine->i915)) {
	default:
		MISSING_CASE(INTEL_GEN(engine->i915));
		fallthrough;
	case 12:
		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 11:
		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 10:
		return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 9:
		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 8:
		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	}
}

static void
lrc_setup_indirect_ctx(u32 *regs,
		       const struct intel_engine_cs *engine,
		       u32 ctx_bb_ggtt_addr,
		       u32 size)
{
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
	regs[lrc_ring_indirect_ptr(engine) + 1] =
		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
	regs[lrc_ring_indirect_offset(engine) + 1] =
		lrc_ring_indirect_offset_default(engine) << 6;
}
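
/*
 * Worked example of the packing above (illustration only): the
 * INDIRECT_CTX dword combines the GGTT address of the batch with its
 * length in cachelines in the low bits. A 192-byte batch at GGTT offset
 * 0x10000 is programmed as 0x10000 | (192 / CACHELINE_BYTES), i.e.
 * 0x10003 for 64-byte cachelines, which is why the size must be a
 * cacheline multiple and the address cacheline aligned (see the
 * GEM_BUG_ONs above).
 */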
static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;

	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (INTEL_GEN(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	regs[CTX_TIMESTAMP] = ce->runtime.last;
}

static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		lrc_setup_indirect_ctx(regs, engine,
				       i915_ggtt_offset(wa_ctx->vma) +
				       wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
	}
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/* 64b PPGTT (48bit canonical)
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
		 * other PDP Descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	} else {
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
	}
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		return i915_vm_to_ggtt(vm)->alias;
	else
		return i915_vm_to_ppgtt(vm);
}

static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}
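
/*
 * Aside on the masked-register idiom used above: CTX_CONTEXT_CONTROL
 * and RING_MI_MODE are "masked" registers, where the high 16 bits of a
 * write select which of the low 16 bits are actually updated.
 * _MASKED_BIT_ENABLE(bit) expands to (bit << 16) | bit, and
 * _MASKED_BIT_DISABLE(bit) to just (bit << 16). Hence __reset_stop_ring()
 * stores STOP_RING << 16 with STOP_RING clear in the low half: the saved
 * write clears the bit when the context is restored.
 */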
static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * This must be kept consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}

static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}

void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 context_size;

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		context_size += I915_GTT_PAGE_SIZE; /* for redzone */

	if (INTEL_GEN(engine->i915) == 12) {
		ce->wa_bb_page = context_size / PAGE_SIZE;
		context_size += PAGE_SIZE;
	}

	obj = i915_gem_object_create_shmem(engine->i915, context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}
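
/*
 * Resulting layout of the state object (sketch derived from the code
 * above): page 0 holds the per-process HWSP, the register state follows
 * at LRC_STATE_OFFSET, on Gen12 an extra page at ce->wa_bb_page carries
 * the per-context indirect batch, and with CONFIG_DRM_I915_DEBUG_GEM a
 * redzone page is appended beyond engine->context_size.
 */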
static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}

void lrc_reset(struct intel_context *ce)
{
	CE_TRACE(ce, "reset\n");
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 i915_coherent_map_type(ce->engine->i915) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}

void lrc_unpin(struct intel_context *ce)
{
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
	if (!ce->state)
		return;

	intel_ring_put(fetch_and_zero(&ce->ring));
	i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}
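
/*
 * Summary of the pin lifecycle implemented above: lrc_pre_pin() maps the
 * state object, lrc_pin() derives lrc_reg_state and the descriptor from
 * that mapping, lrc_unpin() verifies the redzone, and lrc_post_unpin()
 * drops the mapping; lrc_fini()/lrc_destroy() then release the ring and
 * state once the context is idle.
 */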
static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

static inline u32 context_wa_bb_offset(const struct intel_context *ce)
{
	return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
	void *ptr;

	GEM_BUG_ON(!ce->wa_bb_page);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
	ptr += context_wa_bb_offset(ce);

	return ptr;
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_indirect_bb(ce);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       i915_ggtt_offset(ce->state) +
			       context_wa_bb_offset(ce),
			       (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48-53:    engine instance
 *      bit  54:       mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static inline u32 lrc_descriptor(const struct intel_context *ce)
{
	u32 desc;

	desc = INTEL_LEGACY_32B_CONTEXT;
	if (i915_vm_is_4lvl(ce->vm))
		desc = INTEL_LEGACY_64B_CONTEXT;
	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
	if (IS_GEN(ce->vm->i915, 8))
		desc |= GEN8_CTX_L3LLC_COHERENT;

	return i915_ggtt_offset(ce->state) | desc;
}
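
/*
 * Note (descriptive): lrc_descriptor() only composes the flags in bits
 * 0-11; i915_ggtt_offset(ce->state) is page aligned, so OR-ing it in
 * fills bits 12-31 (the LRCA) without disturbing those flags. The ctx
 * ID / SW context ID fields in the upper dword are assigned by the
 * submission backend, not here.
 */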
u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive with the global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, fn);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    const char *when)
{
	const struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;
	bool valid = true;
	int x;

	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_START],
		       i915_ggtt_offset(ring->vma));
		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
		valid = false;
	}

	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_CTL],
		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
		valid = false;
	}

	x = lrc_ring_mi_mode(engine);
	if (x != -1 &&
	    regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
		       engine->name, regs[x + 1]);
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
		valid = false;
	}

	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}
/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
 * PIPE_CONTROL instruction. This is required for the flush to happen
 * correctly, but there is a slight complication: the WA is applied in a
 * batch where the values are only initialized once, so we cannot read the
 * register at the start and reuse it later. Hence we save its value to
 * memory, upload a constant value with bit 21 set, and afterwards restore
 * the saved value. To simplify the WA, the constant is formed from the
 * default value of this register. This shouldn't be a problem because we
 * only modify it for a short period and this batch is non-preemptible. We
 * could of course use additional instructions that read the actual value
 * of the register at that time and set our bit of interest, but that
 * would complicate the WA.
 *
 * This WA is also required for Gen9, so extracting it as a function
 * avoids code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* NB no one else is allowed to scribble over scratch + 256! */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	return batch;
}

/*
 * Typically we only have one indirect_ctx and one per_ctx batch buffer,
 * initialized at the beginning and shared across all contexts, but this
 * field helps us to have multiple batches at different offsets and select
 * them based on some criteria. At the moment each batch always starts at
 * the beginning of the page and we don't have multiple wa_ctx batch
 * buffers.
 *
 * The number of WAs applied is not known at the beginning; we use this
 * field to return the number of DWORDs written.
 *
 * Note that this batch does not contain MI_BATCH_BUFFER_END, so it adds
 * NOOPs as padding to make it cacheline aligned. MI_BATCH_BUFFER_END is
 * added to the per-ctx batch, and together the two make a complete batch
 * buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}
struct lri {
	i915_reg_t reg;
	u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*batch++ = i915_mmio_reg_offset(lri->reg);
		*batch++ = lri->value;
	} while (lri++, --count);
	*batch++ = MI_NOOP;

	return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is set up along with the golden
		 * context during context initialization. This value depends
		 * on device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled, especially for 2x6 devices.
		 * However it is safe to load the default configuration of a
		 * 3x6 device instead of masking off the corresponding bits,
		 * because HW ignores bits of a disabled subslice and drops
		 * down to the appropriate config. Please see
		 * render_state_setup() in i915_gem_render_state.c for the
		 * possible configurations; to avoid duplication they are
		 * not shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

static u32 *
gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	int i;

	/*
	 * WaPipeControlBefore3DStateSamplePattern: cnl
	 *
	 * Ensure the engine is idle prior to programming a
	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
	 */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL,
				       0);
	/*
	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
	 * confusing. Since gen8_emit_pipe_control() already advances the
	 * batch by 6 dwords, we advance the other 10 here, completing a
	 * cacheline. It's not clear if the workaround requires this padding
	 * before other commands, or if it's just the regular padding we would
	 * already have for the workaround bb, so leave it here for now.
	 */
	for (i = 0; i < 10; i++)
		*batch++ = MI_NOOP;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
	if (err)
		goto err;

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

int lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[] = {
		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
	};
	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
	void *batch, *batch_ptr;
	unsigned int i;
	int ret;

	if (engine->class != RENDER_CLASS)
		return 0;

	switch (INTEL_GEN(engine->i915)) {
	case 12:
	case 11:
		return 0;
	case 10:
		wa_bb_fn[0] = gen10_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 9:
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 8:
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	default:
		MISSING_CASE(INTEL_GEN(engine->i915));
		return 0;
	}

	ret = lrc_setup_wa_ctx(engine);
	if (ret) {
		drm_dbg(&engine->i915->drm,
			"Failed to setup context WA page: %d\n", ret);
		return ret;
	}

	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
	if (IS_ERR(batch)) {
		/* pin_map returns an ERR_PTR on failure */
		lrc_fini_wa_ctx(engine);
		return PTR_ERR(batch);
	}

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	batch_ptr = batch;
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			ret = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}
	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
	__i915_gem_object_release_map(wa_ctx->vma->obj);
	if (ret)
		lrc_fini_wa_ctx(engine);

	return ret;
}

static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	ce->runtime.num_underflow++;
	ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
#endif
}

void lrc_update_runtime(struct intel_context *ce)
{
	u32 old;
	s32 dt;

	if (intel_context_is_barrier(ce))
		return;

	old = ce->runtime.last;
	ce->runtime.last = lrc_get_runtime(ce);
	dt = ce->runtime.last - old;

	if (unlikely(dt < 0)) {
		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
			 old, ce->runtime.last, dt);
		st_update_runtime_underflow(ce, dt);
		return;
	}

	ewma_runtime_add(&ce->runtime.avg, dt);
	ce->runtime.total += dt;
}
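
/*
 * Worked example for the delta computation above (illustrative):
 * CTX_TIMESTAMP is a 32-bit counter, so the unsigned subtraction wraps
 * naturally: old == 0xfffffff0 and a new value of 0x00000010 yield
 * dt == 0x20. A negative dt therefore means the saved timestamp went
 * backwards (e.g. the context image was reset or scrubbed), which is
 * what the underflow path records for the selftests.
 */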
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif