// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= BIT(0);
	}
}

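/*
 * Worked example of the encoding consumed above (illustrative only;
 * these byte values follow from the macros and are not used directly):
 *
 *   LRI(13, POSTED) -> 0x4d: emit MI_LOAD_REGISTER_IMM(13) with
 *                      MI_LRI_FORCE_POSTED (plus MI_LRI_LRM_CS_MMIO on
 *                      gen11+), then expect 13 register offsets.
 *   REG(0x034)      -> 0x0d: single-byte offset, regs[0] = base + 0x34.
 *   REG16(0x244)    -> 0x81 0x11: two bytes, the first carrying the
 *                      continuation bit BIT(7); decoded as
 *                      ((1 << 7) | 0x11) << 2 = 0x244 above base.
 *   NOP(5)          -> 0x85: skip five dwords of the context image.
 *   END             -> 0x00: terminates the walk.
 *
 * Each register then occupies two dwords in the image (offset, value);
 * set_offsets() fills in only the offset dword.
 */
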
static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};

static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};

static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
	/*
	 * The gen12+ lists only have the registers we program in the basic
	 * default state. We rely on the context image using relative
	 * addressing to automatically fix up the register state between the
	 * physical engines backing a virtual engine.
	 */
	GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
		   !intel_engine_has_relative_mmio(engine));

	if (engine->class == RENDER_CLASS) {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_rcs_offsets;
		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
			return xehp_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 11)
			return gen11_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_rcs_offsets;
		else
			return gen8_rcs_offsets;
	} else {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_xcs_offsets;
		else
			return gen8_xcs_offsets;
	}
}

static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x70;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x60;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x54;
	else if (engine->class == RENDER_CLASS)
		return 0x58;
	else
		return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x84;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x74;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x68;
	else if (engine->class == RENDER_CLASS)
		return 0xd8;
	else
		return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x12;
	else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
		return 0x18;
	else
		return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_wa_bb_per_ctx(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_indirect_ptr(engine);
	if (x < 0)
		return x;

	return x + 2;
}

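/*
 * Illustration of the indices returned above (derived from the helpers,
 * not defined anywhere else): entries in the context image are (reg,
 * value) dword pairs, hence the +2 steps. On gen12, for example,
 * lrc_ring_wa_bb_per_ctx() is 0x12, so regs[0x13] holds the per-context
 * batch buffer pointer, regs[0x15] the INDIRECT_CTX value and
 * regs[0x17] the INDIRECT_CTX_OFFSET value.
 */
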
static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		/*
		 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
		 * simply to match the RCS context image layout.
		 */
		return 0xc6;
	else if (engine->class != RENDER_CLASS)
		return -1;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0xb6;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return 0xaa;
	else
		return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
	switch (GRAPHICS_VER(engine->i915)) {
	default:
		MISSING_CASE(GRAPHICS_VER(engine->i915));
		fallthrough;
	case 12:
		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 11:
		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 9:
		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 8:
		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	}
}

static void
lrc_setup_indirect_ctx(u32 *regs,
		       const struct intel_engine_cs *engine,
		       u32 ctx_bb_ggtt_addr,
		       u32 size)
{
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
	regs[lrc_ring_indirect_ptr(engine) + 1] =
		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
	regs[lrc_ring_indirect_offset(engine) + 1] =
		lrc_ring_indirect_offset_default(engine) << 6;
}

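/*
 * Encoding example for the above (the address is invented for
 * illustration): a 128-byte batch at GGTT address 0x2000 is recorded as
 * 0x2000 | (128 / 64) = 0x2002, i.e. the cacheline-aligned address with
 * the batch length in cachelines in the low bits, while the offset
 * register takes the per-gen default shifted into its field (<< 6).
 */
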
static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;

	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	regs[CTX_TIMESTAMP] = ce->runtime.last;
}

static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		lrc_setup_indirect_ctx(regs, engine,
				       i915_ggtt_offset(wa_ctx->vma) +
				       wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
	}
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/* 64b PPGTT (48bit canonical)
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
		 * other PDP Descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	} else {
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
	}
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		return i915_vm_to_ggtt(vm)->alias;
	else
		return i915_vm_to_ppgtt(vm);
}

static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}

static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * Must keep consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}

static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}

void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

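/*
 * Rough map of the context object initialised above and allocated
 * below (a sketch following lrc_init_state(), set_redzone() and
 * __lrc_alloc_state(); exact sizes are per-engine):
 *
 *   page 0:          per-process HWSP (ppHWSP)
 *   page 1 onward:   register state at LRC_STATE_OFFSET, up to
 *                    engine->context_size in total
 *   +1 page (debug): CONTEXT_REDZONE poison, checked on unpin
 *   +1 page (gen12): per-context indirect wa batch, ce->wa_bb_page
 */
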
static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 context_size;

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		context_size += I915_GTT_PAGE_SIZE; /* for redzone */

	if (GRAPHICS_VER(engine->i915) == 12) {
		ce->wa_bb_page = context_size / PAGE_SIZE;
		context_size += PAGE_SIZE;
	}

	obj = i915_gem_object_create_lmem(engine->i915, context_size, 0);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_shmem(engine->i915, context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, ce->ring_size);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}

void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 i915_coherent_map_type(ce->engine->i915,
								ce->state->obj,
								false) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}

void lrc_unpin(struct intel_context *ce)
{
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
	if (!ce->state)
		return;

	intel_ring_put(fetch_and_zero(&ce->ring));
	i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

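/*
 * Taken together, the entry points above form the lrc lifecycle:
 * lrc_alloc() creates the state object, ring and timeline;
 * lrc_pre_pin() maps the state and lrc_pin() initialises it on first
 * use and refreshes the descriptor; lrc_unpin()/lrc_post_unpin()
 * reverse the pin stages; lrc_fini()/lrc_destroy() drop the
 * references again.
 */
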
static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

static u32 context_wa_bb_offset(const struct intel_context *ce)
{
	return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
	void *ptr;

	GEM_BUG_ON(!ce->wa_bb_page);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
	ptr += context_wa_bb_offset(ce);

	return ptr;
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_indirect_bb(ce);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       i915_ggtt_offset(ce->state) +
			       context_wa_bb_offset(ce),
			       (cs - start) * sizeof(*cs));
}

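/*
 * Size arithmetic for the above, with illustrative numbers: if emit()
 * wrote 26 dwords (104 bytes), the MI_NOOP loop pads to the next
 * cacheline boundary, 128 bytes, and lrc_setup_indirect_ctx() then
 * records that as two cachelines alongside the batch's GGTT address.
 */
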
/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB:
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48-53:    engine instance
 *      bit  54:       mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 *      bits 32-37:    virtual function number
 *      bit  38:       mbz, reserved for use by hardware
 *      bits 39-54:    SW context ID
 *      bits 55-57:    reserved
 *      bits 58-63:    SW counter
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
	u32 desc;

	desc = INTEL_LEGACY_32B_CONTEXT;
	if (i915_vm_is_4lvl(ce->vm))
		desc = INTEL_LEGACY_64B_CONTEXT;
	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
	if (GRAPHICS_VER(ce->vm->i915) == 8)
		desc |= GEN8_CTX_L3LLC_COHERENT;

	return i915_ggtt_offset(ce->state) | desc;
}

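/*
 * Worked example (the GGTT offset is invented, and the bit values
 * assume the usual GEN8_CTX_* definitions: VALID = BIT(0),
 * PRIVILEGE = BIT(8), addressing mode at bits 3-4): a 4-level ppgtt
 * with the state at 0x00080000 yields
 * 0x00080000 | (INTEL_LEGACY_64B_CONTEXT << 3) | 0x101 = 0x00080119,
 * before lrc_update_regs() below ors in CTX_DESC_FORCE_RESTORE.
 */
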
u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive wrt the global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, fn);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    const char *when)
{
	const struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;
	bool valid = true;
	int x;

	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_START],
		       i915_ggtt_offset(ring->vma));
		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
		valid = false;
	}

	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_CTL],
		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
		valid = false;
	}

	x = lrc_ring_mi_mode(engine);
	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
		       engine->name, regs[x + 1]);
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
		valid = false;
	}

	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
 * but there is a slight complication as this is applied in WA batch where the
 * values are only initialized once so we cannot take register value at the
 * beginning and reuse it further; hence we save its value to memory, upload a
 * constant value with bit21 set and then we restore it back with the saved value.
 * To simplify the WA, a constant value is formed by using the default value
 * of this register. This shouldn't be a problem because we are only modifying
 * it for a short period and this batch is non-preemptible. We can of course
 * use additional instructions that read the actual value of the register
 * at that time and set our bit of interest, but it makes the WA complicated.
 *
 * This WA is also required for Gen9 so extracting as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* NB no one else is allowed to scribble over scratch + 256! */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	return batch;
}

/*
 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
 * initialized at the beginning and shared across all contexts but this field
 * helps us to have multiple batches at different offsets and select them based
 * on a criteria. At the moment this batch always starts at the beginning of the
 * page and at this point we don't have multiple wa_ctx batch buffers.
 *
 * The number of WAs applied is not known at the beginning; we use this field
 * to return the number of DWORDs written.
 *
 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
 * so it adds NOOPs as padding to make it cacheline aligned.
 * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
 * make a complete batch buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}

struct lri {
	i915_reg_t reg;
	u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*batch++ = i915_mmio_reg_offset(lri->reg);
		*batch++ = lri->value;
	} while (lri++, --count);
	*batch++ = MI_NOOP;

	return batch;
}

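/*
 * Dword accounting for emit_lri(): one MI_LOAD_REGISTER_IMM header,
 * two dwords per (reg, value) entry and a trailing MI_NOOP, so the
 * three-entry table used below costs 1 + 3 * 2 + 1 = 8 dwords.
 */
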
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is set up along with golden context
		 * during context initialization. This value depends on
		 * device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled especially for 2x6
		 * devices, however it is safe to load default
		 * configuration of 3x6 device instead of masking off
		 * corresponding bits because HW ignores bits of a disabled
		 * subslice and drops down to appropriate config. Please
		 * see render_state_setup() in i915_gem_render_state.c for
		 * possible configurations, to avoid duplication they are
		 * not shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[] = {
		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
	};
	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
	struct i915_gem_ww_ctx ww;
	void *batch, *batch_ptr;
	unsigned int i;
	int err;

	if (engine->class != RENDER_CLASS)
		return;

	switch (GRAPHICS_VER(engine->i915)) {
	case 12:
	case 11:
		return;
	case 9:
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 8:
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	default:
		MISSING_CASE(GRAPHICS_VER(engine->i915));
		return;
	}

	err = lrc_create_wa_ctx(engine);
	if (err) {
		/*
		 * We continue even if we fail to initialize WA batch
		 * because we only expect rare glitches but nothing
		 * critical to prevent us from using the GPU
		 */
		drm_err(&engine->i915->drm,
			"Ignoring context switch w/a allocation error:%d\n",
			err);
		return;
	}

	if (!engine->wa_ctx.vma)
		return;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
	if (!err)
		err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
	if (err)
		goto err;

	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_unpin;
	}

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	batch_ptr = batch;
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			err = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}
	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
	__i915_gem_object_release_map(wa_ctx->vma->obj);

	/* Verify that we can handle failure to setup the wa_ctx */
	if (!err)
		err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
	if (err)
		i915_vma_unpin(wa_ctx->vma);
err:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (err) {
		i915_vma_put(engine->wa_ctx.vma);

		/* Clear all flags to prevent further use */
		memset(wa_ctx, 0, sizeof(*wa_ctx));
	}
}

static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	ce->runtime.num_underflow++;
	ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
#endif
}

void lrc_update_runtime(struct intel_context *ce)
{
	u32 old;
	s32 dt;

	if (intel_context_is_barrier(ce))
		return;

	old = ce->runtime.last;
	ce->runtime.last = lrc_get_runtime(ce);
	dt = ce->runtime.last - old;

	if (unlikely(dt < 0)) {
		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
			 old, ce->runtime.last, dt);
		st_update_runtime_underflow(ce, dt);
		return;
	}

	ewma_runtime_add(&ce->runtime.avg, dt);
	ce->runtime.total += dt;
}

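/*
 * Note on the arithmetic above: CTX_TIMESTAMP is a 32-bit counter, so
 * the delta is computed in u32 and only then inspected as s32. For
 * example, old = 0xfffffff0 and last = 0x00000010 wraps to dt = 0x20
 * and is accumulated normally; a genuinely negative dt should not
 * happen, so it is reported (and counted by the selftests) rather
 * than added to the totals.
 */
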
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif