// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

/*
 * set_offsets() - write the MI_LRI command headers and register offsets
 * into a context image, leaving the register *values* untouched.
 *
 * @regs is the start of the register state in the context image; @data is
 * a compact bytecode built from the NOP()/LRI()/REG()/REG16() macros below:
 *
 *   NOP(x)           - skip @x dwords of the image (bit7 set in the byte)
 *   LRI(count, f)    - emit an MI_LOAD_REGISTER_IMM(count) header; POSTED
 *                      in @f adds MI_LRI_FORCE_POSTED
 *   REG(x)/REG16(x)  - a register offset encoded as a 7-bits-per-byte
 *                      varint (bit7 = continuation), added to mmio_base
 *   END (0)          - terminates the stream
 *
 * If @close is set, an MI_BATCH_BUFFER_END is written after the stream
 * (used mainly by live_lrc_layout()).
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		/* LRI header byte: low 6 bits = count, top 2 bits = flags */
		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			/* decode the 7-bit varint register offset */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/* write the offset dword; value dword is skipped */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= BIT(0);
	}
}

/* Context-image register layout for gen8 non-render (xcs) engines */
static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};

/* Context-image register layout for gen9 non-render (xcs) engines */
static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};

/* Context-image register layout for gen12 non-render (xcs) engines */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

/* Context-image register layout for DG2 non-render (xcs) engines */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

/* Context-image register layout for the gen8 render (rcs) engine */
static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};

/* Context-image register layout for the gen9 render (rcs) engine */
static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};

/* Context-image register layout for the gen11 render (rcs) engine */
static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};

/* Context-image register layout for the gen12 render (rcs) engine */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

/* Context-image register layout for the Xe_HP render (rcs) engine */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

/* Context-image register layout for the DG2 render (rcs) engine */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

/*
 * Select the register-offset bytecode table matching this engine's class
 * and graphics IP version; consumed by set_offsets().
 */
static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
	/*
	 * The gen12+ lists only have the registers we program in the basic
	 * default state. We rely on the context image using relative
	 * addressing to automatically fix up the register state between the
	 * physical engines for virtual engine.
	 */
	GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
		   !intel_engine_has_relative_mmio(engine));

	if (engine->class == RENDER_CLASS) {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_rcs_offsets;
		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
			return xehp_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 11)
			return gen11_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_rcs_offsets;
		else
			return gen8_rcs_offsets;
	} else {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_xcs_offsets;
		else
			return gen8_xcs_offsets;
	}
}

/*
 * The lrc_ring_*() helpers below return the dword index of a register's
 * slot within the context image (the LRI header; the value lives at
 * index + 1), or -1 if the register is absent from this engine/gen layout.
 */

/* Dword index of RING_MI_MODE in the context image, or -1 */
static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x70;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x60;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x54;
	else if (engine->class == RENDER_CLASS)
		return 0x58;
	else
		return -1;
}

/* Dword index of CS_GPR0 in the context image, or -1 */
static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x84;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x74;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x68;
	else if (engine->class == RENDER_CLASS)
		return 0xd8;
	else
		return -1;
}

/* Dword index of the per-context WA batch-buffer pointer, or -1 */
static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x12;
	else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
		return 0x18;
	else
		return -1;
}

/* Indirect-context pointer sits two dwords after the per-ctx WA bb slot */
static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_wa_bb_per_ctx(engine);
	if (x < 0)
		return x;

	return x + 2;
}

/* Indirect-context offset sits two dwords after the indirect pointer */
static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_indirect_ptr(engine);
	if (x < 0)
		return x;

	return x + 2;
}

/* Dword index of CMD_BUF_CCTL in the context image, or -1 */
static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		/*
		 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
		 * simply to match the RCS context image layout.
		 */
		return 0xc6;
	else if (engine->class != RENDER_CLASS)
		return -1;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0xb6;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return 0xaa;
	else
		return -1;
}

/* Per-gen default value for the INDIRECT_CTX_OFFSET register field */
static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
	switch (GRAPHICS_VER(engine->i915)) {
	default:
		MISSING_CASE(GRAPHICS_VER(engine->i915));
		fallthrough;
	case 12:
		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 11:
		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 9:
		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 8:
		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	}
}

/*
 * Program the indirect-context batch buffer pointer and offset into the
 * register state @regs. @size must be a non-zero multiple of
 * CACHELINE_BYTES, as the low bits of the pointer encode the length in
 * cachelines.
 */
static void
lrc_setup_indirect_ctx(u32 *regs,
		       const struct intel_engine_cs *engine,
		       u32 ctx_bb_ggtt_addr,
		       u32 size)
{
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
	regs[lrc_ring_indirect_ptr(engine) + 1] =
		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
	regs[lrc_ring_indirect_offset(engine) + 1] =
		lrc_ring_indirect_offset_default(engine) << 6;
}

/*
 * Initialise CTX_CONTEXT_CONTROL (masked-write register) and restore the
 * saved context timestamp. @inhibit suppresses the initial context restore
 * for an uninitialised image.
 */
static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;

	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	regs[CTX_TIMESTAMP] = ce->runtime.last;
}

/*
 * Point the context image at the engine's global workaround batch buffers
 * (per-context and indirect-context), when they exist.
 */
static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		lrc_setup_indirect_ctx(regs, engine,
				       i915_ggtt_offset(wa_ctx->vma) +
				       wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
	}
}

/* Write the PPGTT page-directory pointers into the register state */
static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/* 64b PPGTT (48bit canonical)
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
		 * other PDP Descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	} else {
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
	}
}

/* Resolve a GGTT address space to its aliasing PPGTT, else the plain PPGTT */
static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		return i915_vm_to_ggtt(vm)->alias;
	else
		return i915_vm_to_ppgtt(vm);
}

/* Clear any STOP_RING request left in the context's RING_MI_MODE */
static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}

static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * Must keep consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}

/* Initialise (or reinitialise) the register state of a pinned context */
void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

/* Minimal post-reset fixup: clear STOP_RING in the context image */
void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}

/*
 * Fill the debug redzone page placed just after the context image
 * (CONFIG_DRM_I915_DEBUG_GEM only).
 */
static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

/* Complain (once) if the debug redzone after the context image was written */
static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}

/*
 * Initialise a freshly-allocated context image: copy the engine's default
 * state when available (marking the context valid), scrub the ppHWSP and
 * set up the register state page.
 */
void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

/*
 * Allocate the backing object + vma for a context image, preferring local
 * memory with a shmem fallback. On gen12 an extra page is reserved for the
 * per-context indirect workaround batch buffer.
 */
static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 context_size;

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		context_size += I915_GTT_PAGE_SIZE; /* for redzone */

	if (GRAPHICS_VER(engine->i915) == 12) {
		ce->wa_bb_page = context_size / PAGE_SIZE;
		context_size += PAGE_SIZE;
	}

	obj = i915_gem_object_create_lmem(engine->i915, context_size,
					  I915_BO_ALLOC_PM_VOLATILE);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_shmem(engine->i915, context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

/*
 * Recreate a timeline from the pinned-HWSP hint stashed in ce->timeline
 * (pointer low bits carry flags, hence page_unmask_bits()).
 */
static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

/*
 * Allocate all per-context resources: state vma, ring and timeline.
 * Returns 0 on success or a negative error code, releasing anything
 * acquired on the failure paths.
 */
int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, ce->ring_size);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}

/* Scrub and reinitialise a pinned context after a GPU reset */
void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

/* Map the context state object; the mapping is returned through @vaddr */
int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 i915_coherent_map_type(ce->engine->i915,
								ce->state->obj,
								false) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}

/*
 * Complete pinning: locate the register state within the mapping,
 * initialise the image on first pin and refresh the ring registers.
 */
int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}

/* On unpin, verify the redzone guarding the context image */
void lrc_unpin(struct intel_context *ce)
{
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

/* Drop the mapping taken in lrc_pre_pin() */
void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}

/* Release the ring and state vma; safe to call on an unallocated context */
void lrc_fini(struct intel_context *ce)
{
	if (!ce->state)
		return;

	intel_ring_put(fetch_and_zero(&ce->ring));
	i915_vma_put(fetch_and_zero(&ce->state));
}

/* Final kref release callback for an intel_context */
void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

/*
 * Emit commands restoring CTX_TIMESTAMP from the saved context image via
 * CS_GPR(0): one LRM from the image followed by the LRR written twice
 * (NOTE(review): the duplicated LRR appears deliberate as part of the
 * workaround sequence — confirm against the applicable workaround entry).
 */
static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}

/* Reload CS_GPR(0) from its saved slot in the context image */
static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}

/* Restore CMD_BUF_CCTL from the saved context image via CS_GPR(0) */
static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}

/* Per-context indirect-ctx workaround batch for render engines */
static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

/* Per-context indirect-ctx workaround batch for non-render engines */
static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

/* Byte offset of the per-context WA batch page within the state object */
static u32 context_wa_bb_offset(const struct intel_context *ce)
{
	return PAGE_SIZE * ce->wa_bb_page;
}

/* CPU pointer to the per-context WA batch page within the mapped image */
static u32 *context_indirect_bb(const struct intel_context *ce)
{
	void *ptr;

	GEM_BUG_ON(!ce->wa_bb_page);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
	ptr += context_wa_bb_offset(ce);

	return ptr;
}

/*
 * Emit the per-context indirect batch via @emit, pad it to a cacheline
 * with MI_NOOPs and point the context registers at it.
 */
static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_indirect_bb(ce);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       i915_ggtt_offset(ce->state) +
			       context_wa_bb_offset(ce),
			       (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48:53:    engine instance
 *      bit 54:        mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 *      bits 32-37:    virtual function number
 *      bit 38:        mbz, reserved for use by hardware
 *      bits 39-54:    SW context ID
 *      bits 55-57:    reserved
 *      bits 58-63:    SW counter
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
	u32 desc;

	desc = INTEL_LEGACY_32B_CONTEXT;
	if (i915_vm_is_4lvl(ce->vm))
		desc = INTEL_LEGACY_64B_CONTEXT;
	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
	if (GRAPHICS_VER(ce->vm->i915) == 8)
		desc |= GEN8_CTX_L3LLC_COHERENT;

	return i915_ggtt_offset(ce->state) | desc;
}

/*
 * Refresh the ring registers (START/HEAD/TAIL/CTL), RPCS and, on gen12,
 * the per-context indirect workaround batch. Returns the context
 * descriptor with CTX_DESC_FORCE_RESTORE set.
 */
u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive wrt to global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, fn);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

/* Rewrite just the register offsets (not values) in the context image */
void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

/*
 * Sanity-check (and repair) the context image before/after submission:
 * RING_START, RING_CTL and any pending STOP_RING are validated and any
 * mismatch reported via WARN_ONCE.
 */
void lrc_check_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    const char *when)
{
	const struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;
	bool valid = true;
	int x;

	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_START],
		       i915_ggtt_offset(ring->vma));
		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
		valid = false;
	}

	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_CTL],
		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
		valid = false;
	}

	x = lrc_ring_mi_mode(engine);
	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
		       engine->name, regs[x + 1]);
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
		valid = false;
	}

	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
 * but there is a slight complication as this is applied in WA batch where the
 * values are only initialized once so we cannot take register value at the
 * beginning and reuse it further; hence we save its value to memory, upload a
 * constant value with bit21 set and then we restore it back with the saved value.
 * To simplify the WA, a constant value is formed by using the default value
 * of this register. This shouldn't be a problem because we are only modifying
 * it for a short period and this batch is non-preemptible.
We can ofcourse 1358 * use additional instructions that read the actual value of the register 1359 * at that time and set our bit of interest but it makes the WA complicated. 1360 * 1361 * This WA is also required for Gen9 so extracting as a function avoids 1362 * code duplication. 1363 */ 1364 static u32 * 1365 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) 1366 { 1367 /* NB no one else is allowed to scribble over scratch + 256! */ 1368 *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; 1369 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); 1370 *batch++ = intel_gt_scratch_offset(engine->gt, 1371 INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); 1372 *batch++ = 0; 1373 1374 *batch++ = MI_LOAD_REGISTER_IMM(1); 1375 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); 1376 *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; 1377 1378 batch = gen8_emit_pipe_control(batch, 1379 PIPE_CONTROL_CS_STALL | 1380 PIPE_CONTROL_DC_FLUSH_ENABLE, 1381 0); 1382 1383 *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; 1384 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); 1385 *batch++ = intel_gt_scratch_offset(engine->gt, 1386 INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); 1387 *batch++ = 0; 1388 1389 return batch; 1390 } 1391 1392 /* 1393 * Typically we only have one indirect_ctx and per_ctx batch buffer which are 1394 * initialized at the beginning and shared across all contexts but this field 1395 * helps us to have multiple batches at different offsets and select them based 1396 * on a criteria. At the moment this batch always start at the beginning of the page 1397 * and at this point we don't have multiple wa_ctx batch buffers. 1398 * 1399 * The number of WA applied are not known at the beginning; we use this field 1400 * to return the no of DWORDS written. 1401 * 1402 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END 1403 * so it adds NOOPs as padding to make it cacheline aligned. 
 * MI_BATCH_BUFFER_END will be added to the per-ctx batch and both of them
 * together make a complete batch buffer.
 */
/* Emit the gen8 (bdw/chv) indirect-context workaround batch buffer. */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}

/* A single register/value pair programmed by emit_lri(). */
struct lri {
	i915_reg_t reg;
	u32 value;
};

/*
 * Emit one MI_LOAD_REGISTER_IMM covering @count register/value pairs.
 * The LRI count field is only 6 bits wide (see the LRI() macro), hence
 * the upper bound of 63.
 */
static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*batch++ = i915_mmio_reg_offset(lri->reg);
		*batch++ = lri->value;
	} while (lri++, --count);
	*batch++ = MI_NOOP; /* keeps the emitted length an even dword count */

	return batch;
}

/* Emit the gen9 (skl/bxt/kbl/glk/cfl) indirect-context workaround batch. */
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is setup along with golden context
		 * during context initialization. This value depends on
		 * device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled especially for 2x6
		 * devices, however it is safe to load default
		 * configuration of 3x6 device instead of masking off
		 * corresponding bits because HW ignores bits of a disabled
		 * subslice and drops down to appropriate config. Please
		 * see render_state_setup() in i915_gem_render_state.c for
		 * possible configurations, to avoid duplication they are
		 * not shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000; /* default 3x6 EU pool config, see above */
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

/* One page shared by the indirect_ctx and per_ctx WA batches. */
#define CTX_WA_BB_SIZE (PAGE_SIZE)

/*
 * Allocate the backing store for the per-engine workaround batch buffers:
 * a shmem object and its vma in the global GTT. On success the vma is
 * stashed in engine->wa_ctx.vma; returns 0 or a negative errno.
 */
static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

/* Unpin and release the workaround batch buffer vma, if any. */
void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

/* Emitter for one WA batch: writes commands at @batch, returns the new tail. */
typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

/*
 * Set up the per-engine workaround context batch buffers (indirect_ctx and
 * per_ctx). Only the render engine on gen8/gen9 uses them; gen11+ returns
 * early. Failure to allocate is deliberately non-fatal: we log and carry on
 * without the workarounds rather than failing engine initialisation.
 */
void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[] = {
		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
	};
	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
	struct i915_gem_ww_ctx ww;
	void *batch, *batch_ptr;
	unsigned int i;
	int err;

	if (engine->class != RENDER_CLASS)
		return;

	switch (GRAPHICS_VER(engine->i915)) {
	case 12:
	case 11:
		return;
	case 9:
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 8:
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	default:
		MISSING_CASE(GRAPHICS_VER(engine->i915));
		return;
	}

	err = lrc_create_wa_ctx(engine);
	if (err) {
		/*
		 * We continue even if we fail to initialize WA batch
		 * because we only expect rare glitches but nothing
		 * critical to prevent us from using GPU
		 */
		drm_err(&engine->i915->drm,
			"Ignoring context switch w/a allocation error:%d\n",
			err);
		return;
	}

	if (!engine->wa_ctx.vma)
		return;

	/* ww transaction: lock + pin, backing off and retrying on -EDEADLK */
	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
	if (!err)
		err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
	if (err)
		goto err;

	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_unpin;
	}

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	batch_ptr = batch;
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			err = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}
	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
	__i915_gem_object_release_map(wa_ctx->vma->obj);

	/* Verify that we can handle failure to setup the wa_ctx */
	if (!err)
		err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
	if (err)
		i915_vma_unpin(wa_ctx->vma);
err:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (err) {
		i915_vma_put(engine->wa_ctx.vma);

		/* Clear all flags to prevent further use */
		memset(wa_ctx, 0, sizeof(*wa_ctx));
	}
}

/* Selftest-only bookkeeping for runtime underflows; no-op otherwise. */
static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	ce->runtime.num_underflow++;
	ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
#endif
}

/*
 * Fold the latest runtime sample from the context image into the context's
 * running average and total. A negative delta is treated as an underflow:
 * it is traced, counted for selftests and not accumulated.
 */
void lrc_update_runtime(struct intel_context *ce)
{
	u32 old;
	s32 dt;

	/* NOTE(review): barrier contexts are skipped — presumably they carry
	 * no meaningful runtime; confirm against intel_context_is_barrier().
	 */
	if (intel_context_is_barrier(ce))
		return;

	old = ce->runtime.last;
	ce->runtime.last = lrc_get_runtime(ce);
	dt = ce->runtime.last - old;

	if (unlikely(dt < 0)) {
		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
			 old, ce->runtime.last, dt);
		st_update_runtime_underflow(ce, dt);
		return;
	}

	ewma_runtime_add(&ce->runtime.avg, dt);
	ce->runtime.total += dt;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif