/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao <haihao.xiang@intel.com>
 *
 */

#include "gen2_engine_cs.h"
#include "gen6_engine_cs.h"
#include "gen6_ppgtt.h"
#include "gen7_renderclear.h"
#include "i915_drv.h"
#include "i915_mitigations.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_reset.h"
#include "intel_ring.h"
#include "shmem_utils.h"

/* Rough estimate of the typical request size, performing a flush,
 * set-context and then emitting the batch.
 */
#define LEGACY_REQUEST_SIZE 200

static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Keep the render interrupt unmasked as this papers over
	 * lost interrupts following a reset.
	 */
	if (engine->class == RENDER_CLASS) {
		if (INTEL_GEN(engine->i915) >= 6)
			mask &= ~BIT(0);
		else
			mask &= ~I915_USER_INTERRUPT;
	}

	intel_engine_set_hwsp_writemask(engine, mask);
}

static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
{
	u32 addr;

	addr = lower_32_bits(phys);
	if (INTEL_GEN(engine->i915) >= 4)
		addr |= (phys >> 28) & 0xf0;

	intel_uncore_write(engine->uncore, HWS_PGA, addr);
}

static struct page *status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
	set_hwstam(engine, ~0u);
}

static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
{
	i915_reg_t hwsp;

	/*
	 * The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN(engine->i915, 7)) {
		switch (engine->id) {
		/*
		 * No more rings exist on Gen7. Default case is only to shut up
		 * gcc switch check warning.
		 */
		default:
			GEM_BUG_ON(engine->id);
			fallthrough;
		case RCS0:
			hwsp = RENDER_HWS_PGA_GEN7;
			break;
		case BCS0:
			hwsp = BLT_HWS_PGA_GEN7;
			break;
		case VCS0:
			hwsp = BSD_HWS_PGA_GEN7;
			break;
		case VECS0:
			hwsp = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN(engine->i915, 6)) {
		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
	} else {
		hwsp = RING_HWS_PGA(engine->mmio_base);
	}

	intel_uncore_write_fw(engine->uncore, hwsp, offset);
	intel_uncore_posting_read_fw(engine->uncore, hwsp);
}

static void flush_cs_tlb(struct intel_engine_cs *engine)
{
	if (!IS_GEN_RANGE(engine->i915, 6, 7))
		return;

	/* ring should be idle before issuing a sync flush */
	GEM_DEBUG_WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);

	ENGINE_WRITE_FW(engine, RING_INSTPM,
			_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					   INSTPM_SYNC_FLUSH));
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_INSTPM(engine->mmio_base),
					 INSTPM_SYNC_FLUSH, 0,
					 2000, 0, NULL))
		ENGINE_TRACE(engine,
			     "wait for SyncFlush to complete for TLB invalidation timed out\n");
}

static void ring_setup_status_page(struct intel_engine_cs *engine)
{
	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
	set_hwstam(engine, ~0u);

	flush_cs_tlb(engine);
}

static struct i915_address_space *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		vm = &i915_vm_to_ggtt(vm)->alias->vm;

	return vm;
}

static u32 pp_dir(struct i915_address_space *vm)
{
	return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
}

static void set_pp_dir(struct intel_engine_cs *engine)
{
	struct i915_address_space *vm = vm_alias(engine->gt->vm);

	if (!vm)
		return;

	ENGINE_WRITE_FW(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
	ENGINE_WRITE_FW(engine, RING_PP_DIR_BASE, pp_dir(vm));

	if (INTEL_GEN(engine->i915) >= 7) {
		ENGINE_WRITE_FW(engine,
				RING_MODE_GEN7,
				_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

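/*
 * (Re)start the engine: point the hardware at the status page, reload the
 * ring registers from the software state (RING_START, HEAD, TAIL), re-enable
 * the ring via RING_CTL and wait for the hardware to report RING_VALID.
 */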
static int xcs_resume(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_ring *ring = engine->legacy.ring;

	ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
		     ring->head, ring->tail);

	if (HWS_NEEDS_PHYSICAL(dev_priv))
		ring_setup_phys_status_page(engine);
	else
		ring_setup_status_page(engine);

	intel_breadcrumbs_reset(engine->breadcrumbs);

	/* Enforce ordering by reading HEAD register back */
	ENGINE_POSTING_READ(engine, RING_HEAD);

	/*
	 * Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD register
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values.
	 */
	ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma));

	/* Check that the ring offsets point within the ring! */
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
	intel_ring_update_space(ring);

	set_pp_dir(engine);

	/* First wake the ring up to an empty/idle ring */
	ENGINE_WRITE_FW(engine, RING_HEAD, ring->head);
	ENGINE_WRITE_FW(engine, RING_TAIL, ring->head);
	ENGINE_POSTING_READ(engine, RING_TAIL);

	ENGINE_WRITE_FW(engine, RING_CTL,
			RING_CTL_SIZE(ring->size) | RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_CTL(engine->mmio_base),
					 RING_VALID, RING_VALID,
					 5000, 0, NULL)) {
		drm_err(&dev_priv->drm,
			"%s initialization failed; "
			"ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
			engine->name,
			ENGINE_READ(engine, RING_CTL),
			ENGINE_READ(engine, RING_CTL) & RING_VALID,
			ENGINE_READ(engine, RING_HEAD), ring->head,
			ENGINE_READ(engine, RING_TAIL), ring->tail,
			ENGINE_READ(engine, RING_START),
			i915_ggtt_offset(ring->vma));
		return -EIO;
	}

	if (INTEL_GEN(dev_priv) > 2)
		ENGINE_WRITE_FW(engine,
				RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));

	/* Now awake, let it get started */
	if (ring->tail != ring->head) {
		ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail);
		ENGINE_POSTING_READ(engine, RING_TAIL);
	}

	/* Papering over lost _interrupts_ immediately following the restart */
	intel_engine_signal_breadcrumbs(engine);
	return 0;
}

static void sanitize_hwsp(struct intel_engine_cs *engine)
{
	struct intel_timeline *tl;

	list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
		intel_timeline_reset_seqno(tl);
}

static void xcs_sanitize(struct intel_engine_cs *engine)
{
	/*
	 * Poison residual state on resume, in case the suspend didn't!
	 *
	 * We have to assume that across suspend/resume (or other loss
	 * of control) the contents of our pinned buffers have been
	 * lost, replaced by garbage. Since this doesn't always happen,
	 * let's poison such state so that we more quickly spot when
	 * we falsely assume it has been preserved.
	 */
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);

	/*
	 * The kernel_context HWSP is stored in the status_page. As above,
	 * that may be lost on resume/initialisation, and so we need to
	 * reset the value in the HWSP.
	 */
	sanitize_hwsp(engine);

	/* And scrub the dirty cachelines for the HWSP */
	clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
}

static bool stop_ring(struct intel_engine_cs *engine)
{
	/* Empty the ring by skipping to the end */
	ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL));
	ENGINE_POSTING_READ(engine, RING_HEAD);

	/* The ring must be empty before it is disabled */
	ENGINE_WRITE_FW(engine, RING_CTL, 0);
	ENGINE_POSTING_READ(engine, RING_CTL);

	/* Then reset the disabled ring */
	ENGINE_WRITE_FW(engine, RING_HEAD, 0);
	ENGINE_WRITE_FW(engine, RING_TAIL, 0);

	return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0;
}

static void reset_prepare(struct intel_engine_cs *engine)
{
	/*
	 * We stop engines, otherwise we might get failed reset and a
	 * dead gpu (on elk). Also, modern gpus such as kbl can suffer
	 * from a system hang if a batchbuffer is progressing when
	 * the reset is issued, regardless of READY_TO_RESET ack.
	 * Thus assume it is best to stop engines on all gens
	 * where we have a gpu reset.
	 *
	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
	 *
	 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
	 * WaClearRingBufHeadRegAtInit:ctg,elk
	 *
	 * FIXME: Wa for more modern gens needs to be validated
	 */
	ENGINE_TRACE(engine, "\n");
	intel_engine_stop_cs(engine);

	if (!stop_ring(engine)) {
		/* G45 ring initialization often fails to reset head to zero */
		drm_dbg(&engine->i915->drm,
			"%s head not reset to zero "
			"ctl %08x head %08x tail %08x start %08x\n",
			engine->name,
			ENGINE_READ_FW(engine, RING_CTL),
			ENGINE_READ_FW(engine, RING_HEAD),
			ENGINE_READ_FW(engine, RING_TAIL),
			ENGINE_READ_FW(engine, RING_START));
	}

	if (!stop_ring(engine)) {
		drm_err(&engine->i915->drm,
			"failed to set %s head to zero "
			"ctl %08x head %08x tail %08x start %08x\n",
			engine->name,
			ENGINE_READ_FW(engine, RING_CTL),
			ENGINE_READ_FW(engine, RING_HEAD),
			ENGINE_READ_FW(engine, RING_TAIL),
			ENGINE_READ_FW(engine, RING_START));
	}
}

static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
	struct i915_request *pos, *rq;
	unsigned long flags;
	u32 head;

	rq = NULL;
	spin_lock_irqsave(&engine->active.lock, flags);
	rcu_read_lock();
	list_for_each_entry(pos, &engine->active.requests, sched.link) {
		if (!__i915_request_is_complete(pos)) {
			rq = pos;
			break;
		}
	}
	rcu_read_unlock();

	/*
	 * The guilty request will get skipped on a hung engine.
	 *
	 * Users of client default contexts do not rely on logical
	 * state preserved between batches so it is safe to execute
	 * queued requests following the hang. Non default contexts
	 * rely on preserved state, so skipping a batch loses the
	 * evolution of the state and it needs to be considered corrupted.
	 * Executing more queued batches on top of corrupted state is
	 * risky. But we take the risk by trying to advance through
	 * the queued requests in order to make the client behaviour
	 * more predictable around resets, by not throwing away a random
	 * amount of batches it has prepared for execution. Sophisticated
	 * clients can use gem_reset_stats_ioctl and dma fence status
	 * (exported via sync_file info ioctl on explicit fences) to observe
	 * when they lose the context state and should rebuild accordingly.
	 *
	 * The context ban, and ultimately the client ban, mechanisms are
	 * safety valves if client submission ends up resulting in nothing
	 * more than subsequent hangs.
	 */

	if (rq) {
		/*
		 * Try to restore the logical GPU state to match the
		 * continuation of the request queue. If we skip the
		 * context/PD restore, then the next request may try to execute
		 * assuming that its context is valid and loaded on the GPU and
		 * so may try to access invalid memory, prompting repeated GPU
		 * hangs.
		 *
		 * If the request was guilty, we still restore the logical
		 * state in case the next request requires it (e.g. the
		 * aliasing ppgtt), but skip over the hung batch.
		 *
		 * If the request was innocent, we try to replay the request
		 * with the restored context.
408 */ 409 __i915_request_reset(rq, stalled); 410 411 GEM_BUG_ON(rq->ring != engine->legacy.ring); 412 head = rq->head; 413 } else { 414 head = engine->legacy.ring->tail; 415 } 416 engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head); 417 418 spin_unlock_irqrestore(&engine->active.lock, flags); 419 } 420 421 static void reset_finish(struct intel_engine_cs *engine) 422 { 423 } 424 425 static void reset_cancel(struct intel_engine_cs *engine) 426 { 427 struct i915_request *request; 428 unsigned long flags; 429 430 spin_lock_irqsave(&engine->active.lock, flags); 431 432 /* Mark all submitted requests as skipped. */ 433 list_for_each_entry(request, &engine->active.requests, sched.link) 434 i915_request_mark_eio(request); 435 intel_engine_signal_breadcrumbs(engine); 436 437 /* Remaining _unready_ requests will be nop'ed when submitted */ 438 439 spin_unlock_irqrestore(&engine->active.lock, flags); 440 } 441 442 static void i9xx_submit_request(struct i915_request *request) 443 { 444 i915_request_submit(request); 445 wmb(); /* paranoid flush writes out of the WCB before mmio */ 446 447 ENGINE_WRITE(request->engine, RING_TAIL, 448 intel_ring_set_tail(request->ring, request->tail)); 449 } 450 451 static void __ring_context_fini(struct intel_context *ce) 452 { 453 i915_vma_put(ce->state); 454 } 455 456 static void ring_context_destroy(struct kref *ref) 457 { 458 struct intel_context *ce = container_of(ref, typeof(*ce), ref); 459 460 GEM_BUG_ON(intel_context_is_pinned(ce)); 461 462 if (ce->state) 463 __ring_context_fini(ce); 464 465 intel_context_fini(ce); 466 intel_context_free(ce); 467 } 468 469 static int ring_context_pre_pin(struct intel_context *ce, 470 struct i915_gem_ww_ctx *ww, 471 void **unused) 472 { 473 struct i915_address_space *vm; 474 int err = 0; 475 476 vm = vm_alias(ce->vm); 477 if (vm) 478 err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww); 479 480 return err; 481 } 482 483 static void __context_unpin_ppgtt(struct intel_context *ce) 484 { 485 struct i915_address_space *vm; 486 487 vm = vm_alias(ce->vm); 488 if (vm) 489 gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm)); 490 } 491 492 static void ring_context_unpin(struct intel_context *ce) 493 { 494 } 495 496 static void ring_context_post_unpin(struct intel_context *ce) 497 { 498 __context_unpin_ppgtt(ce); 499 } 500 501 static struct i915_vma * 502 alloc_context_vma(struct intel_engine_cs *engine) 503 { 504 struct drm_i915_private *i915 = engine->i915; 505 struct drm_i915_gem_object *obj; 506 struct i915_vma *vma; 507 int err; 508 509 obj = i915_gem_object_create_shmem(i915, engine->context_size); 510 if (IS_ERR(obj)) 511 return ERR_CAST(obj); 512 513 /* 514 * Try to make the context utilize L3 as well as LLC. 515 * 516 * On VLV we don't have L3 controls in the PTEs so we 517 * shouldn't touch the cache level, especially as that 518 * would make the object snooped which might have a 519 * negative performance impact. 520 * 521 * Snooping is required on non-llc platforms in execlist 522 * mode, but since all GGTT accesses use PAT entry 0 we 523 * get snooping anyway regardless of cache_level. 524 * 525 * This is only applicable for Ivy Bridge devices since 526 * later platforms don't have L3 control bits in the PTE. 
static int load_pd_dir(struct i915_request *rq,
		       struct i915_address_space *vm,
		       u32 valid)
{
	const struct intel_engine_cs * const engine = rq->engine;
	u32 *cs;

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
	*cs++ = valid;

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = pp_dir(vm);

	/* Stall until the page table load is complete? */
	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = intel_gt_scratch_offset(engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);

	intel_ring_advance(rq, cs);

	return rq->engine->emit_flush(rq, EMIT_FLUSH);
}

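/*
 * Emit an MI_SET_CONTEXT pointing at ce's context image, bracketed by the
 * workarounds the switch needs: MI_ARB_ON_OFF (and, on Haswell, toggling the
 * other engines' PSMI sleep messages) on gen7, MI_SUSPEND_FLUSH on gen5, and
 * the mandatory trailing MI_NOOP (WaMiSetContext_Hang).
 */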
static int mi_set_context(struct i915_request *rq,
			  struct intel_context *ce,
			  u32 flags)
{
	struct intel_engine_cs *engine = rq->engine;
	struct drm_i915_private *i915 = engine->i915;
	enum intel_engine_id id;
	const int num_engines =
		IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0;
	bool force_restore = false;
	int len;
	u32 *cs;

	len = 4;
	if (IS_GEN(i915, 7))
		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
	else if (IS_GEN(i915, 5))
		len += 2;
	if (flags & MI_FORCE_RESTORE) {
		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
		flags &= ~MI_FORCE_RESTORE;
		force_restore = true;
		len += 2;
	}

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (IS_GEN(i915, 7)) {
		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
		if (num_engines) {
			struct intel_engine_cs *signaller;

			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
			for_each_engine(signaller, engine->gt, id) {
				if (signaller == engine)
					continue;

				*cs++ = i915_mmio_reg_offset(
					   RING_PSMI_CTL(signaller->mmio_base));
				*cs++ = _MASKED_BIT_ENABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}
		}
	} else if (IS_GEN(i915, 5)) {
		/*
		 * This w/a is only listed for pre-production ilk a/b steppings,
		 * but is also mentioned for programming the powerctx. To be
		 * safe, just apply the workaround; we do not use SyncFlush so
		 * this should never take effect and so be a no-op!
		 */
		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
	}

	if (force_restore) {
		/*
		 * The HW doesn't handle being told to restore the current
		 * context very well. Quite often it likes to go off and
		 * sulk, especially when it is meant to be reloading PP_DIR.
		 * A very simple fix to force the reload is to simply switch
		 * away from the current context and back again.
		 *
		 * Note that the kernel_context will contain random state
		 * following the INHIBIT_RESTORE. We accept this since we
		 * never use the kernel_context state; it is merely a
		 * placeholder we use to flush other contexts.
		 */
		*cs++ = MI_SET_CONTEXT;
		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
			MI_MM_SPACE_GTT |
			MI_RESTORE_INHIBIT;
	}

	*cs++ = MI_NOOP;
	*cs++ = MI_SET_CONTEXT;
	*cs++ = i915_ggtt_offset(ce->state) | flags;
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	*cs++ = MI_NOOP;

	if (IS_GEN(i915, 7)) {
		if (num_engines) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = {}; /* keep gcc quiet */

			*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
			for_each_engine(signaller, engine->gt, id) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				*cs++ = i915_mmio_reg_offset(last_reg);
				*cs++ = _MASKED_BIT_DISABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
			}

			/* Insert a delay before the next switch! */
			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
			*cs++ = i915_mmio_reg_offset(last_reg);
			*cs++ = intel_gt_scratch_offset(engine->gt,
							INTEL_GT_SCRATCH_FIELD_DEFAULT);
			*cs++ = MI_NOOP;
		}
		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	} else if (IS_GEN(i915, 5)) {
		*cs++ = MI_SUSPEND_FLUSH;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

static int remap_l3_slice(struct i915_request *rq, int slice)
{
	u32 *cs, *remap_info = rq->engine->i915->l3_parity.remap_info[slice];
	int i;

	if (!remap_info)
		return 0;

	cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
778 */ 779 *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); 780 for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { 781 *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); 782 *cs++ = remap_info[i]; 783 } 784 *cs++ = MI_NOOP; 785 intel_ring_advance(rq, cs); 786 787 return 0; 788 } 789 790 static int remap_l3(struct i915_request *rq) 791 { 792 struct i915_gem_context *ctx = i915_request_gem_context(rq); 793 int i, err; 794 795 if (!ctx || !ctx->remap_slice) 796 return 0; 797 798 for (i = 0; i < MAX_L3_SLICES; i++) { 799 if (!(ctx->remap_slice & BIT(i))) 800 continue; 801 802 err = remap_l3_slice(rq, i); 803 if (err) 804 return err; 805 } 806 807 ctx->remap_slice = 0; 808 return 0; 809 } 810 811 static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) 812 { 813 int ret; 814 815 if (!vm) 816 return 0; 817 818 ret = rq->engine->emit_flush(rq, EMIT_FLUSH); 819 if (ret) 820 return ret; 821 822 /* 823 * Not only do we need a full barrier (post-sync write) after 824 * invalidating the TLBs, but we need to wait a little bit 825 * longer. Whether this is merely delaying us, or the 826 * subsequent flush is a key part of serialising with the 827 * post-sync op, this extra pass appears vital before a 828 * mm switch! 829 */ 830 ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G); 831 if (ret) 832 return ret; 833 834 return rq->engine->emit_flush(rq, EMIT_INVALIDATE); 835 } 836 837 static int clear_residuals(struct i915_request *rq) 838 { 839 struct intel_engine_cs *engine = rq->engine; 840 int ret; 841 842 ret = switch_mm(rq, vm_alias(engine->kernel_context->vm)); 843 if (ret) 844 return ret; 845 846 if (engine->kernel_context->state) { 847 ret = mi_set_context(rq, 848 engine->kernel_context, 849 MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT); 850 if (ret) 851 return ret; 852 } 853 854 ret = engine->emit_bb_start(rq, 855 engine->wa_ctx.vma->node.start, 0, 856 0); 857 if (ret) 858 return ret; 859 860 ret = engine->emit_flush(rq, EMIT_FLUSH); 861 if (ret) 862 return ret; 863 864 /* Always invalidate before the next switch_mm() */ 865 return engine->emit_flush(rq, EMIT_INVALIDATE); 866 } 867 868 static int switch_context(struct i915_request *rq) 869 { 870 struct intel_engine_cs *engine = rq->engine; 871 struct intel_context *ce = rq->context; 872 void **residuals = NULL; 873 int ret; 874 875 GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); 876 877 if (engine->wa_ctx.vma && ce != engine->kernel_context) { 878 if (engine->wa_ctx.vma->private != ce && 879 i915_mitigate_clear_residuals()) { 880 ret = clear_residuals(rq); 881 if (ret) 882 return ret; 883 884 residuals = &engine->wa_ctx.vma->private; 885 } 886 } 887 888 ret = switch_mm(rq, vm_alias(ce->vm)); 889 if (ret) 890 return ret; 891 892 if (ce->state) { 893 u32 flags; 894 895 GEM_BUG_ON(engine->id != RCS0); 896 897 /* For resource streamer on HSW+ and power context elsewhere */ 898 BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN); 899 BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN); 900 901 flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT; 902 if (test_bit(CONTEXT_VALID_BIT, &ce->flags)) 903 flags |= MI_RESTORE_EXT_STATE_EN; 904 else 905 flags |= MI_RESTORE_INHIBIT; 906 907 ret = mi_set_context(rq, ce, flags); 908 if (ret) 909 return ret; 910 } 911 912 ret = remap_l3(rq); 913 if (ret) 914 return ret; 915 916 /* 917 * Now past the point of no return, this request _will_ be emitted. 918 * 919 * Or at least this preamble will be emitted, the request may be 920 * interrupted prior to submitting the user payload. 
static int switch_context(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_context *ce = rq->context;
	void **residuals = NULL;
	int ret;

	GEM_BUG_ON(HAS_EXECLISTS(engine->i915));

	if (engine->wa_ctx.vma && ce != engine->kernel_context) {
		if (engine->wa_ctx.vma->private != ce &&
		    i915_mitigate_clear_residuals()) {
			ret = clear_residuals(rq);
			if (ret)
				return ret;

			residuals = &engine->wa_ctx.vma->private;
		}
	}

	ret = switch_mm(rq, vm_alias(ce->vm));
	if (ret)
		return ret;

	if (ce->state) {
		u32 flags;

		GEM_BUG_ON(engine->id != RCS0);

		/* For resource streamer on HSW+ and power context elsewhere */
		BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
		BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);

		flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
		if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
			flags |= MI_RESTORE_EXT_STATE_EN;
		else
			flags |= MI_RESTORE_INHIBIT;

		ret = mi_set_context(rq, ce, flags);
		if (ret)
			return ret;
	}

	ret = remap_l3(rq);
	if (ret)
		return ret;

	/*
	 * Now past the point of no return, this request _will_ be emitted.
	 *
	 * Or at least this preamble will be emitted, the request may be
	 * interrupted prior to submitting the user payload. If so, we
	 * still submit the "empty" request in order to preserve global
	 * state tracking such as this, our tracking of the current
	 * dirty context.
	 */
	if (residuals) {
		intel_context_put(*residuals);
		*residuals = intel_context_get(ce);
	}

	return 0;
}

static int ring_request_alloc(struct i915_request *request)
{
	int ret;

	GEM_BUG_ON(!intel_context_is_pinned(request->context));
	GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);

	/*
	 * Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += LEGACY_REQUEST_SIZE;

	/* Unconditionally invalidate GPU caches and TLBs. */
	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
	if (ret)
		return ret;

	ret = switch_context(request);
	if (ret)
		return ret;

	request->reserved_space -= LEGACY_REQUEST_SIZE;
	return 0;
}

static void gen6_bsd_submit_request(struct i915_request *request)
{
	struct intel_uncore *uncore = request->engine->uncore;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
			      _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (__intel_wait_for_register_fw(uncore,
					 GEN6_BSD_SLEEP_PSMI_CONTROL,
					 GEN6_BSD_SLEEP_INDICATOR,
					 0,
					 1000, 0, NULL))
		drm_err(&uncore->i915->drm,
			"timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	i9xx_submit_request(request);

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
991 */ 992 intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL, 993 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); 994 995 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 996 } 997 998 static void i9xx_set_default_submission(struct intel_engine_cs *engine) 999 { 1000 engine->submit_request = i9xx_submit_request; 1001 1002 engine->park = NULL; 1003 engine->unpark = NULL; 1004 } 1005 1006 static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) 1007 { 1008 i9xx_set_default_submission(engine); 1009 engine->submit_request = gen6_bsd_submit_request; 1010 } 1011 1012 static void ring_release(struct intel_engine_cs *engine) 1013 { 1014 struct drm_i915_private *dev_priv = engine->i915; 1015 1016 drm_WARN_ON(&dev_priv->drm, INTEL_GEN(dev_priv) > 2 && 1017 (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); 1018 1019 intel_engine_cleanup_common(engine); 1020 1021 if (engine->wa_ctx.vma) { 1022 intel_context_put(engine->wa_ctx.vma->private); 1023 i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); 1024 } 1025 1026 intel_ring_unpin(engine->legacy.ring); 1027 intel_ring_put(engine->legacy.ring); 1028 1029 intel_timeline_unpin(engine->legacy.timeline); 1030 intel_timeline_put(engine->legacy.timeline); 1031 } 1032 1033 static void setup_irq(struct intel_engine_cs *engine) 1034 { 1035 struct drm_i915_private *i915 = engine->i915; 1036 1037 if (INTEL_GEN(i915) >= 6) { 1038 engine->irq_enable = gen6_irq_enable; 1039 engine->irq_disable = gen6_irq_disable; 1040 } else if (INTEL_GEN(i915) >= 5) { 1041 engine->irq_enable = gen5_irq_enable; 1042 engine->irq_disable = gen5_irq_disable; 1043 } else if (INTEL_GEN(i915) >= 3) { 1044 engine->irq_enable = gen3_irq_enable; 1045 engine->irq_disable = gen3_irq_disable; 1046 } else { 1047 engine->irq_enable = gen2_irq_enable; 1048 engine->irq_disable = gen2_irq_disable; 1049 } 1050 } 1051 1052 static void setup_common(struct intel_engine_cs *engine) 1053 { 1054 struct drm_i915_private *i915 = engine->i915; 1055 1056 /* gen8+ are only supported with execlists */ 1057 GEM_BUG_ON(INTEL_GEN(i915) >= 8); 1058 1059 setup_irq(engine); 1060 1061 engine->resume = xcs_resume; 1062 engine->sanitize = xcs_sanitize; 1063 1064 engine->reset.prepare = reset_prepare; 1065 engine->reset.rewind = reset_rewind; 1066 engine->reset.cancel = reset_cancel; 1067 engine->reset.finish = reset_finish; 1068 1069 engine->cops = &ring_context_ops; 1070 engine->request_alloc = ring_request_alloc; 1071 1072 /* 1073 * Using a global execution timeline; the previous final breadcrumb is 1074 * equivalent to our next initial bread so we can elide 1075 * engine->emit_init_breadcrumb(). 
1076 */ 1077 engine->emit_fini_breadcrumb = gen3_emit_breadcrumb; 1078 if (IS_GEN(i915, 5)) 1079 engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; 1080 1081 engine->set_default_submission = i9xx_set_default_submission; 1082 1083 if (INTEL_GEN(i915) >= 6) 1084 engine->emit_bb_start = gen6_emit_bb_start; 1085 else if (INTEL_GEN(i915) >= 4) 1086 engine->emit_bb_start = gen4_emit_bb_start; 1087 else if (IS_I830(i915) || IS_I845G(i915)) 1088 engine->emit_bb_start = i830_emit_bb_start; 1089 else 1090 engine->emit_bb_start = gen3_emit_bb_start; 1091 } 1092 1093 static void setup_rcs(struct intel_engine_cs *engine) 1094 { 1095 struct drm_i915_private *i915 = engine->i915; 1096 1097 if (HAS_L3_DPF(i915)) 1098 engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; 1099 1100 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; 1101 1102 if (INTEL_GEN(i915) >= 7) { 1103 engine->emit_flush = gen7_emit_flush_rcs; 1104 engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs; 1105 } else if (IS_GEN(i915, 6)) { 1106 engine->emit_flush = gen6_emit_flush_rcs; 1107 engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs; 1108 } else if (IS_GEN(i915, 5)) { 1109 engine->emit_flush = gen4_emit_flush_rcs; 1110 } else { 1111 if (INTEL_GEN(i915) < 4) 1112 engine->emit_flush = gen2_emit_flush; 1113 else 1114 engine->emit_flush = gen4_emit_flush_rcs; 1115 engine->irq_enable_mask = I915_USER_INTERRUPT; 1116 } 1117 1118 if (IS_HASWELL(i915)) 1119 engine->emit_bb_start = hsw_emit_bb_start; 1120 } 1121 1122 static void setup_vcs(struct intel_engine_cs *engine) 1123 { 1124 struct drm_i915_private *i915 = engine->i915; 1125 1126 if (INTEL_GEN(i915) >= 6) { 1127 /* gen6 bsd needs a special wa for tail updates */ 1128 if (IS_GEN(i915, 6)) 1129 engine->set_default_submission = gen6_bsd_set_default_submission; 1130 engine->emit_flush = gen6_emit_flush_vcs; 1131 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; 1132 1133 if (IS_GEN(i915, 6)) 1134 engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; 1135 else 1136 engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; 1137 } else { 1138 engine->emit_flush = gen4_emit_flush_vcs; 1139 if (IS_GEN(i915, 5)) 1140 engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; 1141 else 1142 engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; 1143 } 1144 } 1145 1146 static void setup_bcs(struct intel_engine_cs *engine) 1147 { 1148 struct drm_i915_private *i915 = engine->i915; 1149 1150 engine->emit_flush = gen6_emit_flush_xcs; 1151 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; 1152 1153 if (IS_GEN(i915, 6)) 1154 engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; 1155 else 1156 engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; 1157 } 1158 1159 static void setup_vecs(struct intel_engine_cs *engine) 1160 { 1161 struct drm_i915_private *i915 = engine->i915; 1162 1163 GEM_BUG_ON(INTEL_GEN(i915) < 7); 1164 1165 engine->emit_flush = gen6_emit_flush_xcs; 1166 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; 1167 engine->irq_enable = hsw_irq_enable_vecs; 1168 engine->irq_disable = hsw_irq_disable_vecs; 1169 1170 engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; 1171 } 1172 1173 static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine, 1174 struct i915_vma * const vma) 1175 { 1176 return gen7_setup_clear_gpr_bb(engine, vma); 1177 } 1178 1179 static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine) 1180 { 1181 struct drm_i915_gem_object *obj; 1182 struct i915_vma *vma; 1183 int size; 1184 int err; 1185 1186 size = gen7_ctx_switch_bb_setup(engine, 
int intel_ring_submission_setup(struct intel_engine_cs *engine)
{
	struct intel_timeline *timeline;
	struct intel_ring *ring;
	int err;

	setup_common(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		setup_rcs(engine);
		break;
	case VIDEO_DECODE_CLASS:
		setup_vcs(engine);
		break;
	case COPY_ENGINE_CLASS:
		setup_bcs(engine);
		break;
	case VIDEO_ENHANCEMENT_CLASS:
		setup_vecs(engine);
		break;
	default:
		MISSING_CASE(engine->class);
		return -ENODEV;
	}

	timeline = intel_timeline_create_from_engine(engine,
						     I915_GEM_HWS_SEQNO_ADDR);
	if (IS_ERR(timeline)) {
		err = PTR_ERR(timeline);
		goto err;
	}
	GEM_BUG_ON(timeline->has_initial_breadcrumb);

	err = intel_timeline_pin(timeline, NULL);
	if (err)
		goto err_timeline;

	ring = intel_engine_create_ring(engine, SZ_16K);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_timeline_unpin;
	}

	err = intel_ring_pin(ring, NULL);
	if (err)
		goto err_ring;

	GEM_BUG_ON(engine->legacy.ring);
	engine->legacy.ring = ring;
	engine->legacy.timeline = timeline;

	GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);

	if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) {
		err = gen7_ctx_switch_bb_init(engine);
		if (err)
			goto err_ring_unpin;
	}

	/* Finally, take ownership and responsibility for cleanup! */
	engine->release = ring_release;

	return 0;

err_ring_unpin:
	intel_ring_unpin(ring);
err_ring:
	intel_ring_put(ring);
err_timeline_unpin:
	intel_timeline_unpin(timeline);
err_timeline:
	intel_timeline_put(timeline);
err:
	intel_engine_cleanup_common(engine);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_ring_submission.c"
#endif