// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "intel_gpu_commands.h"
#include "intel_lrc.h"
#include "intel_ring.h"

int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	bool vf_flush_wa = false, dc_flush_wa = false;
	u32 *cs, flags = 0;
	int len;

	flags |= PIPE_CONTROL_CS_STALL;

	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}

	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		/*
		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
		 * pipe control.
		 */
		if (GRAPHICS_VER(rq->engine->i915) == 9)
			vf_flush_wa = true;

		/* WaForGAMHang:kbl */
		if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
			dc_flush_wa = true;
	}

	len = 6;

	if (vf_flush_wa)
		len += 6;

	if (dc_flush_wa)
		len += 12;

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (vf_flush_wa)
		cs = gen8_emit_pipe_control(cs, 0, 0);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
					    0);

	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);

	intel_ring_advance(rq, cs);

	return 0;
}

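/*
 * Flush for the non-render engines: a single MI_FLUSH_DW with a post-sync
 * write into the PPHWSP scratch slot, plus a TLB (and, on the video decode
 * engines, BSD) invalidation when requested.
 */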
int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;
	}

	*cs++ = cmd;
	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */
	intel_ring_advance(rq, cs);

	return 0;
}

int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	if (mode & EMIT_FLUSH) {
		u32 *cs;
		u32 flags = 0;

		flags |= PIPE_CONTROL_CS_STALL;

		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	if (mode & EMIT_INVALIDATE) {
		u32 *cs;
		u32 flags = 0;

		flags |= PIPE_CONTROL_CS_STALL;

		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	return 0;
}

/*
 * On gen12+ MI_ARB_CHECK also carries the pre-parser control: callers pass
 * state == true to disable instruction pre-fetching across the following
 * commands and false to re-enable it (bit 8 acts as the write enable for
 * the state in bit 0).
 */
static u32 preparser_disable(bool state)
{
	return MI_ARB_CHECK | 1 << 8 | state;
}

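/*
 * Invalidate the AUX table for this engine: an LRI (with MMIO remap
 * enabled) writes AUX_INV into the engine's AUX table invalidation
 * register; the trailing MI_NOOP pads the emitted dwords to an even count.
 */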
u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
	*cs++ = i915_mmio_reg_offset(inv_reg);
	*cs++ = AUX_INV;
	*cs++ = MI_NOOP;

	return cs;
}

int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	struct intel_engine_cs *engine = rq->engine;

	if (mode & EMIT_FLUSH) {
		u32 flags = 0;
		u32 *cs;

		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		flags |= PIPE_CONTROL_FLUSH_L3;
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/* Wa_1409600907:tgl,adl-p */
		flags |= PIPE_CONTROL_DEPTH_STALL;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;

		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
		flags |= PIPE_CONTROL_QW_WRITE;

		flags |= PIPE_CONTROL_CS_STALL;

		if (engine->class == COMPUTE_CLASS)
			flags &= ~PIPE_CONTROL_3D_FLAGS;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen12_emit_pipe_control(cs,
					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
					     flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	if (mode & EMIT_INVALIDATE) {
		u32 flags = 0;
		u32 *cs, count;

		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
		flags |= PIPE_CONTROL_QW_WRITE;

		flags |= PIPE_CONTROL_CS_STALL;

		if (engine->class == COMPUTE_CLASS)
			flags &= ~PIPE_CONTROL_3D_FLAGS;

		if (!HAS_FLAT_CCS(rq->engine->i915))
			count = 8 + 4;
		else
			count = 8;

		cs = intel_ring_begin(rq, count);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Prevent the pre-parser from skipping past the TLB
		 * invalidate and loading a stale page for the batch
		 * buffer / request payload.
		 */
		*cs++ = preparser_disable(true);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

		if (!HAS_FLAT_CCS(rq->engine->i915)) {
			/* hsdes: 1809175790 */
			cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV);
		}

		*cs++ = preparser_disable(false);
		intel_ring_advance(rq, cs);
	}

	return 0;
}

int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	intel_engine_mask_t aux_inv = 0;
	u32 cmd, *cs;

	cmd = 4;
	if (mode & EMIT_INVALIDATE) {
		cmd += 2;

		if (!HAS_FLAT_CCS(rq->engine->i915) &&
		    (rq->engine->class == VIDEO_DECODE_CLASS ||
		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
			aux_inv = rq->engine->mask & ~BIT(BCS0);
			if (aux_inv)
				cmd += 4;
		}
	}

	cs = intel_ring_begin(rq, cmd);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(true);

	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;
	}

	*cs++ = cmd;
	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */

	if (aux_inv) { /* hsdes: 1809175790 */
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV);
		else
			cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV);
	}

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(false);

	intel_ring_advance(rq, cs);

	return 0;
}

static u32 preempt_address(struct intel_engine_cs *engine)
{
	return (i915_ggtt_offset(engine->status_page.vma) +
		I915_GEM_HWS_PREEMPT_ADDR);
}

static u32 hwsp_offset(const struct i915_request *rq)
{
	const struct intel_timeline *tl;

	/* Before the request is executed, the timeline is fixed */
	tl = rcu_dereference_protected(rq->timeline,
				       !i915_request_signaled(rq));

	/* See the comment in i915_request_active_seqno(). */
	return page_mask_bits(tl->hwsp_offset) + offset_in_page(rq->hwsp_seqno);
}

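/*
 * The initial breadcrumb writes seqno - 1 into the timeline hwsp; once
 * that store lands, i915_request_started() reports true (see below), and
 * rq->infix records where the user payload begins.
 */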
int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
	u32 *cs;

	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
		return 0;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = hwsp_offset(rq);
	*cs++ = 0;
	*cs++ = rq->fence.seqno - 1;

	/*
	 * Check if we have been preempted before we even get started.
	 *
	 * After this point i915_request_started() reports true, even if
	 * we get preempted and so are no longer running.
	 *
	 * i915_request_started() is used during preemption processing
	 * to decide if the request is currently inside the user payload
	 * or spinning on a kernel semaphore (or earlier). For no-preemption
	 * requests, we do allow preemption on the semaphore before the user
	 * payload, but do not allow preemption once the request is started.
	 *
	 * i915_request_started() is similarly used during GPU hangs to
	 * determine if the user's payload was guilty, and if so, the
	 * request is banned. Before the request is started, it is assumed
	 * to be unharmed and an innocent victim of another's hang.
	 */
	*cs++ = MI_NOOP;
	*cs++ = MI_ARB_CHECK;

	intel_ring_advance(rq, cs);

	/* Record the updated position of the request's payload */
	rq->infix = intel_ring_offset(rq, cs);

	__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);

	return 0;
}

int gen8_emit_bb_start_noarb(struct i915_request *rq,
			     u64 offset, u32 len,
			     const unsigned int flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * WaDisableCtxRestoreArbitration:bdw,chv
	 *
	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
	 * particular all the gen that do not need the w/a at all!), if we
	 * took care to make sure that on every switch into this context
	 * (both ordinary and for preemption) arbitration was enabled, we
	 * would be fine. However, for gen8 there is another w/a that
	 * requires us to not preempt inside GPGPU execution, so we keep
	 * arbitration disabled for gen8 batches. Arbitration will be
	 * re-enabled before we close the request
	 * (engine->emit_fini_breadcrumb).
	 */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* FIXME(BDW+): Address space and security selectors. */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	intel_ring_advance(rq, cs);

	return 0;
}

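/*
 * Arbitration is enabled only around the batch itself: preemption may
 * occur while the user payload runs, but is disabled again once the batch
 * returns so that the closing commands run undisturbed until
 * gen8_emit_fini_breadcrumb_tail() re-enables it at a known point.
 */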
int gen8_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       const unsigned int flags)
{
	u32 *cs;

	if (unlikely(i915_request_has_nopreempt(rq)))
		return gen8_emit_bb_start_noarb(rq, offset, len, flags);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static void assert_request_valid(struct i915_request *rq)
{
	struct intel_ring *ring __maybe_unused = rq->ring;

	/* Can we unwind this request without appearing to go forwards? */
	GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
}

/*
 * Reserve space for 2 NOOPs at the end of each request to be
 * used as a workaround for not being allowed to do lite
 * restore with HEAD==TAIL (WaIdleLiteRestore).
 */
static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs)
{
	/* Ensure there's always at least one preemption point per-request. */
	*cs++ = MI_ARB_CHECK;
	*cs++ = MI_NOOP;
	rq->wa_tail = intel_ring_offset(rq, cs);

	/* Check that entire request is less than half the ring */
	assert_request_valid(rq);

	return cs;
}

static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = preempt_address(rq->engine);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	return cs;
}

static __always_inline u32*
gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	if (intel_engine_has_semaphores(rq->engine) &&
	    !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
		cs = emit_preempt_busywait(rq, cs);

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}

static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}

u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}

u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
				    PIPE_CONTROL_DC_FLUSH_ENABLE,
				    0);

	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
	cs = gen8_emit_ggtt_write_rcs(cs,
				      rq->fence.seqno,
				      hwsp_offset(rq),
				      PIPE_CONTROL_FLUSH_ENABLE |
				      PIPE_CONTROL_CS_STALL);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_ggtt_write_rcs(cs,
				      rq->fence.seqno,
				      hwsp_offset(rq),
				      PIPE_CONTROL_CS_STALL |
				      PIPE_CONTROL_TILE_CACHE_FLUSH |
				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
				      PIPE_CONTROL_DC_FLUSH_ENABLE |
				      PIPE_CONTROL_FLUSH_ENABLE);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

/*
 * Note that the CS instruction pre-parser will not stall on the breadcrumb
 * flush and will continue pre-fetching the instructions after it before the
 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
 * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
 * of the next request before the memory has been flushed, we're guaranteed that
 * we won't access the batch itself too early.
 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
 * so, if the current request is modifying an instruction in the next request on
 * the same intel_context, we might pre-fetch and then execute the pre-update
 * instruction. To avoid this, the users of self-modifying code should either
 * disable the parser around the code emitting the memory writes, via a new flag
 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
 * the in-kernel use-cases we've opted to use a separate context, see
 * reloc_gpu() as an example.
 * All the above applies only to the instructions themselves. Non-inline data
 * used by the instructions is not pre-fetched.
 */

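/*
 * gen12 flavour of the preempt busywait: the token form of
 * MI_SEMAPHORE_WAIT is one dword longer than the gen8 encoding, so the
 * extra dword takes the place of the trailing MI_NOOP used above.
 */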
static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = preempt_address(rq->engine);
	*cs++ = 0;
	*cs++ = 0;

	return cs;
}

/* Wa_14014475959:dg2 */
#define CCS_SEMAPHORE_PPHWSP_OFFSET	0x540
static u32 ccs_semaphore_offset(struct i915_request *rq)
{
	return i915_ggtt_offset(rq->context->state) +
		(LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
}

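/*
 * Hold the CCS context at the end of the request: an inline MI_ATOMIC MOVE
 * writes 1 into a semaphore slot in the context's PPHWSP, then
 * MI_SEMAPHORE_WAIT spins until that slot reads zero again.
 */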
/* Wa_14014475959:dg2 */
static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
	int i;

	*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
		MI_ATOMIC_MOVE;
	*cs++ = ccs_semaphore_offset(rq);
	*cs++ = 0;
	*cs++ = 1;

	/*
	 * When MI_ATOMIC_INLINE_DATA is set, this command must be 11 DW +
	 * (1 NOP) to align: 4 DWs above + 8 filler DWs here.
	 */
	for (i = 0; i < 8; ++i)
		*cs++ = 0;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = ccs_semaphore_offset(rq);
	*cs++ = 0;

	return cs;
}

static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	if (intel_engine_has_semaphores(rq->engine) &&
	    !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
		cs = gen12_emit_preempt_busywait(rq, cs);

	/* Wa_14014475959:dg2 */
	if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
		cs = ccs_emit_wa_busywait(rq, cs);

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	/* XXX Stalling flush before seqno write; post-sync not */
	cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	struct drm_i915_private *i915 = rq->engine->i915;
	u32 flags = (PIPE_CONTROL_CS_STALL |
		     PIPE_CONTROL_TILE_CACHE_FLUSH |
		     PIPE_CONTROL_FLUSH_L3 |
		     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		     PIPE_CONTROL_DC_FLUSH_ENABLE |
		     PIPE_CONTROL_FLUSH_ENABLE);

	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
		/* Wa_1409600907 */
		flags |= PIPE_CONTROL_DEPTH_STALL;

	if (rq->engine->class == COMPUTE_CLASS)
		flags &= ~PIPE_CONTROL_3D_FLAGS;

	cs = gen12_emit_ggtt_write_rcs(cs,
				       rq->fence.seqno,
				       hwsp_offset(rq),
				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
				       flags);

	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}