// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gtt.h"
#include "intel_migrate.h"
#include "intel_ring.h"

struct insert_pte_data {
	u64 offset;
};

#define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */

static bool engine_supports_migration(struct intel_engine_cs *engine)
{
	if (!engine)
		return false;

	/*
	 * We need the ability to prevent arbitration (MI_ARB_ON_OFF),
	 * the ability to write PTEs using inline data (MI_STORE_DATA_IMM)
	 * and of course the ability to do the block transfer (blits).
	 */
	GEM_BUG_ON(engine->class != COPY_ENGINE_CLASS);

	return true;
}

static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
				struct i915_page_table *pt,
				void *data)
{
	struct insert_pte_data *d = data;

	/*
	 * Insert a dummy PTE into every PT that will map to LMEM to ensure
	 * we have a correctly setup PDE structure for later use.
	 */
	vm->insert_page(vm, 0, d->offset, I915_CACHE_NONE, PTE_LM);
	GEM_BUG_ON(!pt->is_compact);
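	/* Each compact PT covers 2M of VA (32 x 64K), so step one PDE at a time. */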
	d->offset += SZ_2M;
}

static void xehpsdv_insert_pte(struct i915_address_space *vm,
			       struct i915_page_table *pt,
			       void *data)
{
	struct insert_pte_data *d = data;

	/*
	 * We are playing tricks here, since the actual pt, from the hw
	 * pov, is only 256 bytes with 32 entries, or 4096 bytes with 512
	 * entries, but we are still guaranteed that the physical
	 * alignment is 64K underneath for the pt, and we are careful
	 * not to access the space in the void.
	 */
	vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, PTE_LM);
	d->offset += SZ_64K;
}

static void insert_pte(struct i915_address_space *vm,
		       struct i915_page_table *pt,
		       void *data)
{
	struct insert_pte_data *d = data;

	vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE,
			i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0);
	d->offset += PAGE_SIZE;
}

static struct i915_address_space *migrate_vm(struct intel_gt *gt)
{
	struct i915_vm_pt_stash stash = {};
	struct i915_ppgtt *vm;
	int err;
	int i;

	/*
	 * We construct a very special VM for use by all migration contexts;
	 * it is kept pinned so that it can be used at any time. As we need
	 * to pre-allocate the page directories for the migration VM, this
	 * limits us to only using a small number of prepared vma.
	 *
	 * To be able to pipeline and reschedule migration operations while
	 * avoiding unnecessary contention on the vm itself, the PTE updates
	 * are inline with the blits. All the blits use the same fixed
	 * addresses, with the backing store redirection being updated on the
	 * fly. Only 2 implicit vma are used for all migration operations.
	 *
	 * We lay the ppGTT out as:
	 *
	 *	[0, CHUNK_SZ) -> first object
	 *	[CHUNK_SZ, 2 * CHUNK_SZ) -> second object
	 *	[2 * CHUNK_SZ, 2 * CHUNK_SZ + 2 * CHUNK_SZ >> 9] -> PTE
	 *
	 * By exposing the dma addresses of the page directories themselves
	 * within the ppGTT, we are then able to rewrite the PTE prior to use.
	 * But the PTE update and subsequent migration operation must be atomic,
	 * i.e. within the same non-preemptible window so that we do not switch
	 * to another migration context that overwrites the PTE.
	 *
	 * This changes quite a bit on platforms with HAS_64K_PAGES support,
	 * where we instead have three windows, each CHUNK_SZ in size. The
	 * first is reserved for mapping system-memory, and that just uses the
	 * 512 entry layout using 4K GTT pages. The other two windows just map
	 * lmem pages and must use the new compact 32 entry layout using 64K GTT
	 * pages, which ensures we can address any lmem object that the user
	 * throws at us. We then also use xehpsdv_toggle_pdes as a way of
	 * just toggling the PDE bit (GEN12_PDE_64K) for us, to enable the
	 * compact layout for each of the page-tables that fall within the
	 * [CHUNK_SZ, 3 * CHUNK_SZ) range.
	 *
	 * We lay the ppGTT out as:
	 *
	 *	[0, CHUNK_SZ) -> first window/object, maps smem
	 *	[CHUNK_SZ, 2 * CHUNK_SZ) -> second window/object, maps lmem src
	 *	[2 * CHUNK_SZ, 3 * CHUNK_SZ) -> third window/object, maps lmem dst
	 *
	 * For the PTE window it's also quite different, since each PTE must
	 * point to some 64K page, one for each PT (since it's in lmem), and yet
	 * each is only <= 4096 bytes, but since the unused space within that PTE
	 * range is never touched, this should be fine.
	 *
	 * So basically each PT now needs 64K of virtual memory, instead of 4K,
	 * which looks like:
	 *
	 *	[3 * CHUNK_SZ, 3 * CHUNK_SZ + ((3 * CHUNK_SZ / SZ_2M) * SZ_64K)] -> PTE
	 */

	vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY);
	if (IS_ERR(vm))
		return ERR_CAST(vm);

	if (!vm->vm.allocate_va_range || !vm->vm.foreach) {
		err = -ENODEV;
		goto err_vm;
	}

	if (HAS_64K_PAGES(gt->i915))
		stash.pt_sz = I915_GTT_PAGE_SIZE_64K;

	/*
	 * Each engine instance is assigned its own chunk in the VM, so
	 * that we can run multiple instances concurrently.
	 */
	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
		struct intel_engine_cs *engine;
		u64 base = (u64)i << 32;
		struct insert_pte_data d = {};
		struct i915_gem_ww_ctx ww;
		u64 sz;

		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
		if (!engine_supports_migration(engine))
			continue;

		/*
		 * We copy in 8MiB chunks. Each PDE covers 2MiB, so we need
		 * 4x2 page directories for source/destination.
		 */
		if (HAS_64K_PAGES(gt->i915))
			sz = 3 * CHUNK_SZ;
		else
			sz = 2 * CHUNK_SZ;
		d.offset = base + sz;

		/*
		 * We need another page directory setup so that we can write
		 * the 8x512 PTEs in each chunk.
		 */
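		/*
		 * With CHUNK_SZ = 8M this works out to 32K of PTE space for
		 * the two 4K-page windows, or 768K on 64K-page platforms
		 * (one 64K page for each of the twelve 2M PDEs behind the
		 * three windows).
		 */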
		if (HAS_64K_PAGES(gt->i915))
			sz += (sz / SZ_2M) * SZ_64K;
		else
			sz += (sz >> 12) * sizeof(u64);

		err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz);
		if (err)
			goto err_vm;

		for_i915_gem_ww(&ww, err, true) {
			err = i915_vm_lock_objects(&vm->vm, &ww);
			if (err)
				continue;
			err = i915_vm_map_pt_stash(&vm->vm, &stash);
			if (err)
				continue;

			vm->vm.allocate_va_range(&vm->vm, &stash, base, sz);
		}
		i915_vm_free_pt_stash(&vm->vm, &stash);
		if (err)
			goto err_vm;

		/* Now allow the GPU to rewrite the PTE via its own ppGTT */
		if (HAS_64K_PAGES(gt->i915)) {
			vm->vm.foreach(&vm->vm, base, d.offset - base,
				       xehpsdv_insert_pte, &d);
			d.offset = base + CHUNK_SZ;
			vm->vm.foreach(&vm->vm,
				       d.offset,
				       2 * CHUNK_SZ,
				       xehpsdv_toggle_pdes, &d);
		} else {
			vm->vm.foreach(&vm->vm, base, d.offset - base,
				       insert_pte, &d);
		}
	}

	return &vm->vm;

err_vm:
	i915_vm_put(&vm->vm);
	return ERR_PTR(err);
}

static struct intel_engine_cs *first_copy_engine(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	int i;

	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
		if (engine_supports_migration(engine))
			return engine;
	}

	return NULL;
}

static struct intel_context *pinned_context(struct intel_gt *gt)
{
	static struct lock_class_key key;
	struct intel_engine_cs *engine;
	struct i915_address_space *vm;
	struct intel_context *ce;

	engine = first_copy_engine(gt);
	if (!engine)
		return ERR_PTR(-ENODEV);

	vm = migrate_vm(gt);
	if (IS_ERR(vm))
		return ERR_CAST(vm);

	ce = intel_engine_create_pinned_context(engine, vm, SZ_512K,
						I915_GEM_HWS_MIGRATE,
						&key, "migrate");
	i915_vm_put(vm);
	return ce;
}

int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt)
{
	struct intel_context *ce;

	memset(m, 0, sizeof(*m));

	ce = pinned_context(gt);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	m->context = ce;
	return 0;
}

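/*
 * Scale a random u32 by max with a 32x32->64 multiply and keep the high
 * word: a cheap, division-free way to pick an index in [0, max).
 */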
static int random_index(unsigned int max)
{
	return upper_32_bits(mul_u32_u32(get_random_u32(), max));
}

static struct intel_context *__migrate_engines(struct intel_gt *gt)
{
	struct intel_engine_cs *engines[MAX_ENGINE_INSTANCE];
	struct intel_engine_cs *engine;
	unsigned int count, i;

	count = 0;
	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
		if (engine_supports_migration(engine))
			engines[count++] = engine;
	}

	return intel_context_create(engines[random_index(count)]);
}

struct intel_context *intel_migrate_create_context(struct intel_migrate *m)
{
	struct intel_context *ce;

	/*
	 * We randomly distribute contexts across the engines upon construction,
	 * as they all share the same pinned vm, and so in order to allow
	 * multiple blits to run in parallel, we must construct each blit
	 * to use a different range of the vm for its GTT. This has to be
	 * known at construction, so we cannot use the late greedy load
	 * balancing of the virtual-engine.
	 */
	ce = __migrate_engines(m->context->engine->gt);
	if (IS_ERR(ce))
		return ce;

	ce->ring = NULL;
	ce->ring_size = SZ_256K;

	i915_vm_put(ce->vm);
	ce->vm = i915_vm_get(m->context->vm);

	return ce;
}

static inline struct sgt_dma sg_sgt(struct scatterlist *sg)
{
	dma_addr_t addr = sg_dma_address(sg);

	return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
}

static int emit_no_arbitration(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Explicitly disable preemption for this request. */
	*cs++ = MI_ARB_ON_OFF;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

static int emit_pte(struct i915_request *rq,
		    struct sgt_dma *it,
		    enum i915_cache_level cache_level,
		    bool is_lmem,
		    u64 offset,
		    int length)
{
	bool has_64K_pages = HAS_64K_PAGES(rq->engine->i915);
	const u64 encode = rq->context->vm->pte_encode(0, cache_level,
						       is_lmem ? PTE_LM : 0);
	struct intel_ring *ring = rq->ring;
	int pkt, dword_length;
	u32 total = 0;
	u32 page_size;
	u32 *hdr, *cs;

	GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8);

	page_size = I915_GTT_PAGE_SIZE;
	dword_length = 0x400;

	/* Compute the page directory offset for the target address range */
	if (has_64K_pages) {
		GEM_BUG_ON(!IS_ALIGNED(offset, SZ_2M));

		offset /= SZ_2M;
		offset *= SZ_64K;
		offset += 3 * CHUNK_SZ;

		if (is_lmem) {
			page_size = I915_GTT_PAGE_SIZE_64K;
			dword_length = 0x40;
		}
	} else {
		offset >>= 12;
		offset *= sizeof(u64);
		offset += 2 * CHUNK_SZ;
	}

	offset += (u64)rq->engine->instance << 32;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Pack as many PTE updates as possible into a single MI command */
	pkt = min_t(int, dword_length, ring->space / sizeof(u32) + 5);
	pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5);

	hdr = cs;
	*cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	do {
		if (cs - hdr >= pkt) {
			int dword_rem;

			*hdr += cs - hdr - 2;
			*cs++ = MI_NOOP;

			ring->emit = (void *)cs - ring->vaddr;
			intel_ring_advance(rq, cs);
			intel_ring_update_space(ring);

			cs = intel_ring_begin(rq, 6);
			if (IS_ERR(cs))
				return PTR_ERR(cs);

			dword_rem = dword_length;
			if (has_64K_pages) {
				if (IS_ALIGNED(total, SZ_2M)) {
					offset = round_up(offset, SZ_64K);
				} else {
					dword_rem = SZ_2M - (total & (SZ_2M - 1));
					dword_rem /= page_size;
					dword_rem *= 2;
				}
			}

			pkt = min_t(int, dword_rem, ring->space / sizeof(u32) + 5);
			pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5);

			hdr = cs;
			*cs++ = MI_STORE_DATA_IMM | REG_BIT(21);
			*cs++ = lower_32_bits(offset);
			*cs++ = upper_32_bits(offset);
		}

		GEM_BUG_ON(!IS_ALIGNED(it->dma, page_size));

		*cs++ = lower_32_bits(encode | it->dma);
		*cs++ = upper_32_bits(encode | it->dma);

		offset += 8;
		total += page_size;

		it->dma += page_size;
		if (it->dma >= it->max) {
			it->sg = __sg_next(it->sg);
			if (!it->sg || sg_dma_len(it->sg) == 0)
				break;

			it->dma = sg_dma_address(it->sg);
			it->max = it->dma + sg_dma_len(it->sg);
		}
	} while (total < length);

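	/*
	 * Patch the dword count into the last MI_STORE_DATA_IMM header;
	 * the MI DWord Length field encodes (total dwords - 2).
	 */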
	*hdr += cs - hdr - 2;
	*cs++ = MI_NOOP;

	ring->emit = (void *)cs - ring->vaddr;
	intel_ring_advance(rq, cs);
	intel_ring_update_space(ring);

	return total;
}

static bool wa_1209644611_applies(int ver, u32 size)
{
	u32 height = size >> PAGE_SHIFT;

	if (ver != 11)
		return false;

	return height % 4 == 3 && height <= 8;
}

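/*
 * The copy is expressed as a 2D blit with one page per row: height is
 * size / PAGE_SIZE rows, the stride is PAGE_SIZE bytes and, at 32bpp,
 * the width is PAGE_SIZE / 4 pixels.
 */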
static int emit_copy(struct i915_request *rq,
		     u32 dst_offset, u32 src_offset, int size)
{
	const int ver = GRAPHICS_VER(rq->engine->i915);
	u32 instance = rq->engine->instance;
	u32 *cs;

	cs = intel_ring_begin(rq, ver >= 8 ? 10 : 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (ver >= 9 && !wa_1209644611_applies(ver, size)) {
		*cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
		*cs++ = BLT_DEPTH_32 | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = dst_offset;
		*cs++ = instance;
		*cs++ = 0;
		*cs++ = PAGE_SIZE;
		*cs++ = src_offset;
		*cs++ = instance;
	} else if (ver >= 8) {
		*cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = dst_offset;
		*cs++ = instance;
		*cs++ = 0;
		*cs++ = PAGE_SIZE;
		*cs++ = src_offset;
		*cs++ = instance;
	} else {
		GEM_BUG_ON(instance);
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
		*cs++ = dst_offset;
		*cs++ = PAGE_SIZE;
		*cs++ = src_offset;
	}

	intel_ring_advance(rq, cs);
	return 0;
}

int
intel_context_migrate_copy(struct intel_context *ce,
			   const struct i915_deps *deps,
			   struct scatterlist *src,
			   enum i915_cache_level src_cache_level,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   enum i915_cache_level dst_cache_level,
			   bool dst_is_lmem,
			   struct i915_request **out)
{
	struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst);
	struct i915_request *rq;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	do {
		u32 src_offset, dst_offset;
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + copy must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		src_offset = 0;
		dst_offset = CHUNK_SZ;
		if (HAS_64K_PAGES(ce->engine->i915)) {
			GEM_BUG_ON(!src_is_lmem && !dst_is_lmem);

			src_offset = 0;
			dst_offset = 0;
			if (src_is_lmem)
				src_offset = CHUNK_SZ;
			if (dst_is_lmem)
				dst_offset = 2 * CHUNK_SZ;
		}

		len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
			       src_offset, CHUNK_SZ);
		if (len <= 0) {
			err = len;
			goto out_rq;
		}

		err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem,
			       dst_offset, len);
		if (err < 0)
			goto out_rq;
		if (err < len) {
			err = -EINVAL;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_copy(rq, dst_offset, src_offset, len);

		/* Arbitration is re-enabled between requests. */
out_rq:
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it_src.sg || !sg_dma_len(it_src.sg))
			break;

		cond_resched();
	} while (1);

out_ce:
	return err;
}

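/*
 * Like the copy, the clear is expressed as a 2D color blit over the chunk,
 * one page per row; the 16-bit height field is why the size is asserted to
 * fit within S16_MAX pages below.
 */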
static int emit_clear(struct i915_request *rq, u64 offset, int size, u32 value)
{
	const int ver = GRAPHICS_VER(rq->engine->i915);
	u32 *cs;

	GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

	offset += (u64)rq->engine->instance << 32;

	cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (ver >= 8) {
		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = lower_32_bits(offset);
		*cs++ = upper_32_bits(offset);
		*cs++ = value;
		*cs++ = MI_NOOP;
	} else {
		GEM_BUG_ON(upper_32_bits(offset));
		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = lower_32_bits(offset);
		*cs++ = value;
	}

	intel_ring_advance(rq, cs);
	return 0;
}

int
intel_context_migrate_clear(struct intel_context *ce,
			    const struct i915_deps *deps,
			    struct scatterlist *sg,
			    enum i915_cache_level cache_level,
			    bool is_lmem,
			    u32 value,
			    struct i915_request **out)
{
	struct sgt_dma it = sg_sgt(sg);
	struct i915_request *rq;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	do {
		u32 offset;
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + clear must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		offset = 0;
		if (HAS_64K_PAGES(ce->engine->i915) && is_lmem)
			offset = CHUNK_SZ;

		len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ);
		if (len <= 0) {
			err = len;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_clear(rq, offset, len, value);

		/* Arbitration is re-enabled between requests. */
out_rq:
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it.sg || !sg_dma_len(it.sg))
			break;

		cond_resched();
	} while (1);

out_ce:
	return err;
}

int intel_migrate_copy(struct intel_migrate *m,
		       struct i915_gem_ww_ctx *ww,
		       const struct i915_deps *deps,
		       struct scatterlist *src,
		       enum i915_cache_level src_cache_level,
		       bool src_is_lmem,
		       struct scatterlist *dst,
		       enum i915_cache_level dst_cache_level,
		       bool dst_is_lmem,
		       struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_migrate_copy(ce, deps,
					 src, src_cache_level, src_is_lmem,
					 dst, dst_cache_level, dst_is_lmem,
					 out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}

int
intel_migrate_clear(struct intel_migrate *m,
		    struct i915_gem_ww_ctx *ww,
		    const struct i915_deps *deps,
		    struct scatterlist *sg,
		    enum i915_cache_level cache_level,
		    bool is_lmem,
		    u32 value,
		    struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_migrate_clear(ce, deps, sg, cache_level,
					  is_lmem, value, out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}

void intel_migrate_fini(struct intel_migrate *m)
{
	struct intel_context *ce;

	ce = fetch_and_zero(&m->context);
	if (!ce)
		return;

	intel_engine_destroy_pinned_context(ce);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_migrate.c"
#endif