1 /* 2 * Copyright 2018 Red Hat Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 #include "nouveau_dmem.h" 23 #include "nouveau_drv.h" 24 #include "nouveau_chan.h" 25 #include "nouveau_dma.h" 26 #include "nouveau_mem.h" 27 #include "nouveau_bo.h" 28 29 #include <nvif/class.h> 30 #include <nvif/object.h> 31 #include <nvif/if500b.h> 32 #include <nvif/if900b.h> 33 34 #include <linux/sched/mm.h> 35 #include <linux/hmm.h> 36 37 /* 38 * FIXME: this is ugly right now we are using TTM to allocate vram and we pin 39 * it in vram while in use. We likely want to overhaul memory management for 40 * nouveau to be more page like (not necessarily with system page size but a 41 * bigger page size) at lowest level and have some shim layer on top that would 42 * provide the same functionality as TTM. 43 */ 44 #define DMEM_CHUNK_SIZE (2UL << 20) 45 #define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT) 46 47 struct nouveau_migrate; 48 49 enum nouveau_aper { 50 NOUVEAU_APER_VIRT, 51 NOUVEAU_APER_VRAM, 52 NOUVEAU_APER_HOST, 53 }; 54 55 typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages, 56 enum nouveau_aper, u64 dst_addr, 57 enum nouveau_aper, u64 src_addr); 58 59 struct nouveau_dmem_chunk { 60 struct list_head list; 61 struct nouveau_bo *bo; 62 struct nouveau_drm *drm; 63 unsigned long pfn_first; 64 unsigned long callocated; 65 unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)]; 66 spinlock_t lock; 67 }; 68 69 struct nouveau_dmem_migrate { 70 nouveau_migrate_copy_t copy_func; 71 struct nouveau_channel *chan; 72 }; 73 74 struct nouveau_dmem { 75 struct nouveau_drm *drm; 76 struct dev_pagemap pagemap; 77 struct nouveau_dmem_migrate migrate; 78 struct list_head chunk_free; 79 struct list_head chunk_full; 80 struct list_head chunk_empty; 81 struct mutex mutex; 82 }; 83 84 static inline struct nouveau_dmem *page_to_dmem(struct page *page) 85 { 86 return container_of(page->pgmap, struct nouveau_dmem, pagemap); 87 } 88 89 struct nouveau_dmem_fault { 90 struct nouveau_drm *drm; 91 struct nouveau_fence *fence; 92 dma_addr_t *dma; 93 unsigned long npages; 94 }; 95 96 struct nouveau_migrate { 97 struct vm_area_struct *vma; 98 struct nouveau_drm *drm; 99 struct nouveau_fence *fence; 100 unsigned long npages; 101 dma_addr_t *dma; 102 unsigned long dma_nr; 103 }; 104 105 static void nouveau_dmem_page_free(struct page *page) 106 { 107 struct nouveau_dmem_chunk *chunk = page->zone_device_data; 108 unsigned long idx = page_to_pfn(page) - chunk->pfn_first; 109 110 /* 111 * FIXME: 112 * 113 * This is really a bad example, we need to overhaul nouveau memory 114 * management to be more page focus and allow lighter locking scheme 115 * to be use in the process. 116 */ 117 spin_lock(&chunk->lock); 118 clear_bit(idx, chunk->bitmap); 119 WARN_ON(!chunk->callocated); 120 chunk->callocated--; 121 /* 122 * FIXME when chunk->callocated reach 0 we should add the chunk to 123 * a reclaim list so that it can be freed in case of memory pressure. 124 */ 125 spin_unlock(&chunk->lock); 126 } 127 128 static void 129 nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, 130 const unsigned long *src_pfns, 131 unsigned long *dst_pfns, 132 unsigned long start, 133 unsigned long end, 134 void *private) 135 { 136 struct nouveau_dmem_fault *fault = private; 137 struct nouveau_drm *drm = fault->drm; 138 struct device *dev = drm->dev->dev; 139 unsigned long addr, i, npages = 0; 140 nouveau_migrate_copy_t copy; 141 int ret; 142 143 144 /* First allocate new memory */ 145 for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { 146 struct page *dpage, *spage; 147 148 dst_pfns[i] = 0; 149 spage = migrate_pfn_to_page(src_pfns[i]); 150 if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) 151 continue; 152 153 dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr); 154 if (!dpage) { 155 dst_pfns[i] = MIGRATE_PFN_ERROR; 156 continue; 157 } 158 lock_page(dpage); 159 160 dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | 161 MIGRATE_PFN_LOCKED; 162 npages++; 163 } 164 165 /* Allocate storage for DMA addresses, so we can unmap later. */ 166 fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL); 167 if (!fault->dma) 168 goto error; 169 170 /* Copy things over */ 171 copy = drm->dmem->migrate.copy_func; 172 for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { 173 struct nouveau_dmem_chunk *chunk; 174 struct page *spage, *dpage; 175 u64 src_addr, dst_addr; 176 177 dpage = migrate_pfn_to_page(dst_pfns[i]); 178 if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) 179 continue; 180 181 spage = migrate_pfn_to_page(src_pfns[i]); 182 if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { 183 dst_pfns[i] = MIGRATE_PFN_ERROR; 184 __free_page(dpage); 185 continue; 186 } 187 188 fault->dma[fault->npages] = 189 dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE, 190 PCI_DMA_BIDIRECTIONAL, 191 DMA_ATTR_SKIP_CPU_SYNC); 192 if (dma_mapping_error(dev, fault->dma[fault->npages])) { 193 dst_pfns[i] = MIGRATE_PFN_ERROR; 194 __free_page(dpage); 195 continue; 196 } 197 198 dst_addr = fault->dma[fault->npages++]; 199 200 chunk = spage->zone_device_data; 201 src_addr = page_to_pfn(spage) - chunk->pfn_first; 202 src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset; 203 204 ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr, 205 NOUVEAU_APER_VRAM, src_addr); 206 if (ret) { 207 dst_pfns[i] = MIGRATE_PFN_ERROR; 208 __free_page(dpage); 209 continue; 210 } 211 } 212 213 nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence); 214 215 return; 216 217 error: 218 for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { 219 struct page *page; 220 221 if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) 222 continue; 223 224 page = migrate_pfn_to_page(dst_pfns[i]); 225 dst_pfns[i] = MIGRATE_PFN_ERROR; 226 if (page == NULL) 227 continue; 228 229 __free_page(page); 230 } 231 } 232 233 void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma, 234 const unsigned long *src_pfns, 235 const unsigned long *dst_pfns, 236 unsigned long start, 237 unsigned long end, 238 void *private) 239 { 240 struct nouveau_dmem_fault *fault = private; 241 struct nouveau_drm *drm = fault->drm; 242 243 if (fault->fence) { 244 nouveau_fence_wait(fault->fence, true, false); 245 nouveau_fence_unref(&fault->fence); 246 } else { 247 /* 248 * FIXME wait for channel to be IDLE before calling finalizing 249 * the hmem object below (nouveau_migrate_hmem_fini()). 250 */ 251 } 252 253 while (fault->npages--) { 254 dma_unmap_page(drm->dev->dev, fault->dma[fault->npages], 255 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 256 } 257 kfree(fault->dma); 258 } 259 260 static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = { 261 .alloc_and_copy = nouveau_dmem_fault_alloc_and_copy, 262 .finalize_and_map = nouveau_dmem_fault_finalize_and_map, 263 }; 264 265 static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) 266 { 267 struct nouveau_dmem *dmem = page_to_dmem(vmf->page); 268 unsigned long src[1] = {0}, dst[1] = {0}; 269 struct nouveau_dmem_fault fault = { .drm = dmem->drm }; 270 int ret; 271 272 /* 273 * FIXME what we really want is to find some heuristic to migrate more 274 * than just one page on CPU fault. When such fault happens it is very 275 * likely that more surrounding page will CPU fault too. 276 */ 277 ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma, 278 vmf->address, vmf->address + PAGE_SIZE, 279 src, dst, &fault); 280 if (ret) 281 return VM_FAULT_SIGBUS; 282 283 if (dst[0] == MIGRATE_PFN_ERROR) 284 return VM_FAULT_SIGBUS; 285 286 return 0; 287 } 288 289 static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { 290 .page_free = nouveau_dmem_page_free, 291 .migrate_to_ram = nouveau_dmem_migrate_to_ram, 292 }; 293 294 static int 295 nouveau_dmem_chunk_alloc(struct nouveau_drm *drm) 296 { 297 struct nouveau_dmem_chunk *chunk; 298 int ret; 299 300 if (drm->dmem == NULL) 301 return -EINVAL; 302 303 mutex_lock(&drm->dmem->mutex); 304 chunk = list_first_entry_or_null(&drm->dmem->chunk_empty, 305 struct nouveau_dmem_chunk, 306 list); 307 if (chunk == NULL) { 308 mutex_unlock(&drm->dmem->mutex); 309 return -ENOMEM; 310 } 311 312 list_del(&chunk->list); 313 mutex_unlock(&drm->dmem->mutex); 314 315 ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0, 316 TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL, 317 &chunk->bo); 318 if (ret) 319 goto out; 320 321 ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); 322 if (ret) { 323 nouveau_bo_ref(NULL, &chunk->bo); 324 goto out; 325 } 326 327 bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES); 328 spin_lock_init(&chunk->lock); 329 330 out: 331 mutex_lock(&drm->dmem->mutex); 332 if (chunk->bo) 333 list_add(&chunk->list, &drm->dmem->chunk_empty); 334 else 335 list_add_tail(&chunk->list, &drm->dmem->chunk_empty); 336 mutex_unlock(&drm->dmem->mutex); 337 338 return ret; 339 } 340 341 static struct nouveau_dmem_chunk * 342 nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm) 343 { 344 struct nouveau_dmem_chunk *chunk; 345 346 chunk = list_first_entry_or_null(&drm->dmem->chunk_free, 347 struct nouveau_dmem_chunk, 348 list); 349 if (chunk) 350 return chunk; 351 352 chunk = list_first_entry_or_null(&drm->dmem->chunk_empty, 353 struct nouveau_dmem_chunk, 354 list); 355 if (chunk->bo) 356 return chunk; 357 358 return NULL; 359 } 360 361 static int 362 nouveau_dmem_pages_alloc(struct nouveau_drm *drm, 363 unsigned long npages, 364 unsigned long *pages) 365 { 366 struct nouveau_dmem_chunk *chunk; 367 unsigned long c; 368 int ret; 369 370 memset(pages, 0xff, npages * sizeof(*pages)); 371 372 mutex_lock(&drm->dmem->mutex); 373 for (c = 0; c < npages;) { 374 unsigned long i; 375 376 chunk = nouveau_dmem_chunk_first_free_locked(drm); 377 if (chunk == NULL) { 378 mutex_unlock(&drm->dmem->mutex); 379 ret = nouveau_dmem_chunk_alloc(drm); 380 if (ret) { 381 if (c) 382 break; 383 return ret; 384 } 385 continue; 386 } 387 388 spin_lock(&chunk->lock); 389 i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES); 390 while (i < DMEM_CHUNK_NPAGES && c < npages) { 391 pages[c] = chunk->pfn_first + i; 392 set_bit(i, chunk->bitmap); 393 chunk->callocated++; 394 c++; 395 396 i = find_next_zero_bit(chunk->bitmap, 397 DMEM_CHUNK_NPAGES, i); 398 } 399 spin_unlock(&chunk->lock); 400 } 401 mutex_unlock(&drm->dmem->mutex); 402 403 return 0; 404 } 405 406 static struct page * 407 nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm) 408 { 409 unsigned long pfns[1]; 410 struct page *page; 411 int ret; 412 413 /* FIXME stop all the miss-match API ... */ 414 ret = nouveau_dmem_pages_alloc(drm, 1, pfns); 415 if (ret) 416 return NULL; 417 418 page = pfn_to_page(pfns[0]); 419 get_page(page); 420 lock_page(page); 421 return page; 422 } 423 424 static void 425 nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page) 426 { 427 unlock_page(page); 428 put_page(page); 429 } 430 431 void 432 nouveau_dmem_resume(struct nouveau_drm *drm) 433 { 434 struct nouveau_dmem_chunk *chunk; 435 int ret; 436 437 if (drm->dmem == NULL) 438 return; 439 440 mutex_lock(&drm->dmem->mutex); 441 list_for_each_entry (chunk, &drm->dmem->chunk_free, list) { 442 ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); 443 /* FIXME handle pin failure */ 444 WARN_ON(ret); 445 } 446 list_for_each_entry (chunk, &drm->dmem->chunk_full, list) { 447 ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); 448 /* FIXME handle pin failure */ 449 WARN_ON(ret); 450 } 451 mutex_unlock(&drm->dmem->mutex); 452 } 453 454 void 455 nouveau_dmem_suspend(struct nouveau_drm *drm) 456 { 457 struct nouveau_dmem_chunk *chunk; 458 459 if (drm->dmem == NULL) 460 return; 461 462 mutex_lock(&drm->dmem->mutex); 463 list_for_each_entry (chunk, &drm->dmem->chunk_free, list) { 464 nouveau_bo_unpin(chunk->bo); 465 } 466 list_for_each_entry (chunk, &drm->dmem->chunk_full, list) { 467 nouveau_bo_unpin(chunk->bo); 468 } 469 mutex_unlock(&drm->dmem->mutex); 470 } 471 472 void 473 nouveau_dmem_fini(struct nouveau_drm *drm) 474 { 475 struct nouveau_dmem_chunk *chunk, *tmp; 476 477 if (drm->dmem == NULL) 478 return; 479 480 mutex_lock(&drm->dmem->mutex); 481 482 WARN_ON(!list_empty(&drm->dmem->chunk_free)); 483 WARN_ON(!list_empty(&drm->dmem->chunk_full)); 484 485 list_for_each_entry_safe (chunk, tmp, &drm->dmem->chunk_empty, list) { 486 if (chunk->bo) { 487 nouveau_bo_unpin(chunk->bo); 488 nouveau_bo_ref(NULL, &chunk->bo); 489 } 490 list_del(&chunk->list); 491 kfree(chunk); 492 } 493 494 mutex_unlock(&drm->dmem->mutex); 495 } 496 497 static int 498 nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages, 499 enum nouveau_aper dst_aper, u64 dst_addr, 500 enum nouveau_aper src_aper, u64 src_addr) 501 { 502 struct nouveau_channel *chan = drm->dmem->migrate.chan; 503 u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ | 504 (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ | 505 (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ | 506 (1 << 2) /* FLUSH_ENABLE_TRUE. */ | 507 (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */; 508 int ret; 509 510 ret = RING_SPACE(chan, 13); 511 if (ret) 512 return ret; 513 514 if (src_aper != NOUVEAU_APER_VIRT) { 515 switch (src_aper) { 516 case NOUVEAU_APER_VRAM: 517 BEGIN_IMC0(chan, NvSubCopy, 0x0260, 0); 518 break; 519 case NOUVEAU_APER_HOST: 520 BEGIN_IMC0(chan, NvSubCopy, 0x0260, 1); 521 break; 522 default: 523 return -EINVAL; 524 } 525 launch_dma |= 0x00001000; /* SRC_TYPE_PHYSICAL. */ 526 } 527 528 if (dst_aper != NOUVEAU_APER_VIRT) { 529 switch (dst_aper) { 530 case NOUVEAU_APER_VRAM: 531 BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0); 532 break; 533 case NOUVEAU_APER_HOST: 534 BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1); 535 break; 536 default: 537 return -EINVAL; 538 } 539 launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */ 540 } 541 542 BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8); 543 OUT_RING (chan, upper_32_bits(src_addr)); 544 OUT_RING (chan, lower_32_bits(src_addr)); 545 OUT_RING (chan, upper_32_bits(dst_addr)); 546 OUT_RING (chan, lower_32_bits(dst_addr)); 547 OUT_RING (chan, PAGE_SIZE); 548 OUT_RING (chan, PAGE_SIZE); 549 OUT_RING (chan, PAGE_SIZE); 550 OUT_RING (chan, npages); 551 BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1); 552 OUT_RING (chan, launch_dma); 553 return 0; 554 } 555 556 static int 557 nouveau_dmem_migrate_init(struct nouveau_drm *drm) 558 { 559 switch (drm->ttm.copy.oclass) { 560 case PASCAL_DMA_COPY_A: 561 case PASCAL_DMA_COPY_B: 562 case VOLTA_DMA_COPY_A: 563 case TURING_DMA_COPY_A: 564 drm->dmem->migrate.copy_func = nvc0b5_migrate_copy; 565 drm->dmem->migrate.chan = drm->ttm.chan; 566 return 0; 567 default: 568 break; 569 } 570 return -ENODEV; 571 } 572 573 void 574 nouveau_dmem_init(struct nouveau_drm *drm) 575 { 576 struct device *device = drm->dev->dev; 577 struct resource *res; 578 unsigned long i, size, pfn_first; 579 int ret; 580 581 /* This only make sense on PASCAL or newer */ 582 if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL) 583 return; 584 585 if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL))) 586 return; 587 588 drm->dmem->drm = drm; 589 mutex_init(&drm->dmem->mutex); 590 INIT_LIST_HEAD(&drm->dmem->chunk_free); 591 INIT_LIST_HEAD(&drm->dmem->chunk_full); 592 INIT_LIST_HEAD(&drm->dmem->chunk_empty); 593 594 size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE); 595 596 /* Initialize migration dma helpers before registering memory */ 597 ret = nouveau_dmem_migrate_init(drm); 598 if (ret) 599 goto out_free; 600 601 /* 602 * FIXME we need some kind of policy to decide how much VRAM we 603 * want to register with HMM. For now just register everything 604 * and latter if we want to do thing like over commit then we 605 * could revisit this. 606 */ 607 res = devm_request_free_mem_region(device, &iomem_resource, size); 608 if (IS_ERR(res)) 609 goto out_free; 610 drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE; 611 drm->dmem->pagemap.res = *res; 612 drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops; 613 if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap))) 614 goto out_free; 615 616 pfn_first = res->start >> PAGE_SHIFT; 617 for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) { 618 struct nouveau_dmem_chunk *chunk; 619 struct page *page; 620 unsigned long j; 621 622 chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); 623 if (chunk == NULL) { 624 nouveau_dmem_fini(drm); 625 return; 626 } 627 628 chunk->drm = drm; 629 chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES); 630 list_add_tail(&chunk->list, &drm->dmem->chunk_empty); 631 632 page = pfn_to_page(chunk->pfn_first); 633 for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page) 634 page->zone_device_data = chunk; 635 } 636 637 NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20); 638 return; 639 out_free: 640 kfree(drm->dmem); 641 drm->dmem = NULL; 642 } 643 644 static void 645 nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, 646 const unsigned long *src_pfns, 647 unsigned long *dst_pfns, 648 unsigned long start, 649 unsigned long end, 650 void *private) 651 { 652 struct nouveau_migrate *migrate = private; 653 struct nouveau_drm *drm = migrate->drm; 654 struct device *dev = drm->dev->dev; 655 unsigned long addr, i, npages = 0; 656 nouveau_migrate_copy_t copy; 657 int ret; 658 659 /* First allocate new memory */ 660 for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { 661 struct page *dpage, *spage; 662 663 dst_pfns[i] = 0; 664 spage = migrate_pfn_to_page(src_pfns[i]); 665 if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) 666 continue; 667 668 dpage = nouveau_dmem_page_alloc_locked(drm); 669 if (!dpage) 670 continue; 671 672 dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | 673 MIGRATE_PFN_LOCKED | 674 MIGRATE_PFN_DEVICE; 675 npages++; 676 } 677 678 if (!npages) 679 return; 680 681 /* Allocate storage for DMA addresses, so we can unmap later. */ 682 migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL); 683 if (!migrate->dma) 684 goto error; 685 686 /* Copy things over */ 687 copy = drm->dmem->migrate.copy_func; 688 for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { 689 struct nouveau_dmem_chunk *chunk; 690 struct page *spage, *dpage; 691 u64 src_addr, dst_addr; 692 693 dpage = migrate_pfn_to_page(dst_pfns[i]); 694 if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) 695 continue; 696 697 chunk = dpage->zone_device_data; 698 dst_addr = page_to_pfn(dpage) - chunk->pfn_first; 699 dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset; 700 701 spage = migrate_pfn_to_page(src_pfns[i]); 702 if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { 703 nouveau_dmem_page_free_locked(drm, dpage); 704 dst_pfns[i] = 0; 705 continue; 706 } 707 708 migrate->dma[migrate->dma_nr] = 709 dma_map_page_attrs(dev, spage, 0, PAGE_SIZE, 710 PCI_DMA_BIDIRECTIONAL, 711 DMA_ATTR_SKIP_CPU_SYNC); 712 if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) { 713 nouveau_dmem_page_free_locked(drm, dpage); 714 dst_pfns[i] = 0; 715 continue; 716 } 717 718 src_addr = migrate->dma[migrate->dma_nr++]; 719 720 ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr, 721 NOUVEAU_APER_HOST, src_addr); 722 if (ret) { 723 nouveau_dmem_page_free_locked(drm, dpage); 724 dst_pfns[i] = 0; 725 continue; 726 } 727 } 728 729 nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence); 730 731 return; 732 733 error: 734 for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { 735 struct page *page; 736 737 if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) 738 continue; 739 740 page = migrate_pfn_to_page(dst_pfns[i]); 741 dst_pfns[i] = MIGRATE_PFN_ERROR; 742 if (page == NULL) 743 continue; 744 745 __free_page(page); 746 } 747 } 748 749 void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma, 750 const unsigned long *src_pfns, 751 const unsigned long *dst_pfns, 752 unsigned long start, 753 unsigned long end, 754 void *private) 755 { 756 struct nouveau_migrate *migrate = private; 757 struct nouveau_drm *drm = migrate->drm; 758 759 if (migrate->fence) { 760 nouveau_fence_wait(migrate->fence, true, false); 761 nouveau_fence_unref(&migrate->fence); 762 } else { 763 /* 764 * FIXME wait for channel to be IDLE before finalizing 765 * the hmem object below (nouveau_migrate_hmem_fini()) ? 766 */ 767 } 768 769 while (migrate->dma_nr--) { 770 dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr], 771 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 772 } 773 kfree(migrate->dma); 774 775 /* 776 * FIXME optimization: update GPU page table to point to newly 777 * migrated memory. 778 */ 779 } 780 781 static const struct migrate_vma_ops nouveau_dmem_migrate_ops = { 782 .alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy, 783 .finalize_and_map = nouveau_dmem_migrate_finalize_and_map, 784 }; 785 786 int 787 nouveau_dmem_migrate_vma(struct nouveau_drm *drm, 788 struct vm_area_struct *vma, 789 unsigned long start, 790 unsigned long end) 791 { 792 unsigned long *src_pfns, *dst_pfns, npages; 793 struct nouveau_migrate migrate = {0}; 794 unsigned long i, c, max; 795 int ret = 0; 796 797 npages = (end - start) >> PAGE_SHIFT; 798 max = min(SG_MAX_SINGLE_ALLOC, npages); 799 src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); 800 if (src_pfns == NULL) 801 return -ENOMEM; 802 dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); 803 if (dst_pfns == NULL) { 804 kfree(src_pfns); 805 return -ENOMEM; 806 } 807 808 migrate.drm = drm; 809 migrate.vma = vma; 810 migrate.npages = npages; 811 for (i = 0; i < npages; i += c) { 812 unsigned long next; 813 814 c = min(SG_MAX_SINGLE_ALLOC, npages); 815 next = start + (c << PAGE_SHIFT); 816 ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start, 817 next, src_pfns, dst_pfns, &migrate); 818 if (ret) 819 goto out; 820 start = next; 821 } 822 823 out: 824 kfree(dst_pfns); 825 kfree(src_pfns); 826 return ret; 827 } 828 829 static inline bool 830 nouveau_dmem_page(struct nouveau_drm *drm, struct page *page) 831 { 832 return is_device_private_page(page) && drm->dmem == page_to_dmem(page); 833 } 834 835 void 836 nouveau_dmem_convert_pfn(struct nouveau_drm *drm, 837 struct hmm_range *range) 838 { 839 unsigned long i, npages; 840 841 npages = (range->end - range->start) >> PAGE_SHIFT; 842 for (i = 0; i < npages; ++i) { 843 struct nouveau_dmem_chunk *chunk; 844 struct page *page; 845 uint64_t addr; 846 847 page = hmm_pfn_to_page(range, range->pfns[i]); 848 if (page == NULL) 849 continue; 850 851 if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) { 852 continue; 853 } 854 855 if (!nouveau_dmem_page(drm, page)) { 856 WARN(1, "Some unknown device memory !\n"); 857 range->pfns[i] = 0; 858 continue; 859 } 860 861 chunk = page->zone_device_data; 862 addr = page_to_pfn(page) - chunk->pfn_first; 863 addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT; 864 865 range->pfns[i] &= ((1UL << range->pfn_shift) - 1); 866 range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; 867 } 868 } 869