/*
 * Copyright 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nouveau_dmem.h"
#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "nouveau_dma.h"
#include "nouveau_mem.h"
#include "nouveau_bo.h"

#include <nvif/class.h>
#include <nvif/object.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>

#include <linux/sched/mm.h>
#include <linux/hmm.h>

/*
 * FIXME: this is ugly. Right now we use TTM to allocate VRAM and we keep it
 * pinned while it is in use. We likely want to overhaul memory management
 * for nouveau to be more page-like (not necessarily with the system page
 * size, but a bigger page size) at the lowest level, and have some shim
 * layer on top of that which would provide the same functionality as TTM.
 */
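
/*
 * Device memory is carved up into 2MB chunks; each chunk is backed by a
 * pinned VRAM buffer object and tracked with a per-chunk allocation bitmap.
 */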
#define DMEM_CHUNK_SIZE (2UL << 20)
#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)

struct nouveau_migrate;

enum nouveau_aper {
        NOUVEAU_APER_VIRT,
        NOUVEAU_APER_VRAM,
        NOUVEAU_APER_HOST,
};

typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
                                      enum nouveau_aper, u64 dst_addr,
                                      enum nouveau_aper, u64 src_addr);

struct nouveau_dmem_chunk {
        struct list_head list;
        struct nouveau_bo *bo;
        struct nouveau_drm *drm;
        unsigned long pfn_first;
        unsigned long callocated;
        unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)];
        spinlock_t lock;
};

struct nouveau_dmem_migrate {
        nouveau_migrate_copy_t copy_func;
        struct nouveau_channel *chan;
};

struct nouveau_dmem {
        struct hmm_devmem *devmem;
        struct nouveau_dmem_migrate migrate;
        struct list_head chunk_free;
        struct list_head chunk_full;
        struct list_head chunk_empty;
        struct mutex mutex;
};

struct nouveau_dmem_fault {
        struct nouveau_drm *drm;
        struct nouveau_fence *fence;
        dma_addr_t *dma;
        unsigned long npages;
};

struct nouveau_migrate {
        struct vm_area_struct *vma;
        struct nouveau_drm *drm;
        struct nouveau_fence *fence;
        unsigned long npages;
        dma_addr_t *dma;
        unsigned long dma_nr;
};

static void
nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page)
{
        struct nouveau_dmem_chunk *chunk;
        unsigned long idx;

        chunk = (void *)hmm_devmem_page_get_drvdata(page);
        idx = page_to_pfn(page) - chunk->pfn_first;

        /*
         * FIXME:
         *
         * This is really a bad example; we need to overhaul nouveau memory
         * management to be more page focused and to allow a lighter locking
         * scheme to be used in the process.
         */
        spin_lock(&chunk->lock);
        clear_bit(idx, chunk->bitmap);
        WARN_ON(!chunk->callocated);
        chunk->callocated--;
        /*
         * FIXME: when chunk->callocated reaches 0 we should add the chunk to
         * a reclaim list so that it can be freed in case of memory pressure.
         */
        spin_unlock(&chunk->lock);
}
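
/*
 * alloc_and_copy callback for CPU faults on device memory: allocate
 * anonymous system pages, DMA-map them and use the copy engine to copy the
 * faulting pages out of VRAM.
 */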
static void
nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
                                  const unsigned long *src_pfns,
                                  unsigned long *dst_pfns,
                                  unsigned long start,
                                  unsigned long end,
                                  void *private)
{
        struct nouveau_dmem_fault *fault = private;
        struct nouveau_drm *drm = fault->drm;
        struct device *dev = drm->dev->dev;
        unsigned long addr, i, npages = 0;
        nouveau_migrate_copy_t copy;
        int ret;

        /* First allocate new memory */
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct page *dpage, *spage;

                dst_pfns[i] = 0;
                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
                        continue;

                dpage = hmm_vma_alloc_locked_page(vma, addr);
                if (!dpage) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        continue;
                }

                dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
                              MIGRATE_PFN_LOCKED;
                npages++;
        }

        /* Allocate storage for DMA addresses, so we can unmap later. */
        fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL);
        if (!fault->dma)
                goto error;

        /* Copy things over */
        copy = drm->dmem->migrate.copy_func;
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct nouveau_dmem_chunk *chunk;
                struct page *spage, *dpage;
                u64 src_addr, dst_addr;

                dpage = migrate_pfn_to_page(dst_pfns[i]);
                if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        __free_page(dpage);
                        continue;
                }

                fault->dma[fault->npages] =
                        dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE,
                                           PCI_DMA_BIDIRECTIONAL,
                                           DMA_ATTR_SKIP_CPU_SYNC);
                if (dma_mapping_error(dev, fault->dma[fault->npages])) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        __free_page(dpage);
                        continue;
                }

                dst_addr = fault->dma[fault->npages++];

                chunk = (void *)hmm_devmem_page_get_drvdata(spage);
                src_addr = page_to_pfn(spage) - chunk->pfn_first;
                src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset;

                ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr,
                           NOUVEAU_APER_VRAM, src_addr);
                if (ret) {
                        dst_pfns[i] = MIGRATE_PFN_ERROR;
                        __free_page(dpage);
                        continue;
                }
        }

        nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence);

        return;

error:
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
                struct page *page;

                if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                page = migrate_pfn_to_page(dst_pfns[i]);
                dst_pfns[i] = MIGRATE_PFN_ERROR;
                if (page == NULL)
                        continue;

                __free_page(page);
        }
}

static void
nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma,
                                    const unsigned long *src_pfns,
                                    const unsigned long *dst_pfns,
                                    unsigned long start,
                                    unsigned long end,
                                    void *private)
{
        struct nouveau_dmem_fault *fault = private;
        struct nouveau_drm *drm = fault->drm;

        if (fault->fence) {
                nouveau_fence_wait(fault->fence, true, false);
                nouveau_fence_unref(&fault->fence);
        } else {
                /*
                 * FIXME: wait for the channel to be IDLE before finalizing
                 * the hmem object below (nouveau_migrate_hmem_fini()).
                 */
        }

        while (fault->npages--) {
                dma_unmap_page(drm->dev->dev, fault->dma[fault->npages],
                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
        }
        kfree(fault->dma);
}

static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
        .alloc_and_copy = nouveau_dmem_fault_alloc_and_copy,
        .finalize_and_map = nouveau_dmem_fault_finalize_and_map,
};
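
/*
 * CPU page fault handler for device private memory: migrate the faulting
 * page back to system memory, one page at a time.
 */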
static vm_fault_t
nouveau_dmem_fault(struct hmm_devmem *devmem,
                   struct vm_area_struct *vma,
                   unsigned long addr,
                   const struct page *page,
                   unsigned int flags,
                   pmd_t *pmdp)
{
        struct drm_device *drm_dev = dev_get_drvdata(devmem->device);
        unsigned long src[1] = {0}, dst[1] = {0};
        struct nouveau_dmem_fault fault = {0};
        int ret;

        /*
         * FIXME: what we really want is some heuristic to migrate more than
         * just one page on CPU fault. When such a fault happens, it is very
         * likely that more of the surrounding pages will CPU fault too.
         */
        fault.drm = nouveau_drm(drm_dev);
        ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vma, addr,
                          addr + PAGE_SIZE, src, dst, &fault);
        if (ret)
                return VM_FAULT_SIGBUS;

        if (dst[0] == MIGRATE_PFN_ERROR)
                return VM_FAULT_SIGBUS;

        return 0;
}

static const struct hmm_devmem_ops
nouveau_dmem_devmem_ops = {
        .free = nouveau_dmem_free,
        .fault = nouveau_dmem_fault,
};

static int
nouveau_dmem_chunk_alloc(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;
        int ret;

        if (drm->dmem == NULL)
                return -EINVAL;

        mutex_lock(&drm->dmem->mutex);
        chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk == NULL) {
                mutex_unlock(&drm->dmem->mutex);
                return -ENOMEM;
        }

        list_del(&chunk->list);
        mutex_unlock(&drm->dmem->mutex);

        ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
                             TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL,
                             &chunk->bo);
        if (ret)
                goto out;

        ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
        if (ret) {
                nouveau_bo_ref(NULL, &chunk->bo);
                goto out;
        }

        bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES);
        spin_lock_init(&chunk->lock);

out:
        mutex_lock(&drm->dmem->mutex);
        if (chunk->bo)
                list_add(&chunk->list, &drm->dmem->chunk_empty);
        else
                list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
        mutex_unlock(&drm->dmem->mutex);

        return ret;
}

static struct nouveau_dmem_chunk *
nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;

        chunk = list_first_entry_or_null(&drm->dmem->chunk_free,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk)
                return chunk;

        chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk->bo)
                return chunk;

        return NULL;
}

static int
nouveau_dmem_pages_alloc(struct nouveau_drm *drm,
                         unsigned long npages,
                         unsigned long *pages)
{
        struct nouveau_dmem_chunk *chunk;
        unsigned long c;
        int ret;

        memset(pages, 0xff, npages * sizeof(*pages));

        mutex_lock(&drm->dmem->mutex);
        for (c = 0; c < npages;) {
                unsigned long i;

                chunk = nouveau_dmem_chunk_first_free_locked(drm);
                if (chunk == NULL) {
                        mutex_unlock(&drm->dmem->mutex);
                        ret = nouveau_dmem_chunk_alloc(drm);
                        if (ret) {
                                if (c)
                                        return 0;
                                return ret;
                        }
                        /* Reacquire the mutex dropped for the allocation. */
                        mutex_lock(&drm->dmem->mutex);
                        continue;
                }

                spin_lock(&chunk->lock);
                i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES);
                while (i < DMEM_CHUNK_NPAGES && c < npages) {
                        pages[c] = chunk->pfn_first + i;
                        set_bit(i, chunk->bitmap);
                        chunk->callocated++;
                        c++;

                        i = find_next_zero_bit(chunk->bitmap,
                                               DMEM_CHUNK_NPAGES, i);
                }
                spin_unlock(&chunk->lock);
        }
        mutex_unlock(&drm->dmem->mutex);

        return 0;
}
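
/*
 * Helpers to allocate/free a single device page; the returned page is
 * referenced and locked, as expected by the migrate_vma() callbacks.
 */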
static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
{
        unsigned long pfns[1];
        struct page *page;
        int ret;

        /* FIXME: stop all this API mismatch ... */
        ret = nouveau_dmem_pages_alloc(drm, 1, pfns);
        if (ret)
                return NULL;

        page = pfn_to_page(pfns[0]);
        get_page(page);
        lock_page(page);
        return page;
}

static void
nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page)
{
        unlock_page(page);
        put_page(page);
}

void
nouveau_dmem_resume(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;
        int ret;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);
        list_for_each_entry(chunk, &drm->dmem->chunk_free, list) {
                ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
                /* FIXME: handle pin failure. */
                WARN_ON(ret);
        }
        list_for_each_entry(chunk, &drm->dmem->chunk_full, list) {
                ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
                /* FIXME: handle pin failure. */
                WARN_ON(ret);
        }
        mutex_unlock(&drm->dmem->mutex);
}

void
nouveau_dmem_suspend(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);
        list_for_each_entry(chunk, &drm->dmem->chunk_free, list) {
                nouveau_bo_unpin(chunk->bo);
        }
        list_for_each_entry(chunk, &drm->dmem->chunk_full, list) {
                nouveau_bo_unpin(chunk->bo);
        }
        mutex_unlock(&drm->dmem->mutex);
}

void
nouveau_dmem_fini(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk, *tmp;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);

        WARN_ON(!list_empty(&drm->dmem->chunk_free));
        WARN_ON(!list_empty(&drm->dmem->chunk_full));

        list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunk_empty, list) {
                if (chunk->bo) {
                        nouveau_bo_unpin(chunk->bo);
                        nouveau_bo_ref(NULL, &chunk->bo);
                }
                list_del(&chunk->list);
                kfree(chunk);
        }

        mutex_unlock(&drm->dmem->mutex);
}
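
/*
 * Issue a copy on the GPU copy engine (Pascal and newer DMA copy classes):
 * select the source/destination apertures, then launch a non-pipelined
 * transfer of npages PAGE_SIZE-sized pages.
 */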
static int
nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
                    enum nouveau_aper dst_aper, u64 dst_addr,
                    enum nouveau_aper src_aper, u64 src_addr)
{
        struct nouveau_channel *chan = drm->dmem->migrate.chan;
        u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ |
                         (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ |
                         (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ |
                         (1 << 2) /* FLUSH_ENABLE_TRUE. */ |
                         (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */;
        int ret;

        ret = RING_SPACE(chan, 13);
        if (ret)
                return ret;

        if (src_aper != NOUVEAU_APER_VIRT) {
                switch (src_aper) {
                case NOUVEAU_APER_VRAM:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0260, 0);
                        break;
                case NOUVEAU_APER_HOST:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0260, 1);
                        break;
                default:
                        return -EINVAL;
                }
                launch_dma |= 0x00001000; /* SRC_TYPE_PHYSICAL. */
        }

        if (dst_aper != NOUVEAU_APER_VIRT) {
                switch (dst_aper) {
                case NOUVEAU_APER_VRAM:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0);
                        break;
                case NOUVEAU_APER_HOST:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1);
                        break;
                default:
                        return -EINVAL;
                }
                launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */
        }

        BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
        OUT_RING  (chan, upper_32_bits(src_addr));
        OUT_RING  (chan, lower_32_bits(src_addr));
        OUT_RING  (chan, upper_32_bits(dst_addr));
        OUT_RING  (chan, lower_32_bits(dst_addr));
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, npages);
        BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
        OUT_RING  (chan, launch_dma);
        return 0;
}

static int
nouveau_dmem_migrate_init(struct nouveau_drm *drm)
{
        switch (drm->ttm.copy.oclass) {
        case PASCAL_DMA_COPY_A:
        case PASCAL_DMA_COPY_B:
        case VOLTA_DMA_COPY_A:
        case TURING_DMA_COPY_A:
                drm->dmem->migrate.copy_func = nvc0b5_migrate_copy;
                drm->dmem->migrate.chan = drm->ttm.chan;
                return 0;
        default:
                break;
        }
        return -ENODEV;
}
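
/*
 * Register all of VRAM with HMM as device private memory and carve it up
 * into DMEM_CHUNK_SIZE chunks; the backing VRAM buffer objects are only
 * allocated and pinned when a chunk is first used.
 */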
void
nouveau_dmem_init(struct nouveau_drm *drm)
{
        struct device *device = drm->dev->dev;
        unsigned long i, size;
        int ret;

        /* This only makes sense on PASCAL or newer. */
        if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
                return;

        if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
                return;

        mutex_init(&drm->dmem->mutex);
        INIT_LIST_HEAD(&drm->dmem->chunk_free);
        INIT_LIST_HEAD(&drm->dmem->chunk_full);
        INIT_LIST_HEAD(&drm->dmem->chunk_empty);

        size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE);

        /* Initialize migration dma helpers before registering memory */
        ret = nouveau_dmem_migrate_init(drm);
        if (ret) {
                kfree(drm->dmem);
                drm->dmem = NULL;
                return;
        }

        /*
         * FIXME: we need some kind of policy to decide how much VRAM we
         * want to register with HMM. For now just register everything;
         * later, if we want to do things like overcommit, we can revisit
         * this.
         */
        drm->dmem->devmem = hmm_devmem_add(&nouveau_dmem_devmem_ops,
                                           device, size);
        if (IS_ERR(drm->dmem->devmem)) {
                kfree(drm->dmem);
                drm->dmem = NULL;
                return;
        }

        for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
                struct nouveau_dmem_chunk *chunk;
                struct page *page;
                unsigned long j;

                chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
                if (chunk == NULL) {
                        nouveau_dmem_fini(drm);
                        return;
                }

                chunk->drm = drm;
                chunk->pfn_first = drm->dmem->devmem->pfn_first;
                chunk->pfn_first += (i * DMEM_CHUNK_NPAGES);
                list_add_tail(&chunk->list, &drm->dmem->chunk_empty);

                page = pfn_to_page(chunk->pfn_first);
                for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page)
                        hmm_devmem_page_set_drvdata(page, (long)chunk);
        }

        NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
}
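
/*
 * alloc_and_copy callback for migration into device memory: allocate device
 * pages, DMA-map the source system pages and copy them into VRAM with the
 * copy engine.
 */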
static void
nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
                                    const unsigned long *src_pfns,
                                    unsigned long *dst_pfns,
                                    unsigned long start,
                                    unsigned long end,
                                    void *private)
{
        struct nouveau_migrate *migrate = private;
        struct nouveau_drm *drm = migrate->drm;
        struct device *dev = drm->dev->dev;
        unsigned long addr, i, npages = 0;
        nouveau_migrate_copy_t copy;
        int ret;

        /* First allocate new memory */
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct page *dpage, *spage;

                dst_pfns[i] = 0;
                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
                        continue;

                dpage = nouveau_dmem_page_alloc_locked(drm);
                if (!dpage)
                        continue;

                dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
                              MIGRATE_PFN_LOCKED |
                              MIGRATE_PFN_DEVICE;
                npages++;
        }

        if (!npages)
                return;

        /* Allocate storage for DMA addresses, so we can unmap later. */
        migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL);
        if (!migrate->dma)
                goto error;

        /* Copy things over */
        copy = drm->dmem->migrate.copy_func;
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
                struct nouveau_dmem_chunk *chunk;
                struct page *spage, *dpage;
                u64 src_addr, dst_addr;

                dpage = migrate_pfn_to_page(dst_pfns[i]);
                if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                chunk = (void *)hmm_devmem_page_get_drvdata(dpage);
                dst_addr = page_to_pfn(dpage) - chunk->pfn_first;
                dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset;

                spage = migrate_pfn_to_page(src_pfns[i]);
                if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) {
                        nouveau_dmem_page_free_locked(drm, dpage);
                        dst_pfns[i] = 0;
                        continue;
                }

                migrate->dma[migrate->dma_nr] =
                        dma_map_page_attrs(dev, spage, 0, PAGE_SIZE,
                                           PCI_DMA_BIDIRECTIONAL,
                                           DMA_ATTR_SKIP_CPU_SYNC);
                if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) {
                        nouveau_dmem_page_free_locked(drm, dpage);
                        dst_pfns[i] = 0;
                        continue;
                }

                src_addr = migrate->dma[migrate->dma_nr++];

                ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr,
                           NOUVEAU_APER_HOST, src_addr);
                if (ret) {
                        nouveau_dmem_page_free_locked(drm, dpage);
                        dst_pfns[i] = 0;
                        continue;
                }
        }

        nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence);

        return;

error:
        for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) {
                struct page *page;

                if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR)
                        continue;

                page = migrate_pfn_to_page(dst_pfns[i]);
                dst_pfns[i] = MIGRATE_PFN_ERROR;
                if (page == NULL)
                        continue;

                __free_page(page);
        }
}

static void
nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma,
                                      const unsigned long *src_pfns,
                                      const unsigned long *dst_pfns,
                                      unsigned long start,
                                      unsigned long end,
                                      void *private)
{
        struct nouveau_migrate *migrate = private;
        struct nouveau_drm *drm = migrate->drm;

        if (migrate->fence) {
                nouveau_fence_wait(migrate->fence, true, false);
                nouveau_fence_unref(&migrate->fence);
        } else {
                /*
                 * FIXME: wait for the channel to be IDLE before finalizing
                 * the hmem object below (nouveau_migrate_hmem_fini())?
                 */
        }

        while (migrate->dma_nr--) {
                dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr],
                               PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
        }
        kfree(migrate->dma);

        /*
         * FIXME optimization: update the GPU page table to point to the
         * newly migrated memory.
         */
}

static const struct migrate_vma_ops nouveau_dmem_migrate_ops = {
        .alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy,
        .finalize_and_map = nouveau_dmem_migrate_finalize_and_map,
};
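
/*
 * Migrate a range of a VMA into device memory, in batches of at most
 * SG_MAX_SINGLE_ALLOC pages per migrate_vma() call.
 */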
int
nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
                         struct vm_area_struct *vma,
                         unsigned long start,
                         unsigned long end)
{
        unsigned long *src_pfns, *dst_pfns, npages;
        struct nouveau_migrate migrate = {0};
        unsigned long i, c, max;
        int ret = 0;

        npages = (end - start) >> PAGE_SHIFT;
        max = min(SG_MAX_SINGLE_ALLOC, npages);
        src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
        if (src_pfns == NULL)
                return -ENOMEM;
        dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
        if (dst_pfns == NULL) {
                kfree(src_pfns);
                return -ENOMEM;
        }

        migrate.drm = drm;
        migrate.vma = vma;
        migrate.npages = npages;
        for (i = 0; i < npages; i += c) {
                unsigned long next;

                /* Clamp each batch to the pages remaining in the range. */
                c = min(SG_MAX_SINGLE_ALLOC, npages - i);
                next = start + (c << PAGE_SHIFT);
                ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start,
                                  next, src_pfns, dst_pfns, &migrate);
                if (ret)
                        goto out;
                start = next;
        }

out:
        kfree(dst_pfns);
        kfree(src_pfns);
        return ret;
}

static inline bool
nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
{
        if (!is_device_private_page(page))
                return false;

        if (drm->dmem->devmem != page->pgmap->data)
                return false;

        return true;
}

void
nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
                         struct hmm_range *range)
{
        unsigned long i, npages;

        npages = (range->end - range->start) >> PAGE_SHIFT;
        for (i = 0; i < npages; ++i) {
                struct nouveau_dmem_chunk *chunk;
                struct page *page;
                uint64_t addr;

                page = hmm_pfn_to_page(range, range->pfns[i]);
                if (page == NULL)
                        continue;

                if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE]))
                        continue;

                if (!nouveau_dmem_page(drm, page)) {
                        WARN(1, "Some unknown device memory!\n");
                        range->pfns[i] = 0;
                        continue;
                }

                chunk = (void *)hmm_devmem_page_get_drvdata(page);
                addr = page_to_pfn(page) - chunk->pfn_first;
                addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT;

                range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
                range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
        }
}