// SPDX-License-Identifier: GPL-2.0
/*
 * This is a module to test the HMM (Heterogeneous Memory Management)
 * mirror and zone device private memory migration APIs of the kernel.
 * Userspace programs can register with the driver to mirror their own address
 * space and can use the device to read/write any valid virtual address.
 * (See the usage sketch at the end of this file.)
 */
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/delay.h>
#include <linux/pagemap.h>
#include <linux/hmm.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sched/mm.h>
#include <linux/platform_device.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>

#include "test_hmm_uapi.h"

#define DMIRROR_NDEVICES		4
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
#define DEVMEM_CHUNKS_RESERVE		16

/*
 * For device_private pages, dpage is just a dummy struct page
 * representing a piece of device memory. dmirror_devmem_alloc_page
 * allocates a real system memory page as backing storage to fake a
 * real device. zone_device_data points to that backing page. But
 * for device_coherent memory, the struct page represents real
 * physical CPU-accessible memory that we can use directly.
 */
#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
			   (page)->zone_device_data : (page))

static unsigned long spm_addr_dev0;
module_param(spm_addr_dev0, long, 0644);
MODULE_PARM_DESC(spm_addr_dev0,
		"Specify the start address for the SPM (special purpose memory) used for device 0. If set, the coherent device type will be used. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static unsigned long spm_addr_dev1;
module_param(spm_addr_dev1, long, 0644);
MODULE_PARM_DESC(spm_addr_dev1,
		"Specify the start address for the SPM (special purpose memory) used for device 1. If set, the coherent device type will be used. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static const struct dev_pagemap_ops dmirror_devmem_ops;
static const struct mmu_interval_notifier_ops dmirror_min_ops;
static dev_t dmirror_dev;

struct dmirror_device;

struct dmirror_bounce {
	void			*ptr;
	unsigned long		size;
	unsigned long		addr;
	unsigned long		cpages;
};

#define DPT_XA_TAG_ATOMIC 1UL
#define DPT_XA_TAG_WRITE 3UL

/*
 * Data structure to track address ranges and register for mmu interval
 * notifier updates.
 */
struct dmirror_interval {
	struct mmu_interval_notifier	notifier;
	struct dmirror			*dmirror;
};

/*
 * Data attached to the open device file.
 * Note that it might be shared after a fork().
 */
struct dmirror {
	struct dmirror_device		*mdevice;
	struct xarray			pt;
	struct mmu_interval_notifier	notifier;
	struct mutex			mutex;
};

/*
 * ZONE_DEVICE pages for migration and simulating device memory.
 */
struct dmirror_chunk {
	struct dev_pagemap	pagemap;
	struct dmirror_device	*mdevice;
	bool			remove;
};

/*
 * Per device data.
 */
struct dmirror_device {
	struct cdev		cdevice;
	unsigned int		zone_device_type;
	struct device		device;

	unsigned int		devmem_capacity;
	unsigned int		devmem_count;
	struct dmirror_chunk	**devmem_chunks;
	struct mutex		devmem_lock;	/* protects the above */

	unsigned long		calloc;
	unsigned long		cfree;
	struct page		*free_pages;
	spinlock_t		lock;		/* protects the above */
};

static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];

static int dmirror_bounce_init(struct dmirror_bounce *bounce,
			       unsigned long addr,
			       unsigned long size)
{
	bounce->addr = addr;
	bounce->size = size;
	bounce->cpages = 0;
	bounce->ptr = vmalloc(size);
	if (!bounce->ptr)
		return -ENOMEM;
	return 0;
}

static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
{
	return (mdevice->zone_device_type ==
		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false;
}

static enum migrate_vma_direction
dmirror_select_device(struct dmirror *dmirror)
{
	return (dmirror->mdevice->zone_device_type ==
		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
		MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
		MIGRATE_VMA_SELECT_DEVICE_COHERENT;
}

static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
{
	vfree(bounce->ptr);
}

static int dmirror_fops_open(struct inode *inode, struct file *filp)
{
	struct cdev *cdev = inode->i_cdev;
	struct dmirror *dmirror;
	int ret;

	/* Mirror this process address space */
	dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
	if (dmirror == NULL)
		return -ENOMEM;

	dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
	mutex_init(&dmirror->mutex);
	xa_init(&dmirror->pt);

	ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
				0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
	if (ret) {
		kfree(dmirror);
		return ret;
	}

	filp->private_data = dmirror;
	return 0;
}

static int dmirror_fops_release(struct inode *inode, struct file *filp)
{
	struct dmirror *dmirror = filp->private_data;

	mmu_interval_notifier_remove(&dmirror->notifier);
	xa_destroy(&dmirror->pt);
	kfree(dmirror);
	return 0;
}

static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
{
	return container_of(page->pgmap, struct dmirror_chunk, pagemap);
}

static struct dmirror_device *dmirror_page_to_device(struct page *page)
{
	return dmirror_page_to_chunk(page)->mdevice;
}

static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
{
	unsigned long *pfns = range->hmm_pfns;
	unsigned long pfn;

	for (pfn = (range->start >> PAGE_SHIFT);
	     pfn < (range->end >> PAGE_SHIFT);
	     pfn++, pfns++) {
		struct page *page;
		void *entry;

		/*
		 * Since we asked for hmm_range_fault() to populate pages,
		 * it shouldn't return an error entry on success.
		 */
		WARN_ON(*pfns & HMM_PFN_ERROR);
		WARN_ON(!(*pfns & HMM_PFN_VALID));

		page = hmm_pfn_to_page(*pfns);
		WARN_ON(!page);

		entry = page;
		if (*pfns & HMM_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
			return -EFAULT;
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry))
			return xa_err(entry);
	}

	return 0;
}

static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
			      unsigned long end)
{
	unsigned long pfn;
	void *entry;

	/*
	 * The XArray doesn't hold references to pages since it relies on
	 * the mmu notifier to clear page pointers when they become stale.
	 * Therefore, it is OK to just clear the entry.
	 */
	xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
			  end >> PAGE_SHIFT)
		xa_erase(&dmirror->pt, pfn);
}

static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);

	/*
	 * Ignore invalidation callbacks for device private pages since
	 * the invalidation is handled as part of the migration process.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->owner == dmirror->mdevice)
		return true;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	mmu_interval_set_seq(mni, cur_seq);
	dmirror_do_update(dmirror, range->start, range->end);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_min_ops = {
	.invalidate = dmirror_interval_invalidate,
};

static int dmirror_range_fault(struct dmirror *dmirror,
				struct hmm_range *range)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	int ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	ret = dmirror_do_fault(dmirror, range);

	mutex_unlock(&dmirror->mutex);
out:
	return ret;
}

static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
			 unsigned long end, bool write)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long addr;
	unsigned long pfns[64];
	struct hmm_range range = {
		.notifier = &dmirror->notifier,
		.hmm_pfns = pfns,
		.pfn_flags_mask = 0,
		.default_flags =
			HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return 0;

	for (addr = start; addr < end; addr = range.end) {
		range.start = addr;
		range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);

		ret = dmirror_range_fault(dmirror, &range);
		if (ret)
			break;
	}

	mmput(mm);
	return ret;
}

static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
			   unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;
		void *tmp;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page)
			return -ENOENT;

		tmp = kmap(page);
		memcpy(ptr, tmp, PAGE_SIZE);
		kunmap(page);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_read(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, false);
		if (ret)
			break;
		cmd->faults++;
	}

	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
			    unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;
		void *tmp;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
			return -ENOENT;

		tmp = kmap(page);
		memcpy(tmp, ptr, PAGE_SIZE);
		kunmap(page);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
			   bounce.size)) {
		ret = -EFAULT;
		goto fini;
	}

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_write(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, true);
		if (ret)
			break;
		cmd->faults++;
	}

fini:
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
				  struct page **ppage)
{
	struct dmirror_chunk *devmem;
	struct resource *res = NULL;
	unsigned long pfn;
	unsigned long pfn_first;
	unsigned long pfn_last;
	void *ptr;
	int ret = -ENOMEM;

	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
	if (!devmem)
		return ret;

	switch (mdevice->zone_device_type) {
	case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE:
		res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
					      "hmm_dmirror");
		if (IS_ERR_OR_NULL(res))
			goto err_devmem;
		devmem->pagemap.range.start = res->start;
		devmem->pagemap.range.end = res->end;
		devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
		break;
	case HMM_DMIRROR_MEMORY_DEVICE_COHERENT:
		devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ?
							spm_addr_dev0 :
							spm_addr_dev1;
		devmem->pagemap.range.end = devmem->pagemap.range.start +
					    DEVMEM_CHUNK_SIZE - 1;
		devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
		break;
	default:
		ret = -EINVAL;
		goto err_devmem;
	}

	devmem->pagemap.nr_range = 1;
	devmem->pagemap.ops = &dmirror_devmem_ops;
	devmem->pagemap.owner = mdevice;

	mutex_lock(&mdevice->devmem_lock);

	if (mdevice->devmem_count == mdevice->devmem_capacity) {
		struct dmirror_chunk **new_chunks;
		unsigned int new_capacity;

		new_capacity = mdevice->devmem_capacity +
				DEVMEM_CHUNKS_RESERVE;
		new_chunks = krealloc(mdevice->devmem_chunks,
				sizeof(new_chunks[0]) * new_capacity,
				GFP_KERNEL);
		if (!new_chunks)
			goto err_release;
		mdevice->devmem_capacity = new_capacity;
		mdevice->devmem_chunks = new_chunks;
	}
	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
	if (IS_ERR_OR_NULL(ptr)) {
		if (ptr)
			ret = PTR_ERR(ptr);
		else
			ret = -EFAULT;
		goto err_release;
	}

	devmem->mdevice = mdevice;
	pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
	pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT);
	mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;

	mutex_unlock(&mdevice->devmem_lock);

	pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
		DEVMEM_CHUNK_SIZE / (1024 * 1024),
		mdevice->devmem_count,
		mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
		pfn_first, pfn_last);

	spin_lock(&mdevice->lock);
	for (pfn = pfn_first; pfn < pfn_last; pfn++) {
		struct page *page = pfn_to_page(pfn);

		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	if (ppage) {
		*ppage = mdevice->free_pages;
		mdevice->free_pages = (*ppage)->zone_device_data;
		mdevice->calloc++;
	}
	spin_unlock(&mdevice->lock);

	return 0;

err_release:
	mutex_unlock(&mdevice->devmem_lock);
	if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
		release_mem_region(devmem->pagemap.range.start,
				   range_len(&devmem->pagemap.range));
err_devmem:
	kfree(devmem);

	return ret;
}

static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
	struct page *dpage = NULL;
	struct page *rpage = NULL;

	/*
	 * For ZONE_DEVICE private type, this is a fake device so we allocate
	 * real system memory to store our device memory.
	 * For ZONE_DEVICE coherent type we use the actual dpage to store the
	 * data and ignore rpage.
	 */
	if (dmirror_is_private_zone(mdevice)) {
		rpage = alloc_page(GFP_HIGHUSER);
		if (!rpage)
			return NULL;
	}
	spin_lock(&mdevice->lock);

	if (mdevice->free_pages) {
		dpage = mdevice->free_pages;
		mdevice->free_pages = dpage->zone_device_data;
		mdevice->calloc++;
		spin_unlock(&mdevice->lock);
	} else {
		spin_unlock(&mdevice->lock);
		if (dmirror_allocate_chunk(mdevice, &dpage))
			goto error;
	}

	zone_device_page_init(dpage);
	dpage->zone_device_data = rpage;
	return dpage;

error:
	if (rpage)
		__free_page(rpage);
	return NULL;
}

static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
					   struct dmirror *dmirror)
{
	struct dmirror_device *mdevice = dmirror->mdevice;
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long addr;

	for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
						   src++, dst++) {
		struct page *spage;
		struct page *dpage;
		struct page *rpage;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		/*
		 * Note that spage might be NULL which is OK since it is an
		 * unallocated pte_none() or read-only zero page.
		 */
		spage = migrate_pfn_to_page(*src);
		if (WARN(spage && is_zone_device_page(spage),
			 "page already in device spage pfn: 0x%lx\n",
			 page_to_pfn(spage)))
			continue;

		dpage = dmirror_devmem_alloc_page(mdevice);
		if (!dpage)
			continue;

		rpage = BACKING_PAGE(dpage);
		if (spage)
			copy_highpage(rpage, spage);
		else
			clear_highpage(rpage);

		/*
		 * Normally, a device would use the page->zone_device_data to
		 * point to the mirror but here we use it to hold the page for
		 * the simulated device memory and that page holds the pointer
		 * to the mirror.
		 */
		rpage->zone_device_data = dmirror;

		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));
		*dst = migrate_pfn(page_to_pfn(dpage));
		if ((*src & MIGRATE_PFN_WRITE) ||
		    (!spage && args->vma->vm_flags & VM_WRITE))
			*dst |= MIGRATE_PFN_WRITE;
	}
}

static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
				unsigned long end)
{
	unsigned long pfn;

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;

		entry = xa_load(&dmirror->pt, pfn);
		if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC)
			return -EPERM;
	}

	return 0;
}

static int dmirror_atomic_map(unsigned long start, unsigned long end,
			      struct page **pages, struct dmirror *dmirror)
{
	unsigned long pfn, mapped = 0;
	int i;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) {
		void *entry;

		if (!pages[i])
			continue;

		entry = pages[i];
		entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}

		mapped++;
	}

	mutex_unlock(&dmirror->mutex);
	return mapped;
}

static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
					    struct dmirror *dmirror)
{
	unsigned long start = args->start;
	unsigned long end = args->end;
	const unsigned long *src = args->src;
	const unsigned long *dst = args->dst;
	unsigned long pfn;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
								src++, dst++) {
		struct page *dpage;
		void *entry;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = migrate_pfn_to_page(*dst);
		if (!dpage)
			continue;

		entry = BACKING_PAGE(dpage);
		if (*dst & MIGRATE_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}
	}

	mutex_unlock(&dmirror->mutex);
	return 0;
}

static int dmirror_exclusive(struct dmirror *dmirror,
			     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct page *pages[64];
	struct dmirror_bounce bounce;
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		unsigned long mapped = 0;
		int i;

		if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
			next = end;
		else
			next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);

		ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
		/*
		 * Do dmirror_atomic_map() iff all pages are marked for
		 * exclusive access to avoid accessing uninitialized
		 * fields of pages.
		 */
		if (ret == (next - addr) >> PAGE_SHIFT)
			mapped = dmirror_atomic_map(addr, next, pages, dmirror);
		for (i = 0; i < ret; i++) {
			if (pages[i]) {
				unlock_page(pages[i]);
				put_page(pages[i]);
			}
		}

		if (addr + (mapped << PAGE_SHIFT) < next) {
			mmap_read_unlock(mm);
			mmput(mm);
			return -EBUSY;
		}
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/* Return the migrated data for verification. */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}

	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						      struct dmirror *dmirror)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;

		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
		spage = BACKING_PAGE(spage);
		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			continue;
		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));

		lock_page(dpage);
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage));
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}

static unsigned long
dmirror_successful_migrated_pages(struct migrate_vma *migrate)
{
	unsigned long cpages = 0;
	unsigned long i;

	for (i = 0; i < migrate->npages; i++) {
		if (migrate->src[i] & MIGRATE_PFN_VALID &&
		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
			cpages++;
	}
	return cpages;
}

static int dmirror_migrate_to_system(struct dmirror *dmirror,
				     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64] = { 0 };
	unsigned long dst_pfns[64] = { 0 };
	struct migrate_vma args = { 0 };
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	cmd->cpages = 0;
	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
		if (!vma || !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = dmirror_select_device(dmirror);

		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		pr_debug("Migrating from device mem to sys mem\n");
		dmirror_devmem_fault_alloc_and_copy(&args, dmirror);

		migrate_vma_pages(&args);
		cmd->cpages += dmirror_successful_migrated_pages(&args);
		migrate_vma_finalize(&args);
	}
out:
	mmap_read_unlock(mm);
	mmput(mm);

	return ret;
}

static int dmirror_migrate_to_device(struct dmirror *dmirror,
				     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64] = { 0 };
	unsigned long dst_pfns[64] = { 0 };
	struct dmirror_bounce bounce;
	struct migrate_vma args = { 0 };
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
		if (!vma || !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = MIGRATE_VMA_SELECT_SYSTEM;
		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		pr_debug("Migrating from sys mem to device mem\n");
		dmirror_migrate_alloc_and_copy(&args, dmirror);
		migrate_vma_pages(&args);
		dmirror_migrate_finalize_and_map(&args, dmirror);
		migrate_vma_finalize(&args);
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/*
	 * Return the migrated data for verification.
	 * Only for pages in the device zone.
	 */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;

out:
	mmap_read_unlock(mm);
	mmput(mm);
	return ret;
}

static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
			    unsigned char *perm, unsigned long entry)
{
	struct page *page;

	if (entry & HMM_PFN_ERROR) {
		*perm = HMM_DMIRROR_PROT_ERROR;
		return;
	}
	if (!(entry & HMM_PFN_VALID)) {
		*perm = HMM_DMIRROR_PROT_NONE;
		return;
	}

	page = hmm_pfn_to_page(entry);
	if (is_device_private_page(page)) {
		/* Is the page migrated to this device or some other? */
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
	} else if (is_device_coherent_page(page)) {
		/* Is the page migrated to this device or some other? */
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
	} else if (is_zero_pfn(page_to_pfn(page)))
		*perm = HMM_DMIRROR_PROT_ZERO;
	else
		*perm = HMM_DMIRROR_PROT_NONE;
	if (entry & HMM_PFN_WRITE)
		*perm |= HMM_DMIRROR_PROT_WRITE;
	else
		*perm |= HMM_DMIRROR_PROT_READ;
	if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT)
		*perm |= HMM_DMIRROR_PROT_PMD;
	else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT)
		*perm |= HMM_DMIRROR_PROT_PUD;
}

static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror_interval *dmi =
		container_of(mni, struct dmirror_interval, notifier);
	struct dmirror *dmirror = dmi->dmirror;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	/*
	 * Snapshots only need to set the sequence number since any
	 * invalidation in the interval invalidates the whole snapshot.
	 */
	mmu_interval_set_seq(mni, cur_seq);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
	.invalidate = dmirror_snapshot_invalidate,
};

static int dmirror_range_snapshot(struct dmirror *dmirror,
				  struct hmm_range *range,
				  unsigned char *perm)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	struct dmirror_interval notifier;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long i;
	unsigned long n;
	int ret = 0;

	notifier.dmirror = dmirror;
	range->notifier = &notifier.notifier;

	ret = mmu_interval_notifier_insert(range->notifier, mm,
			range->start, range->end - range->start,
			&dmirror_mrn_ops);
	if (ret)
		return ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);

		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	n = (range->end - range->start) >> PAGE_SHIFT;
	for (i = 0; i < n; i++)
		dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);

	mutex_unlock(&dmirror->mutex);
out:
	mmu_interval_notifier_remove(range->notifier);
	return ret;
}

static int dmirror_snapshot(struct dmirror *dmirror,
			    struct hmm_dmirror_cmd *cmd)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	unsigned long addr;
	unsigned long next;
	unsigned long pfns[64];
	unsigned char perm[64];
	char __user *uptr;
	struct hmm_range range = {
		.hmm_pfns = pfns,
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	/*
	 * Register a temporary notifier to detect invalidations even if it
	 * overlaps with other mmu_interval_notifiers.
	 */
	uptr = u64_to_user_ptr(cmd->ptr);
	for (addr = start; addr < end; addr = next) {
		unsigned long n;

		next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
		range.start = addr;
		range.end = next;

		ret = dmirror_range_snapshot(dmirror, &range, perm);
		if (ret)
			break;

		n = (range.end - range.start) >> PAGE_SHIFT;
		if (copy_to_user(uptr, perm, n)) {
			ret = -EFAULT;
			break;
		}

		cmd->cpages += n;
		uptr += n;
	}
	mmput(mm);

	return ret;
}

static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
{
	unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT;
	unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT;
	unsigned long npages = end_pfn - start_pfn + 1;
	unsigned long i;
	unsigned long *src_pfns;
	unsigned long *dst_pfns;

	src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
	dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);

	migrate_device_range(src_pfns, start_pfn, npages);
	for (i = 0; i < npages; i++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(src_pfns[i]);
		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
			continue;

		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
		spage = BACKING_PAGE(spage);
		dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
		lock_page(dpage);
		copy_highpage(dpage, spage);
		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
		if (src_pfns[i] & MIGRATE_PFN_WRITE)
			dst_pfns[i] |= MIGRATE_PFN_WRITE;
	}
	migrate_device_pages(src_pfns, dst_pfns, npages);
	migrate_device_finalize(src_pfns, dst_pfns, npages);
	kfree(src_pfns);
	kfree(dst_pfns);
}

/* Removes free pages from the free list so they can't be re-allocated */
static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
{
	struct dmirror_device *mdevice = devmem->mdevice;
	struct page *page;

	for (page = mdevice->free_pages; page; page = page->zone_device_data)
		if (dmirror_page_to_chunk(page) == devmem)
			mdevice->free_pages = page->zone_device_data;
}

static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
{
	unsigned int i;

	mutex_lock(&mdevice->devmem_lock);
	if (mdevice->devmem_chunks) {
		for (i = 0; i < mdevice->devmem_count; i++) {
			struct dmirror_chunk *devmem =
				mdevice->devmem_chunks[i];

			spin_lock(&mdevice->lock);
			devmem->remove = true;
			dmirror_remove_free_pages(devmem);
			spin_unlock(&mdevice->lock);

			dmirror_device_evict_chunk(devmem);
			memunmap_pages(&devmem->pagemap);
			if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
				release_mem_region(devmem->pagemap.range.start,
						   range_len(&devmem->pagemap.range));
			kfree(devmem);
		}
		mdevice->devmem_count = 0;
		mdevice->devmem_capacity = 0;
		mdevice->free_pages = NULL;
		kfree(mdevice->devmem_chunks);
		mdevice->devmem_chunks = NULL;
	}
	mutex_unlock(&mdevice->devmem_lock);
}

static long dmirror_fops_unlocked_ioctl(struct file *filp,
					unsigned int command,
					unsigned long arg)
{
	void __user *uarg = (void __user *)arg;
	struct hmm_dmirror_cmd cmd;
	struct dmirror *dmirror;
	int ret;

	dmirror = filp->private_data;
	if (!dmirror)
		return -EINVAL;

	if (copy_from_user(&cmd, uarg, sizeof(cmd)))
		return -EFAULT;

	if (cmd.addr & ~PAGE_MASK)
		return -EINVAL;
	if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
		return -EINVAL;

	cmd.cpages = 0;
	cmd.faults = 0;

	switch (command) {
	case HMM_DMIRROR_READ:
		ret = dmirror_read(dmirror, &cmd);
		break;

	case HMM_DMIRROR_WRITE:
		ret = dmirror_write(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE_TO_DEV:
		ret = dmirror_migrate_to_device(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE_TO_SYS:
		ret = dmirror_migrate_to_system(dmirror, &cmd);
		break;

	case HMM_DMIRROR_EXCLUSIVE:
		ret = dmirror_exclusive(dmirror, &cmd);
		break;

	case HMM_DMIRROR_CHECK_EXCLUSIVE:
		ret = dmirror_check_atomic(dmirror, cmd.addr,
					cmd.addr + (cmd.npages << PAGE_SHIFT));
		break;

	case HMM_DMIRROR_SNAPSHOT:
		ret = dmirror_snapshot(dmirror, &cmd);
		break;

	case HMM_DMIRROR_RELEASE:
		dmirror_device_remove_chunks(dmirror->mdevice);
		ret = 0;
		break;

	default:
		return -EINVAL;
	}
	if (ret)
		return ret;

	if (copy_to_user(uarg, &cmd, sizeof(cmd)))
		return -EFAULT;

	return 0;
}

static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
		struct page *page;
		int ret;

		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			return -ENOMEM;

		ret = vm_insert_page(vma, addr, page);
		if (ret) {
			__free_page(page);
			return ret;
		}
		put_page(page);
	}

	return 0;
}

static const struct file_operations dmirror_fops = {
	.open		= dmirror_fops_open,
	.release	= dmirror_fops_release,
	.mmap		= dmirror_fops_mmap,
	.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
	.llseek		= default_llseek,
	.owner		= THIS_MODULE,
};

static void dmirror_devmem_free(struct page *page)
{
	struct page *rpage = BACKING_PAGE(page);
	struct dmirror_device *mdevice;

	if (rpage != page)
		__free_page(rpage);

	mdevice = dmirror_page_to_device(page);
	spin_lock(&mdevice->lock);

	/* Return page to our allocator if not freeing the chunk */
	if (!dmirror_page_to_chunk(page)->remove) {
		mdevice->cfree++;
		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	spin_unlock(&mdevice->lock);
}

static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
	struct migrate_vma args = { 0 };
	unsigned long src_pfns = 0;
	unsigned long dst_pfns = 0;
	struct page *rpage;
	struct dmirror *dmirror;
	vm_fault_t ret;

	/*
	 * Normally, a device would use the page->zone_device_data to point to
	 * the mirror but here we use it to hold the page for the simulated
	 * device memory and that page holds the pointer to the mirror.
	 */
	rpage = vmf->page->zone_device_data;
	dmirror = rpage->zone_device_data;

	/* FIXME demonstrate how we can adjust migrate range */
	args.vma = vmf->vma;
	args.start = vmf->address;
	args.end = args.start + PAGE_SIZE;
	args.src = &src_pfns;
	args.dst = &dst_pfns;
	args.pgmap_owner = dmirror->mdevice;
	args.flags = dmirror_select_device(dmirror);
	args.fault_page = vmf->page;

	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;

	ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
	if (ret)
		return ret;
	migrate_vma_pages(&args);
	/*
	 * No device finalize step is needed since
	 * dmirror_devmem_fault_alloc_and_copy() will have already
	 * invalidated the device page table.
	 */
	migrate_vma_finalize(&args);
	return 0;
}

static const struct dev_pagemap_ops dmirror_devmem_ops = {
	.page_free	= dmirror_devmem_free,
	.migrate_to_ram	= dmirror_devmem_fault,
};

static int dmirror_device_init(struct dmirror_device *mdevice, int id)
{
	dev_t dev;
	int ret;

	dev = MKDEV(MAJOR(dmirror_dev), id);
	mutex_init(&mdevice->devmem_lock);
	spin_lock_init(&mdevice->lock);

	cdev_init(&mdevice->cdevice, &dmirror_fops);
	mdevice->cdevice.owner = THIS_MODULE;
	device_initialize(&mdevice->device);
	mdevice->device.devt = dev;

	ret = dev_set_name(&mdevice->device, "hmm_dmirror%u", id);
	if (ret)
		return ret;

	ret = cdev_device_add(&mdevice->cdevice, &mdevice->device);
	if (ret)
		return ret;

	/* Build a list of free ZONE_DEVICE struct pages */
	return dmirror_allocate_chunk(mdevice, NULL);
}

static void dmirror_device_remove(struct dmirror_device *mdevice)
{
	dmirror_device_remove_chunks(mdevice);
	cdev_device_del(&mdevice->cdevice, &mdevice->device);
}

static int __init hmm_dmirror_init(void)
{
	int ret;
	int id = 0;
	int ndevices = 0;

	ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
				  "HMM_DMIRROR");
	if (ret)
		goto err_unreg;

	memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0]));
	dmirror_devices[ndevices++].zone_device_type =
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
	dmirror_devices[ndevices++].zone_device_type =
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
	if (spm_addr_dev0 && spm_addr_dev1) {
		dmirror_devices[ndevices++].zone_device_type =
					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
		dmirror_devices[ndevices++].zone_device_type =
					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
	}
	for (id = 0; id < ndevices; id++) {
		ret = dmirror_device_init(dmirror_devices + id, id);
		if (ret)
			goto err_chrdev;
	}

	pr_info("HMM test module loaded. This is only for testing HMM.\n");
	return 0;

err_chrdev:
	while (--id >= 0)
		dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
err_unreg:
	return ret;
}

static void __exit hmm_dmirror_exit(void)
{
	int id;

	for (id = 0; id < DMIRROR_NDEVICES; id++)
		if (dmirror_devices[id].zone_device_type)
			dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}

module_init(hmm_dmirror_init);
module_exit(hmm_dmirror_exit);
MODULE_LICENSE("GPL");
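
/*
 * Usage sketch (illustrative only, not compiled into this module): the
 * snippet below shows roughly how a userspace test might drive the mirror
 * device via the HMM_DMIRROR_READ ioctl. It assumes a /dev/hmm_dmirror0
 * node corresponding to the "hmm_dmirror%u" name set in dmirror_device_init()
 * above, and uses the struct hmm_dmirror_cmd layout from test_hmm_uapi.h.
 * The real, maintained tests live in the kernel HMM selftests.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <unistd.h>
 *	#include "test_hmm_uapi.h"
 *
 *	int main(void)
 *	{
 *		struct hmm_dmirror_cmd cmd = { 0 };
 *		long pagesize = sysconf(_SC_PAGESIZE);
 *		char *buf = malloc(pagesize);	// range to be mirrored
 *		char *out = malloc(pagesize);	// buffer the driver fills in
 *		int fd = open("/dev/hmm_dmirror0", O_RDWR);
 *
 *		if (fd < 0 || !buf || !out)
 *			return 1;
 *		memset(buf, 0xa5, pagesize);
 *
 *		cmd.addr = (uintptr_t)buf;	// start of the mirrored range
 *		cmd.ptr = (uintptr_t)out;	// where the data is copied back
 *		cmd.npages = 1;
 *		// Ask the device to read one page through its mirror of this
 *		// process's address space; cmd.cpages and cmd.faults report
 *		// how many pages were copied and how many faults were taken.
 *		if (ioctl(fd, HMM_DMIRROR_READ, &cmd))
 *			return 1;
 *		return memcmp(buf, out, pagesize) != 0;
 *	}
 */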