// SPDX-License-Identifier: GPL-2.0
/*
 * This is a module to test the HMM (Heterogeneous Memory Management)
 * mirror and zone device private memory migration APIs of the kernel.
 * Userspace programs can register with the driver to mirror their own address
 * space and can use the device to read/write any valid virtual address.
 */
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/delay.h>
#include <linux/pagemap.h>
#include <linux/hmm.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sched/mm.h>
#include <linux/platform_device.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>

#include "test_hmm_uapi.h"

#define DMIRROR_NDEVICES		2
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
#define DEVMEM_CHUNKS_RESERVE		16

static const struct dev_pagemap_ops dmirror_devmem_ops;
static const struct mmu_interval_notifier_ops dmirror_min_ops;
static dev_t dmirror_dev;

struct dmirror_device;

struct dmirror_bounce {
	void			*ptr;
	unsigned long		size;
	unsigned long		addr;
	unsigned long		cpages;
};

#define DPT_XA_TAG_ATOMIC 1UL
#define DPT_XA_TAG_WRITE 3UL
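
/*
 * The device's "page table" is an XArray of struct page pointers indexed
 * by page frame number.  The tags above ride in the low bits of each
 * entry (see xa_tag_pointer()/xa_pointer_tag() below), so a plain
 * untagged entry means the device has read-only access in this scheme.
 */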

/*
 * Data structure to track address ranges and register for mmu interval
 * notifier updates.
 */
struct dmirror_interval {
	struct mmu_interval_notifier	notifier;
	struct dmirror			*dmirror;
};

/*
 * Data attached to the open device file.
 * Note that it might be shared after a fork().
 */
struct dmirror {
	struct dmirror_device		*mdevice;
	struct xarray			pt;
	struct mmu_interval_notifier	notifier;
	struct mutex			mutex;
};

/*
 * ZONE_DEVICE pages for migration and simulating device memory.
 */
struct dmirror_chunk {
	struct dev_pagemap	pagemap;
	struct dmirror_device	*mdevice;
};

/*
 * Per device data.
 */
struct dmirror_device {
	struct cdev		cdevice;
	struct hmm_devmem	*devmem;

	unsigned int		devmem_capacity;
	unsigned int		devmem_count;
	struct dmirror_chunk	**devmem_chunks;
	struct mutex		devmem_lock;	/* protects the above */

	unsigned long		calloc;
	unsigned long		cfree;
	struct page		*free_pages;
	spinlock_t		lock;		/* protects the above */
};

static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];

static int dmirror_bounce_init(struct dmirror_bounce *bounce,
			       unsigned long addr,
			       unsigned long size)
{
	bounce->addr = addr;
	bounce->size = size;
	bounce->cpages = 0;
	bounce->ptr = vmalloc(size);
	if (!bounce->ptr)
		return -ENOMEM;
	return 0;
}

static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
{
	vfree(bounce->ptr);
}

static int dmirror_fops_open(struct inode *inode, struct file *filp)
{
	struct cdev *cdev = inode->i_cdev;
	struct dmirror *dmirror;
	int ret;

	/* Mirror this process address space */
	dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
	if (dmirror == NULL)
		return -ENOMEM;

	dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
	mutex_init(&dmirror->mutex);
	xa_init(&dmirror->pt);

	ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
				0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
	if (ret) {
		kfree(dmirror);
		return ret;
	}

	filp->private_data = dmirror;
	return 0;
}
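
/*
 * Note that the interval notifier registered at open time spans the whole
 * process address space (0 through ULONG_MAX & PAGE_MASK), so every
 * CPU-side invalidation in the mirrored mm is funneled through
 * dmirror_interval_invalidate() below to keep the device page table
 * coherent.
 */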

static int dmirror_fops_release(struct inode *inode, struct file *filp)
{
	struct dmirror *dmirror = filp->private_data;

	mmu_interval_notifier_remove(&dmirror->notifier);
	xa_destroy(&dmirror->pt);
	kfree(dmirror);
	return 0;
}

static struct dmirror_device *dmirror_page_to_device(struct page *page)
{
	return container_of(page->pgmap, struct dmirror_chunk,
			    pagemap)->mdevice;
}
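
/*
 * Mirror the results of a hmm_range_fault() call into the device page
 * table.  Called with dmirror->mutex held so the update cannot race with
 * a concurrent invalidation.
 */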
static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
{
	unsigned long *pfns = range->hmm_pfns;
	unsigned long pfn;

	for (pfn = (range->start >> PAGE_SHIFT);
	     pfn < (range->end >> PAGE_SHIFT);
	     pfn++, pfns++) {
		struct page *page;
		void *entry;

		/*
		 * Since we asked for hmm_range_fault() to populate pages,
		 * it shouldn't return an error entry on success.
		 */
		WARN_ON(*pfns & HMM_PFN_ERROR);
		WARN_ON(!(*pfns & HMM_PFN_VALID));

		page = hmm_pfn_to_page(*pfns);
		WARN_ON(!page);

		entry = page;
		if (*pfns & HMM_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
			return -EFAULT;
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry))
			return xa_err(entry);
	}

	return 0;
}

static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
			      unsigned long end)
{
	unsigned long pfn;
	void *entry;

	/*
	 * The XArray doesn't hold references to pages since it relies on
	 * the mmu notifier to clear page pointers when they become stale.
	 * Therefore, it is OK to just clear the entry.
	 */
	xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
			  end >> PAGE_SHIFT)
		xa_erase(&dmirror->pt, pfn);
}

static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);

	/*
	 * Ignore invalidation callbacks for device private pages since
	 * the invalidation is handled as part of the migration process.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->owner == dmirror->mdevice)
		return true;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	mmu_interval_set_seq(mni, cur_seq);
	dmirror_do_update(dmirror, range->start, range->end);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_min_ops = {
	.invalidate = dmirror_interval_invalidate,
};
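
/*
 * The standard mmu_interval_notifier retry protocol: sample the sequence
 * count with mmu_interval_read_begin(), fault the range with
 * hmm_range_fault(), then take the driver lock and check
 * mmu_interval_read_retry().  If an invalidation slipped in between, drop
 * the lock and start over; otherwise the result is known to be current
 * for as long as the lock is held.
 */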
static int dmirror_range_fault(struct dmirror *dmirror,
			       struct hmm_range *range)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	int ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	ret = dmirror_do_fault(dmirror, range);

	mutex_unlock(&dmirror->mutex);
out:
	return ret;
}

static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
			 unsigned long end, bool write)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long addr;
	unsigned long pfns[64];
	struct hmm_range range = {
		.notifier = &dmirror->notifier,
		.hmm_pfns = pfns,
		.pfn_flags_mask = 0,
		.default_flags =
			HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return 0;

	for (addr = start; addr < end; addr = range.end) {
		range.start = addr;
		range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);

		ret = dmirror_range_fault(dmirror, &range);
		if (ret)
			break;
	}

	mmput(mm);
	return ret;
}

static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
			   unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;
		void *tmp;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page)
			return -ENOENT;

		tmp = kmap(page);
		memcpy(ptr, tmp, PAGE_SIZE);
		kunmap(page);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}
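
/*
 * Device-side read: copy pages through the mirror into a bounce buffer
 * and then out to userspace.  dmirror_do_read() returns -ENOENT for any
 * page not yet present in the device page table, in which case the
 * missing range is faulted in and the copy retried, with cmd->faults
 * counting how many rounds that took.
 */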
static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_read(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, false);
		if (ret)
			break;
		cmd->faults++;
	}

	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
			    unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;
		void *tmp;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
			return -ENOENT;

		tmp = kmap(page);
		memcpy(tmp, ptr, PAGE_SIZE);
		kunmap(page);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
			   bounce.size)) {
		ret = -EFAULT;
		goto fini;
	}

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_write(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, true);
		if (ret)
			break;
		cmd->faults++;
	}

fini:
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}
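
/*
 * Carve out a new DEVMEM_CHUNK_SIZE chunk of simulated device memory:
 * request_free_mem_region() reserves a range of physical address space
 * that is not backed by real RAM, and memremap_pages() then creates
 * MEMORY_DEVICE_PRIVATE struct pages for it.  The new pages are threaded
 * onto mdevice->free_pages through their zone_device_data fields.
 */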
static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
				   struct page **ppage)
{
	struct dmirror_chunk *devmem;
	struct resource *res;
	unsigned long pfn;
	unsigned long pfn_first;
	unsigned long pfn_last;
	void *ptr;

	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
	if (!devmem)
		return false;

	res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
				      "hmm_dmirror");
	if (IS_ERR(res))
		goto err_devmem;

	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
	devmem->pagemap.range.start = res->start;
	devmem->pagemap.range.end = res->end;
	devmem->pagemap.nr_range = 1;
	devmem->pagemap.ops = &dmirror_devmem_ops;
	devmem->pagemap.owner = mdevice;

	mutex_lock(&mdevice->devmem_lock);

	if (mdevice->devmem_count == mdevice->devmem_capacity) {
		struct dmirror_chunk **new_chunks;
		unsigned int new_capacity;

		new_capacity = mdevice->devmem_capacity +
				DEVMEM_CHUNKS_RESERVE;
		new_chunks = krealloc(mdevice->devmem_chunks,
				sizeof(new_chunks[0]) * new_capacity,
				GFP_KERNEL);
		if (!new_chunks)
			goto err_release;
		mdevice->devmem_capacity = new_capacity;
		mdevice->devmem_chunks = new_chunks;
	}

	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
	if (IS_ERR(ptr))
		goto err_release;

	devmem->mdevice = mdevice;
	pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
	pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT);
	mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;

	mutex_unlock(&mdevice->devmem_lock);

	pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
		DEVMEM_CHUNK_SIZE / (1024 * 1024),
		mdevice->devmem_count,
		mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
		pfn_first, pfn_last);

	spin_lock(&mdevice->lock);
	for (pfn = pfn_first; pfn < pfn_last; pfn++) {
		struct page *page = pfn_to_page(pfn);

		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	if (ppage) {
		*ppage = mdevice->free_pages;
		mdevice->free_pages = (*ppage)->zone_device_data;
		mdevice->calloc++;
	}
	spin_unlock(&mdevice->lock);

	return true;

err_release:
	mutex_unlock(&mdevice->devmem_lock);
	release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
err_devmem:
	kfree(devmem);

	return false;
}

static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
	struct page *dpage = NULL;
	struct page *rpage;

	/*
	 * This is a fake device so we alloc real system memory to store
	 * our device memory.
	 */
	rpage = alloc_page(GFP_HIGHUSER);
	if (!rpage)
		return NULL;

	spin_lock(&mdevice->lock);

	if (mdevice->free_pages) {
		dpage = mdevice->free_pages;
		mdevice->free_pages = dpage->zone_device_data;
		mdevice->calloc++;
		spin_unlock(&mdevice->lock);
	} else {
		spin_unlock(&mdevice->lock);
		if (!dmirror_allocate_chunk(mdevice, &dpage))
			goto error;
	}

	dpage->zone_device_data = rpage;
	lock_page(dpage);
	return dpage;

error:
	__free_page(rpage);
	return NULL;
}
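
/*
 * This is the "alloc and copy" half of the usual migrate_vma sequence:
 * migrate_vma_setup() has already filled args->src with candidate pages,
 * here a device page is allocated for each one that can migrate and the
 * data is copied, then migrate_vma_pages() and migrate_vma_finalize()
 * complete the switch-over (see dmirror_migrate() below).
 */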
static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
					   struct dmirror *dmirror)
{
	struct dmirror_device *mdevice = dmirror->mdevice;
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long addr;

	for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
						   src++, dst++) {
		struct page *spage;
		struct page *dpage;
		struct page *rpage;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		/*
		 * Note that spage might be NULL which is OK since it is an
		 * unallocated pte_none() or read-only zero page.
		 */
		spage = migrate_pfn_to_page(*src);

		dpage = dmirror_devmem_alloc_page(mdevice);
		if (!dpage)
			continue;

		rpage = dpage->zone_device_data;
		if (spage)
			copy_highpage(rpage, spage);
		else
			clear_highpage(rpage);

		/*
		 * Normally, a device would use the page->zone_device_data to
		 * point to the mirror but here we use it to hold the page for
		 * the simulated device memory and that page holds the pointer
		 * to the mirror.
		 */
		rpage->zone_device_data = dmirror;

		*dst = migrate_pfn(page_to_pfn(dpage));
		if ((*src & MIGRATE_PFN_WRITE) ||
		    (!spage && args->vma->vm_flags & VM_WRITE))
			*dst |= MIGRATE_PFN_WRITE;
	}
}

static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
				unsigned long end)
{
	unsigned long pfn;

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;

		entry = xa_load(&dmirror->pt, pfn);
		if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC)
			return -EPERM;
	}

	return 0;
}

static int dmirror_atomic_map(unsigned long start, unsigned long end,
			      struct page **pages, struct dmirror *dmirror)
{
	unsigned long pfn, mapped = 0;
	int i;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) {
		void *entry;

		if (!pages[i])
			continue;

		entry = pages[i];
		entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}

		mapped++;
	}

	mutex_unlock(&dmirror->mutex);
	return mapped;
}

static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
					    struct dmirror *dmirror)
{
	unsigned long start = args->start;
	unsigned long end = args->end;
	const unsigned long *src = args->src;
	const unsigned long *dst = args->dst;
	unsigned long pfn;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
								src++, dst++) {
		struct page *dpage;
		void *entry;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = migrate_pfn_to_page(*dst);
		if (!dpage)
			continue;

		/*
		 * Store the page that holds the data so the page table
		 * doesn't have to deal with ZONE_DEVICE private pages.
		 */
		entry = dpage->zone_device_data;
		if (*dst & MIGRATE_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}
	}

	mutex_unlock(&dmirror->mutex);
	return 0;
}
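
/*
 * Exercise the exclusive-access path: make_device_exclusive_range() marks
 * the CPU page table entries so that only the device may access the pages
 * (a CPU touch faults and revokes the exclusive state), and returns the
 * affected pages locked and with a reference held, which is why they are
 * unlocked and put again below once mapped.
 */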
static int dmirror_exclusive(struct dmirror *dmirror,
			     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct page *pages[64];
	struct dmirror_bounce bounce;
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		unsigned long mapped = 0;
		int i;

		if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
			next = end;
		else
			next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);

		ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
		/*
		 * Do dmirror_atomic_map() iff all pages are marked for
		 * exclusive access to avoid accessing uninitialized
		 * fields of pages.
		 */
		if (ret == (next - addr) >> PAGE_SHIFT)
			mapped = dmirror_atomic_map(addr, next, pages, dmirror);
		for (i = 0; i < ret; i++) {
			if (pages[i]) {
				unlock_page(pages[i]);
				put_page(pages[i]);
			}
		}

		if (addr + (mapped << PAGE_SHIFT) < next) {
			mmap_read_unlock(mm);
			mmput(mm);
			return -EBUSY;
		}
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/* Return the migrated data for verification. */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}

	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}
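
/*
 * Migrate system memory into simulated device memory, walking the range
 * in batches of up to 64 pages clipped to each VMA.  Each batch runs the
 * full migrate_vma sequence (setup, alloc and copy, pages, finalize) and
 * the migrated data is then read back through the mirror so the selftest
 * can verify it.
 */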
static int dmirror_migrate(struct dmirror *dmirror,
			   struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64];
	unsigned long dst_pfns[64];
	struct dmirror_bounce bounce;
	struct migrate_vma args;
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
		if (!vma || !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = MIGRATE_VMA_SELECT_SYSTEM;
		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		dmirror_migrate_alloc_and_copy(&args, dmirror);
		migrate_vma_pages(&args);
		dmirror_migrate_finalize_and_map(&args, dmirror);
		migrate_vma_finalize(&args);
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/* Return the migrated data for verification. */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;

out:
	mmap_read_unlock(mm);
	mmput(mm);
	return ret;
}
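
/*
 * Encode one hmm_range_fault() result as a single HMM_DMIRROR_PROT_*
 * permission byte for the snapshot ioctl: what kind of page backs the
 * address (none, zero page, local or remote device private), whether it
 * is writable, and whether it is mapped at PMD or PUD granularity.
 */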
static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
			    unsigned char *perm, unsigned long entry)
{
	struct page *page;

	if (entry & HMM_PFN_ERROR) {
		*perm = HMM_DMIRROR_PROT_ERROR;
		return;
	}
	if (!(entry & HMM_PFN_VALID)) {
		*perm = HMM_DMIRROR_PROT_NONE;
		return;
	}

	page = hmm_pfn_to_page(entry);
	if (is_device_private_page(page)) {
		/* Is the page migrated to this device or some other? */
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
	} else if (is_zero_pfn(page_to_pfn(page)))
		*perm = HMM_DMIRROR_PROT_ZERO;
	else
		*perm = HMM_DMIRROR_PROT_NONE;
	if (entry & HMM_PFN_WRITE)
		*perm |= HMM_DMIRROR_PROT_WRITE;
	else
		*perm |= HMM_DMIRROR_PROT_READ;
	if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT)
		*perm |= HMM_DMIRROR_PROT_PMD;
	else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT)
		*perm |= HMM_DMIRROR_PROT_PUD;
}

static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror_interval *dmi =
		container_of(mni, struct dmirror_interval, notifier);
	struct dmirror *dmirror = dmi->dmirror;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	/*
	 * Snapshots only need to set the sequence number since any
	 * invalidation in the interval invalidates the whole snapshot.
	 */
	mmu_interval_set_seq(mni, cur_seq);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
	.invalidate = dmirror_snapshot_invalidate,
};

static int dmirror_range_snapshot(struct dmirror *dmirror,
				  struct hmm_range *range,
				  unsigned char *perm)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	struct dmirror_interval notifier;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long i;
	unsigned long n;
	int ret = 0;

	notifier.dmirror = dmirror;
	range->notifier = &notifier.notifier;

	ret = mmu_interval_notifier_insert(range->notifier, mm,
			range->start, range->end - range->start,
			&dmirror_mrn_ops);
	if (ret)
		return ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);

		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	n = (range->end - range->start) >> PAGE_SHIFT;
	for (i = 0; i < n; i++)
		dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);

	mutex_unlock(&dmirror->mutex);
out:
	mmu_interval_notifier_remove(range->notifier);
	return ret;
}
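
/*
 * Snapshot what the device would see for a range of the process address
 * space without faulting anything in: each page is reported back to
 * userspace as one permission byte built by dmirror_mkentry(), processed
 * in batches sized to the on-stack pfns[] and perm[] arrays.
 */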
static int dmirror_snapshot(struct dmirror *dmirror,
			    struct hmm_dmirror_cmd *cmd)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	unsigned long addr;
	unsigned long next;
	unsigned long pfns[64];
	unsigned char perm[64];
	char __user *uptr;
	struct hmm_range range = {
		.hmm_pfns = pfns,
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	/*
	 * Register a temporary notifier to detect invalidations even if it
	 * overlaps with other mmu_interval_notifiers.
	 */
	uptr = u64_to_user_ptr(cmd->ptr);
	for (addr = start; addr < end; addr = next) {
		unsigned long n;

		next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
		range.start = addr;
		range.end = next;

		ret = dmirror_range_snapshot(dmirror, &range, perm);
		if (ret)
			break;

		n = (range.end - range.start) >> PAGE_SHIFT;
		if (copy_to_user(uptr, perm, n)) {
			ret = -EFAULT;
			break;
		}

		cmd->cpages += n;
		uptr += n;
	}
	mmput(mm);

	return ret;
}
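
/*
 * The ioctl interface below is what the hmm selftests drive.  A minimal
 * userspace sketch (the device node path is illustrative; the selftest
 * scripts create the nodes, this file only allocates the char devices):
 *
 *	int fd = open("/dev/hmm_dmirror0", O_RDWR);  // mirrors current mm
 *	struct hmm_dmirror_cmd cmd = {
 *		.addr   = (__u64)(uintptr_t)buf,   // page-aligned start
 *		.ptr    = (__u64)(uintptr_t)data,  // userspace bounce buffer
 *		.npages = 1,
 *	};
 *	ioctl(fd, HMM_DMIRROR_READ, &cmd);   // read through the mirror
 *	// cmd.cpages/cmd.faults report pages copied and faults taken
 */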
static long dmirror_fops_unlocked_ioctl(struct file *filp,
					unsigned int command,
					unsigned long arg)
{
	void __user *uarg = (void __user *)arg;
	struct hmm_dmirror_cmd cmd;
	struct dmirror *dmirror;
	int ret;

	dmirror = filp->private_data;
	if (!dmirror)
		return -EINVAL;

	if (copy_from_user(&cmd, uarg, sizeof(cmd)))
		return -EFAULT;

	if (cmd.addr & ~PAGE_MASK)
		return -EINVAL;
	if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
		return -EINVAL;

	cmd.cpages = 0;
	cmd.faults = 0;

	switch (command) {
	case HMM_DMIRROR_READ:
		ret = dmirror_read(dmirror, &cmd);
		break;

	case HMM_DMIRROR_WRITE:
		ret = dmirror_write(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE:
		ret = dmirror_migrate(dmirror, &cmd);
		break;

	case HMM_DMIRROR_EXCLUSIVE:
		ret = dmirror_exclusive(dmirror, &cmd);
		break;

	case HMM_DMIRROR_CHECK_EXCLUSIVE:
		ret = dmirror_check_atomic(dmirror, cmd.addr,
					cmd.addr + (cmd.npages << PAGE_SHIFT));
		break;

	case HMM_DMIRROR_SNAPSHOT:
		ret = dmirror_snapshot(dmirror, &cmd);
		break;

	default:
		return -EINVAL;
	}
	if (ret)
		return ret;

	if (copy_to_user(uarg, &cmd, sizeof(cmd)))
		return -EFAULT;

	return 0;
}

static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
		struct page *page;
		int ret;

		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			return -ENOMEM;

		ret = vm_insert_page(vma, addr, page);
		if (ret) {
			__free_page(page);
			return ret;
		}
		put_page(page);
	}

	return 0;
}

static const struct file_operations dmirror_fops = {
	.open		= dmirror_fops_open,
	.release	= dmirror_fops_release,
	.mmap		= dmirror_fops_mmap,
	.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
	.llseek		= default_llseek,
	.owner		= THIS_MODULE,
};
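
/*
 * page_free() callback for the device pagemap: the ZONE_DEVICE page is
 * returned to the device's free list and the system page that was backing
 * the simulated device memory is released.
 */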
static void dmirror_devmem_free(struct page *page)
{
	struct page *rpage = page->zone_device_data;
	struct dmirror_device *mdevice;

	if (rpage)
		__free_page(rpage);

	mdevice = dmirror_page_to_device(page);

	spin_lock(&mdevice->lock);
	mdevice->cfree++;
	page->zone_device_data = mdevice->free_pages;
	mdevice->free_pages = page;
	spin_unlock(&mdevice->lock);
}

static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						      struct dmirror *dmirror)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;
		spage = spage->zone_device_data;

		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			continue;

		lock_page(dpage);
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage));
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}
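
/*
 * migrate_to_ram() callback: invoked when the CPU faults on a device
 * private PTE.  One page is migrated back to system memory with the same
 * migrate_vma sequence as above, but selecting
 * MIGRATE_VMA_SELECT_DEVICE_PRIVATE so only this device's pages are
 * picked up.
 */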
static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						      struct dmirror *dmirror)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;
		spage = spage->zone_device_data;

		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			continue;

		lock_page(dpage);
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage));
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}

static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
	struct migrate_vma args;
	unsigned long src_pfns;
	unsigned long dst_pfns;
	struct page *rpage;
	struct dmirror *dmirror;
	vm_fault_t ret;

	/*
	 * Normally, a device would use the page->zone_device_data to point to
	 * the mirror but here we use it to hold the page for the simulated
	 * device memory and that page holds the pointer to the mirror.
	 */
	rpage = vmf->page->zone_device_data;
	dmirror = rpage->zone_device_data;

	/* FIXME demonstrate how we can adjust migrate range */
	args.vma = vmf->vma;
	args.start = vmf->address;
	args.end = args.start + PAGE_SIZE;
	args.src = &src_pfns;
	args.dst = &dst_pfns;
	args.pgmap_owner = dmirror->mdevice;
	args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;

	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;

	ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
	if (ret)
		return ret;
	migrate_vma_pages(&args);
	/*
	 * No device finalize step is needed since
	 * dmirror_devmem_fault_alloc_and_copy() will have already
	 * invalidated the device page table.
	 */
	migrate_vma_finalize(&args);
	return 0;
}

static const struct dev_pagemap_ops dmirror_devmem_ops = {
	.page_free		= dmirror_devmem_free,
	.migrate_to_ram		= dmirror_devmem_fault,
};
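/*
 * These ops take effect when a chunk of simulated device memory is
 * created: dmirror_allocate_chunk(), earlier in this file, registers
 * them through memremap_pages().  A minimal sketch of that setup, using
 * the standard struct dev_pagemap fields (the exact body may differ):
 *
 *	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
 *	devmem->pagemap.range.start = res->start;
 *	devmem->pagemap.range.end = res->end;
 *	devmem->pagemap.nr_range = 1;
 *	devmem->pagemap.ops = &dmirror_devmem_ops;
 *	devmem->pagemap.owner = mdevice;
 *	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
 */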
static int dmirror_device_init(struct dmirror_device *mdevice, int id)
{
	dev_t dev;
	int ret;

	dev = MKDEV(MAJOR(dmirror_dev), id);
	mutex_init(&mdevice->devmem_lock);
	spin_lock_init(&mdevice->lock);

	cdev_init(&mdevice->cdevice, &dmirror_fops);
	mdevice->cdevice.owner = THIS_MODULE;
	ret = cdev_add(&mdevice->cdevice, dev, 1);
	if (ret)
		return ret;

	/* Build a list of free ZONE_DEVICE private struct pages */
	dmirror_allocate_chunk(mdevice, NULL);

	return 0;
}

static void dmirror_device_remove(struct dmirror_device *mdevice)
{
	unsigned int i;

	if (mdevice->devmem_chunks) {
		for (i = 0; i < mdevice->devmem_count; i++) {
			struct dmirror_chunk *devmem =
				mdevice->devmem_chunks[i];

			memunmap_pages(&devmem->pagemap);
			release_mem_region(devmem->pagemap.range.start,
					   range_len(&devmem->pagemap.range));
			kfree(devmem);
		}
		kfree(mdevice->devmem_chunks);
	}

	cdev_del(&mdevice->cdevice);
}

static int __init hmm_dmirror_init(void)
{
	int ret;
	int id;

	ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
				  "HMM_DMIRROR");
	if (ret)
		goto err_unreg;

	for (id = 0; id < DMIRROR_NDEVICES; id++) {
		ret = dmirror_device_init(dmirror_devices + id, id);
		if (ret)
			goto err_chrdev;
	}

	pr_info("HMM test module loaded. This is only for testing HMM.\n");
	return 0;

err_chrdev:
	while (--id >= 0)
		dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
err_unreg:
	return ret;
}

static void __exit hmm_dmirror_exit(void)
{
	int id;

	for (id = 0; id < DMIRROR_NDEVICES; id++)
		dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}

module_init(hmm_dmirror_init);
module_exit(hmm_dmirror_exit);
MODULE_LICENSE("GPL");
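/*
 * Usage note (assumed setup, not defined by this module): loading the
 * module registers the "HMM_DMIRROR" character device region with
 * DMIRROR_NDEVICES minors; device nodes must be created against the
 * dynamically allocated major before the ioctl interface above can be
 * exercised, e.g.:
 *
 *	modprobe test_hmm
 *	major=$(awk '$2 == "HMM_DMIRROR" { print $1 }' /proc/devices)
 *	mknod /dev/hmm_dmirror0 c $major 0
 *	mknod /dev/hmm_dmirror1 c $major 1
 *
 * The HMM selftests under tools/testing/selftests/ drive these devices.
 */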