// SPDX-License-Identifier: GPL-2.0
/*
 * This is a module to test the HMM (Heterogeneous Memory Management)
 * mirror and zone device private memory migration APIs of the kernel.
 * Userspace programs can register with the driver to mirror their own address
 * space and can use the device to read/write any valid virtual address.
 */
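
/*
 * A minimal sketch of the intended userspace flow, assuming the device
 * node and ioctl names used by the HMM selftests (see test_hmm_uapi.h);
 * treat the exact values as illustrative:
 *
 *	struct hmm_dmirror_cmd cmd = { 0 };
 *	int fd = open("/dev/hmm_dmirror0", O_RDWR); // mirrors current->mm
 *
 *	cmd.addr = (uintptr_t)buf;	// page-aligned address to mirror
 *	cmd.ptr = (uintptr_t)out;	// userspace bounce buffer
 *	cmd.npages = 1;
 *	ioctl(fd, HMM_DMIRROR_READ, &cmd); // read buf through the mirror
 */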
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/delay.h>
#include <linux/pagemap.h>
#include <linux/hmm.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sched/mm.h>
#include <linux/platform_device.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>

#include "test_hmm_uapi.h"

#define DMIRROR_NDEVICES		4
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
#define DEVMEM_CHUNKS_RESERVE		16

/*
 * For device_private pages, dpage is just a dummy struct page
 * representing a piece of device memory. dmirror_devmem_alloc_page
 * allocates a real system memory page as backing storage to fake a
 * real device. zone_device_data points to that backing page. But
 * for device_coherent memory, the struct page represents real
 * physical CPU-accessible memory that we can use directly.
 */
#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
			   (page)->zone_device_data : (page))
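
/*
 * Illustrative note: for a private dpage handed out by
 * dmirror_devmem_alloc_page() below, BACKING_PAGE(dpage) is the system
 * page that actually holds the data, so copies must target the backing
 * page, e.g. copy_highpage(BACKING_PAGE(dpage), spage). For a coherent
 * dpage, BACKING_PAGE(dpage) == dpage.
 */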

static unsigned long spm_addr_dev0;
module_param(spm_addr_dev0, long, 0644);
MODULE_PARM_DESC(spm_addr_dev0,
		"Specify start address for SPM (special purpose memory) used for device 0. Setting this selects the Coherent device type. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static unsigned long spm_addr_dev1;
module_param(spm_addr_dev1, long, 0644);
MODULE_PARM_DESC(spm_addr_dev1,
		"Specify start address for SPM (special purpose memory) used for device 1. Setting this selects the Coherent device type. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static const struct dev_pagemap_ops dmirror_devmem_ops;
static const struct mmu_interval_notifier_ops dmirror_min_ops;
static dev_t dmirror_dev;

struct dmirror_device;

struct dmirror_bounce {
	void			*ptr;
	unsigned long		size;
	unsigned long		addr;
	unsigned long		cpages;
};

#define DPT_XA_TAG_ATOMIC 1UL
#define DPT_XA_TAG_WRITE 3UL
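
/*
 * Entries in the dmirror->pt XArray are bare struct page pointers with
 * the permission encoded in the low bits of the pointer via the XArray
 * tagged-pointer helpers, for example:
 *
 *	entry = xa_tag_pointer(page, DPT_XA_TAG_WRITE);
 *	page = xa_untag_pointer(entry);
 *	writable = (xa_pointer_tag(entry) == DPT_XA_TAG_WRITE);
 */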

/*
 * Data structure to track address ranges and register for mmu interval
 * notifier updates.
 */
struct dmirror_interval {
	struct mmu_interval_notifier	notifier;
	struct dmirror			*dmirror;
};

/*
 * Data attached to the open device file.
 * Note that it might be shared after a fork().
 */
struct dmirror {
	struct dmirror_device	*mdevice;
	struct xarray		pt;
	struct mmu_interval_notifier	notifier;
	struct mutex		mutex;
};

/*
 * ZONE_DEVICE pages for migration and simulating device memory.
 */
struct dmirror_chunk {
	struct dev_pagemap	pagemap;
	struct dmirror_device	*mdevice;
	bool			remove;
};

/*
 * Per device data.
 */
struct dmirror_device {
	struct cdev		cdevice;
	unsigned int		zone_device_type;
	struct device		device;

	unsigned int		devmem_capacity;
	unsigned int		devmem_count;
	struct dmirror_chunk	**devmem_chunks;
	struct mutex		devmem_lock;	/* protects the above */

	unsigned long		calloc;
	unsigned long		cfree;
	struct page		*free_pages;
	spinlock_t		lock;		/* protects the above */
};

static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];

static int dmirror_bounce_init(struct dmirror_bounce *bounce,
			       unsigned long addr,
			       unsigned long size)
{
	bounce->addr = addr;
	bounce->size = size;
	bounce->cpages = 0;
	bounce->ptr = vmalloc(size);
	if (!bounce->ptr)
		return -ENOMEM;
	return 0;
}

static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
{
	return (mdevice->zone_device_type ==
		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false;
}

static enum migrate_vma_direction
dmirror_select_device(struct dmirror *dmirror)
{
	return (dmirror->mdevice->zone_device_type ==
		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
		MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
		MIGRATE_VMA_SELECT_DEVICE_COHERENT;
}

static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
{
	vfree(bounce->ptr);
}

static int dmirror_fops_open(struct inode *inode, struct file *filp)
{
	struct cdev *cdev = inode->i_cdev;
	struct dmirror *dmirror;
	int ret;

	/* Mirror this process address space */
	dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
	if (dmirror == NULL)
		return -ENOMEM;

	dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
	mutex_init(&dmirror->mutex);
	xa_init(&dmirror->pt);

	ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
				0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
	if (ret) {
		kfree(dmirror);
		return ret;
	}

	filp->private_data = dmirror;
	return 0;
}

static int dmirror_fops_release(struct inode *inode, struct file *filp)
{
	struct dmirror *dmirror = filp->private_data;

	mmu_interval_notifier_remove(&dmirror->notifier);
	xa_destroy(&dmirror->pt);
	kfree(dmirror);
	return 0;
}

static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
{
	return container_of(page->pgmap, struct dmirror_chunk, pagemap);
}

static struct dmirror_device *dmirror_page_to_device(struct page *page)
{
	return dmirror_page_to_chunk(page)->mdevice;
}

static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
{
	unsigned long *pfns = range->hmm_pfns;
	unsigned long pfn;

	for (pfn = (range->start >> PAGE_SHIFT);
	     pfn < (range->end >> PAGE_SHIFT);
	     pfn++, pfns++) {
		struct page *page;
		void *entry;

		/*
		 * Since we asked for hmm_range_fault() to populate pages,
		 * it shouldn't return an error entry on success.
		 */
		WARN_ON(*pfns & HMM_PFN_ERROR);
		WARN_ON(!(*pfns & HMM_PFN_VALID));

		page = hmm_pfn_to_page(*pfns);
		WARN_ON(!page);

		entry = page;
		if (*pfns & HMM_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
			return -EFAULT;
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry))
			return xa_err(entry);
	}

	return 0;
}

static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
			      unsigned long end)
{
	unsigned long pfn;
	void *entry;

	/*
	 * The XArray doesn't hold references to pages since it relies on
	 * the mmu notifier to clear page pointers when they become stale.
	 * Therefore, it is OK to just clear the entry.
	 */
	xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
			  end >> PAGE_SHIFT)
		xa_erase(&dmirror->pt, pfn);
}

static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);

	/*
	 * Ignore invalidation callbacks for device private pages since
	 * the invalidation is handled as part of the migration process.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->owner == dmirror->mdevice)
		return true;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	mmu_interval_set_seq(mni, cur_seq);
	dmirror_do_update(dmirror, range->start, range->end);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_min_ops = {
	.invalidate = dmirror_interval_invalidate,
};

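/*
 * The fault and snapshot paths below follow the standard
 * mmu_interval_notifier collision-retry pattern: sample the sequence
 * with mmu_interval_read_begin(), fault with hmm_range_fault() under
 * the mmap read lock, then take dmirror->mutex and use
 * mmu_interval_read_retry() to throw the result away and start over if
 * an invalidation raced with the walk.
 */
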
static int dmirror_range_fault(struct dmirror *dmirror,
			       struct hmm_range *range)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	int ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	ret = dmirror_do_fault(dmirror, range);

	mutex_unlock(&dmirror->mutex);
out:
	return ret;
}

static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
			 unsigned long end, bool write)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long addr;
	unsigned long pfns[64];
	struct hmm_range range = {
		.notifier = &dmirror->notifier,
		.hmm_pfns = pfns,
		.pfn_flags_mask = 0,
		.default_flags =
			HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return 0;

	for (addr = start; addr < end; addr = range.end) {
		range.start = addr;
		range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);

		ret = dmirror_range_fault(dmirror, &range);
		if (ret)
			break;
	}

	mmput(mm);
	return ret;
}

static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
			   unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page)
			return -ENOENT;

		memcpy_from_page(ptr, page, 0, PAGE_SIZE);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_read(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, false);
		if (ret)
			break;
		cmd->faults++;
	}

	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
			    unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
			return -ENOENT;

		memcpy_to_page(page, 0, ptr, PAGE_SIZE);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
			   bounce.size)) {
		ret = -EFAULT;
		goto fini;
	}

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_write(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, true);
		if (ret)
			break;
		cmd->faults++;
	}

fini:
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
				  struct page **ppage)
{
	struct dmirror_chunk *devmem;
	struct resource *res = NULL;
	unsigned long pfn;
	unsigned long pfn_first;
	unsigned long pfn_last;
	void *ptr;
	int ret = -ENOMEM;

	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
	if (!devmem)
		return ret;

	switch (mdevice->zone_device_type) {
	case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE:
		res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
					      "hmm_dmirror");
		if (IS_ERR_OR_NULL(res))
			goto err_devmem;
		devmem->pagemap.range.start = res->start;
		devmem->pagemap.range.end = res->end;
		devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
		break;
	case HMM_DMIRROR_MEMORY_DEVICE_COHERENT:
		devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ?
							spm_addr_dev0 :
							spm_addr_dev1;
		devmem->pagemap.range.end = devmem->pagemap.range.start +
					    DEVMEM_CHUNK_SIZE - 1;
		devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
		break;
	default:
		ret = -EINVAL;
		goto err_devmem;
	}

	devmem->pagemap.nr_range = 1;
	devmem->pagemap.ops = &dmirror_devmem_ops;
	devmem->pagemap.owner = mdevice;

	mutex_lock(&mdevice->devmem_lock);

	if (mdevice->devmem_count == mdevice->devmem_capacity) {
		struct dmirror_chunk **new_chunks;
		unsigned int new_capacity;

		new_capacity = mdevice->devmem_capacity +
				DEVMEM_CHUNKS_RESERVE;
		new_chunks = krealloc(mdevice->devmem_chunks,
				sizeof(new_chunks[0]) * new_capacity,
				GFP_KERNEL);
		if (!new_chunks)
			goto err_release;
		mdevice->devmem_capacity = new_capacity;
		mdevice->devmem_chunks = new_chunks;
	}
	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
	if (IS_ERR_OR_NULL(ptr)) {
		if (ptr)
			ret = PTR_ERR(ptr);
		else
			ret = -EFAULT;
		goto err_release;
	}

	devmem->mdevice = mdevice;
	pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
	pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT);
	mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;

	mutex_unlock(&mdevice->devmem_lock);

	pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
		DEVMEM_CHUNK_SIZE / (1024 * 1024),
		mdevice->devmem_count,
		mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
		pfn_first, pfn_last);

	spin_lock(&mdevice->lock);
	for (pfn = pfn_first; pfn < pfn_last; pfn++) {
		struct page *page = pfn_to_page(pfn);

		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	if (ppage) {
		*ppage = mdevice->free_pages;
		mdevice->free_pages = (*ppage)->zone_device_data;
		mdevice->calloc++;
	}
	spin_unlock(&mdevice->lock);

	return 0;

err_release:
	mutex_unlock(&mdevice->devmem_lock);
	if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
		release_mem_region(devmem->pagemap.range.start,
				   range_len(&devmem->pagemap.range));
err_devmem:
	kfree(devmem);

	return ret;
}

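/*
 * Free device pages are kept on a singly linked list threaded through
 * page->zone_device_data (built in dmirror_allocate_chunk() above).
 * Allocation below pops the list head under mdevice->lock; for the
 * private zone a freshly allocated system page is then attached as
 * backing store.
 */
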
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
	struct page *dpage = NULL;
	struct page *rpage = NULL;

	/*
	 * For ZONE_DEVICE private type, this is a fake device so we allocate
	 * real system memory to store our device memory.
	 * For ZONE_DEVICE coherent type we use the actual dpage to store the
	 * data and ignore rpage.
	 */
	if (dmirror_is_private_zone(mdevice)) {
		rpage = alloc_page(GFP_HIGHUSER);
		if (!rpage)
			return NULL;
	}
	spin_lock(&mdevice->lock);

	if (mdevice->free_pages) {
		dpage = mdevice->free_pages;
		mdevice->free_pages = dpage->zone_device_data;
		mdevice->calloc++;
		spin_unlock(&mdevice->lock);
	} else {
		spin_unlock(&mdevice->lock);
		if (dmirror_allocate_chunk(mdevice, &dpage))
			goto error;
	}

	zone_device_page_init(dpage);
	dpage->zone_device_data = rpage;
	return dpage;

error:
	if (rpage)
		__free_page(rpage);
	return NULL;
}

static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
					   struct dmirror *dmirror)
{
	struct dmirror_device *mdevice = dmirror->mdevice;
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long addr;

	for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
						   src++, dst++) {
		struct page *spage;
		struct page *dpage;
		struct page *rpage;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		/*
		 * Note that spage might be NULL which is OK since it is an
		 * unallocated pte_none() or read-only zero page.
		 */
		spage = migrate_pfn_to_page(*src);
		if (WARN(spage && is_zone_device_page(spage),
			 "page already in device spage pfn: 0x%lx\n",
			 page_to_pfn(spage)))
			continue;

		dpage = dmirror_devmem_alloc_page(mdevice);
		if (!dpage)
			continue;

		rpage = BACKING_PAGE(dpage);
		if (spage)
			copy_highpage(rpage, spage);
		else
			clear_highpage(rpage);

		/*
		 * Normally, a device would use the page->zone_device_data to
		 * point to the mirror but here we use it to hold the page for
		 * the simulated device memory and that page holds the pointer
		 * to the mirror.
		 */
		rpage->zone_device_data = dmirror;

		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));
		*dst = migrate_pfn(page_to_pfn(dpage));
		if ((*src & MIGRATE_PFN_WRITE) ||
		    (!spage && args->vma->vm_flags & VM_WRITE))
			*dst |= MIGRATE_PFN_WRITE;
	}
}

static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
				unsigned long end)
{
	unsigned long pfn;

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;

		entry = xa_load(&dmirror->pt, pfn);
		if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC)
			return -EPERM;
	}

	return 0;
}

static int dmirror_atomic_map(unsigned long start, unsigned long end,
			      struct page **pages, struct dmirror *dmirror)
{
	unsigned long pfn, mapped = 0;
	int i;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) {
		void *entry;

		if (!pages[i])
			continue;

		entry = pages[i];
		entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}

		mapped++;
	}

	mutex_unlock(&dmirror->mutex);
	return mapped;
}

static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
					    struct dmirror *dmirror)
{
	unsigned long start = args->start;
	unsigned long end = args->end;
	const unsigned long *src = args->src;
	const unsigned long *dst = args->dst;
	unsigned long pfn;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
								src++, dst++) {
		struct page *dpage;
		void *entry;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = migrate_pfn_to_page(*dst);
		if (!dpage)
			continue;

		entry = BACKING_PAGE(dpage);
		if (*dst & MIGRATE_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}
	}

	mutex_unlock(&dmirror->mutex);
	return 0;
}

static int dmirror_exclusive(struct dmirror *dmirror,
			     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct page *pages[64];
	struct dmirror_bounce bounce;
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		unsigned long mapped = 0;
		int i;

		if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
			next = end;
		else
			next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);

		ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
		/*
		 * Do dmirror_atomic_map() iff all pages are marked for
		 * exclusive access to avoid accessing uninitialized
		 * fields of pages.
		 */
		if (ret == (next - addr) >> PAGE_SHIFT)
			mapped = dmirror_atomic_map(addr, next, pages, dmirror);
		for (i = 0; i < ret; i++) {
			if (pages[i]) {
				unlock_page(pages[i]);
				put_page(pages[i]);
			}
		}

		if (addr + (mapped << PAGE_SHIFT) < next) {
			mmap_read_unlock(mm);
			mmput(mm);
			return -EBUSY;
		}
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/* Return the migrated data for verification. */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}

	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						      struct dmirror *dmirror)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;

		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
		spage = BACKING_PAGE(spage);
		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			continue;
		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));

		lock_page(dpage);
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage));
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}

static unsigned long
dmirror_successful_migrated_pages(struct migrate_vma *migrate)
{
	unsigned long cpages = 0;
	unsigned long i;

	for (i = 0; i < migrate->npages; i++) {
		if (migrate->src[i] & MIGRATE_PFN_VALID &&
		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
			cpages++;
	}
	return cpages;
}

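/*
 * Both migration directions below use the three-phase migrate_vma
 * protocol: migrate_vma_setup() collects and isolates the source pages,
 * the driver allocates destination pages and copies the data, then
 * migrate_vma_pages() and migrate_vma_finalize() commit (or roll back)
 * each page. A sketch of one iteration, eliding error handling:
 *
 *	args.vma = vma; args.start = addr; args.end = next;
 *	args.src = src_pfns; args.dst = dst_pfns;
 *	if (!migrate_vma_setup(&args)) {
 *		<allocate + copy destination pages, fill args.dst>
 *		migrate_vma_pages(&args);
 *		migrate_vma_finalize(&args);
 *	}
 */
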
dmirror_migrate_to_system(struct dmirror * dmirror,struct hmm_dmirror_cmd * cmd)9004c2e0f76SAlex Sierra static int dmirror_migrate_to_system(struct dmirror *dmirror,
901b2ef9f5aSRalph Campbell struct hmm_dmirror_cmd *cmd)
902b2ef9f5aSRalph Campbell {
903b2ef9f5aSRalph Campbell unsigned long start, end, addr;
904b2ef9f5aSRalph Campbell unsigned long size = cmd->npages << PAGE_SHIFT;
905b2ef9f5aSRalph Campbell struct mm_struct *mm = dmirror->notifier.mm;
906b2ef9f5aSRalph Campbell struct vm_area_struct *vma;
9074c2e0f76SAlex Sierra unsigned long src_pfns[64] = { 0 };
9084c2e0f76SAlex Sierra unsigned long dst_pfns[64] = { 0 };
90916ce101dSAlistair Popple struct migrate_vma args = { 0 };
9104c2e0f76SAlex Sierra unsigned long next;
9114c2e0f76SAlex Sierra int ret;
9124c2e0f76SAlex Sierra
9134c2e0f76SAlex Sierra start = cmd->addr;
9144c2e0f76SAlex Sierra end = start + size;
9154c2e0f76SAlex Sierra if (end < start)
9164c2e0f76SAlex Sierra return -EINVAL;
9174c2e0f76SAlex Sierra
9184c2e0f76SAlex Sierra /* Since the mm is for the mirrored process, get a reference first. */
9194c2e0f76SAlex Sierra if (!mmget_not_zero(mm))
9204c2e0f76SAlex Sierra return -EINVAL;
9214c2e0f76SAlex Sierra
9224c2e0f76SAlex Sierra cmd->cpages = 0;
9234c2e0f76SAlex Sierra mmap_read_lock(mm);
9244c2e0f76SAlex Sierra for (addr = start; addr < end; addr = next) {
9254c2e0f76SAlex Sierra vma = vma_lookup(mm, addr);
9264c2e0f76SAlex Sierra if (!vma || !(vma->vm_flags & VM_READ)) {
9274c2e0f76SAlex Sierra ret = -EINVAL;
9284c2e0f76SAlex Sierra goto out;
9294c2e0f76SAlex Sierra }
9304c2e0f76SAlex Sierra next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
9314c2e0f76SAlex Sierra if (next > vma->vm_end)
9324c2e0f76SAlex Sierra next = vma->vm_end;
9334c2e0f76SAlex Sierra
9344c2e0f76SAlex Sierra args.vma = vma;
9354c2e0f76SAlex Sierra args.src = src_pfns;
9364c2e0f76SAlex Sierra args.dst = dst_pfns;
9374c2e0f76SAlex Sierra args.start = addr;
9384c2e0f76SAlex Sierra args.end = next;
9394c2e0f76SAlex Sierra args.pgmap_owner = dmirror->mdevice;
9404c2e0f76SAlex Sierra args.flags = dmirror_select_device(dmirror);
9414c2e0f76SAlex Sierra
9424c2e0f76SAlex Sierra ret = migrate_vma_setup(&args);
9434c2e0f76SAlex Sierra if (ret)
9444c2e0f76SAlex Sierra goto out;
9454c2e0f76SAlex Sierra
9464c2e0f76SAlex Sierra pr_debug("Migrating from device mem to sys mem\n");
9474c2e0f76SAlex Sierra dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
9484c2e0f76SAlex Sierra
9494c2e0f76SAlex Sierra migrate_vma_pages(&args);
9504c2e0f76SAlex Sierra cmd->cpages += dmirror_successful_migrated_pages(&args);
9514c2e0f76SAlex Sierra migrate_vma_finalize(&args);
9524c2e0f76SAlex Sierra }
9534c2e0f76SAlex Sierra out:
9544c2e0f76SAlex Sierra mmap_read_unlock(mm);
9554c2e0f76SAlex Sierra mmput(mm);
9564c2e0f76SAlex Sierra
9574c2e0f76SAlex Sierra return ret;
9584c2e0f76SAlex Sierra }
9594c2e0f76SAlex Sierra
dmirror_migrate_to_device(struct dmirror * dmirror,struct hmm_dmirror_cmd * cmd)9604c2e0f76SAlex Sierra static int dmirror_migrate_to_device(struct dmirror *dmirror,
9614c2e0f76SAlex Sierra struct hmm_dmirror_cmd *cmd)
9624c2e0f76SAlex Sierra {
9634c2e0f76SAlex Sierra unsigned long start, end, addr;
9644c2e0f76SAlex Sierra unsigned long size = cmd->npages << PAGE_SHIFT;
9654c2e0f76SAlex Sierra struct mm_struct *mm = dmirror->notifier.mm;
9664c2e0f76SAlex Sierra struct vm_area_struct *vma;
9674c2e0f76SAlex Sierra unsigned long src_pfns[64] = { 0 };
9684c2e0f76SAlex Sierra unsigned long dst_pfns[64] = { 0 };
969b2ef9f5aSRalph Campbell struct dmirror_bounce bounce;
97016ce101dSAlistair Popple struct migrate_vma args = { 0 };
971b2ef9f5aSRalph Campbell unsigned long next;
972b2ef9f5aSRalph Campbell int ret;
973b2ef9f5aSRalph Campbell
974b2ef9f5aSRalph Campbell start = cmd->addr;
975b2ef9f5aSRalph Campbell end = start + size;
976b2ef9f5aSRalph Campbell if (end < start)
977b2ef9f5aSRalph Campbell return -EINVAL;
978b2ef9f5aSRalph Campbell
979b2ef9f5aSRalph Campbell /* Since the mm is for the mirrored process, get a reference first. */
980b2ef9f5aSRalph Campbell if (!mmget_not_zero(mm))
981b2ef9f5aSRalph Campbell return -EINVAL;
982b2ef9f5aSRalph Campbell
98389154dd5SMichel Lespinasse mmap_read_lock(mm);
984b2ef9f5aSRalph Campbell for (addr = start; addr < end; addr = next) {
98546e6b31dSLiam Howlett vma = vma_lookup(mm, addr);
98646e6b31dSLiam Howlett if (!vma || !(vma->vm_flags & VM_READ)) {
987b2ef9f5aSRalph Campbell ret = -EINVAL;
988b2ef9f5aSRalph Campbell goto out;
989b2ef9f5aSRalph Campbell }
990b2ef9f5aSRalph Campbell next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
991b2ef9f5aSRalph Campbell if (next > vma->vm_end)
992b2ef9f5aSRalph Campbell next = vma->vm_end;
993b2ef9f5aSRalph Campbell
994b2ef9f5aSRalph Campbell args.vma = vma;
995b2ef9f5aSRalph Campbell args.src = src_pfns;
996b2ef9f5aSRalph Campbell args.dst = dst_pfns;
997b2ef9f5aSRalph Campbell args.start = addr;
998b2ef9f5aSRalph Campbell args.end = next;
9997d17e83aSRalph Campbell args.pgmap_owner = dmirror->mdevice;
10005143192cSRalph Campbell args.flags = MIGRATE_VMA_SELECT_SYSTEM;
1001b2ef9f5aSRalph Campbell ret = migrate_vma_setup(&args);
1002b2ef9f5aSRalph Campbell if (ret)
1003b2ef9f5aSRalph Campbell goto out;
1004b2ef9f5aSRalph Campbell
10054c2e0f76SAlex Sierra pr_debug("Migrating from sys mem to device mem\n");
1006b2ef9f5aSRalph Campbell dmirror_migrate_alloc_and_copy(&args, dmirror);
1007b2ef9f5aSRalph Campbell migrate_vma_pages(&args);
1008b2ef9f5aSRalph Campbell dmirror_migrate_finalize_and_map(&args, dmirror);
1009b2ef9f5aSRalph Campbell migrate_vma_finalize(&args);
1010b2ef9f5aSRalph Campbell }
101189154dd5SMichel Lespinasse mmap_read_unlock(mm);
1012b2ef9f5aSRalph Campbell mmput(mm);
1013b2ef9f5aSRalph Campbell
10144c2e0f76SAlex Sierra /*
10154c2e0f76SAlex Sierra * Return the migrated data for verification.
10164c2e0f76SAlex Sierra * Only for pages in device zone
10174c2e0f76SAlex Sierra */
1018b2ef9f5aSRalph Campbell ret = dmirror_bounce_init(&bounce, start, size);
1019b2ef9f5aSRalph Campbell if (ret)
1020b2ef9f5aSRalph Campbell return ret;
1021b2ef9f5aSRalph Campbell mutex_lock(&dmirror->mutex);
1022b2ef9f5aSRalph Campbell ret = dmirror_do_read(dmirror, start, end, &bounce);
1023b2ef9f5aSRalph Campbell mutex_unlock(&dmirror->mutex);
1024b2ef9f5aSRalph Campbell if (ret == 0) {
1025b2ef9f5aSRalph Campbell if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
1026b2ef9f5aSRalph Campbell bounce.size))
1027b2ef9f5aSRalph Campbell ret = -EFAULT;
1028b2ef9f5aSRalph Campbell }
1029b2ef9f5aSRalph Campbell cmd->cpages = bounce.cpages;
1030b2ef9f5aSRalph Campbell dmirror_bounce_fini(&bounce);
1031b2ef9f5aSRalph Campbell return ret;
1032b2ef9f5aSRalph Campbell
1033b2ef9f5aSRalph Campbell out:
103489154dd5SMichel Lespinasse mmap_read_unlock(mm);
1035b2ef9f5aSRalph Campbell mmput(mm);
1036b2ef9f5aSRalph Campbell return ret;
1037b2ef9f5aSRalph Campbell }
1038b2ef9f5aSRalph Campbell
dmirror_mkentry(struct dmirror * dmirror,struct hmm_range * range,unsigned char * perm,unsigned long entry)1039b2ef9f5aSRalph Campbell static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
1040b2ef9f5aSRalph Campbell unsigned char *perm, unsigned long entry)
1041b2ef9f5aSRalph Campbell {
1042b2ef9f5aSRalph Campbell struct page *page;
1043b2ef9f5aSRalph Campbell
1044b2ef9f5aSRalph Campbell if (entry & HMM_PFN_ERROR) {
1045b2ef9f5aSRalph Campbell *perm = HMM_DMIRROR_PROT_ERROR;
1046b2ef9f5aSRalph Campbell return;
1047b2ef9f5aSRalph Campbell }
1048b2ef9f5aSRalph Campbell if (!(entry & HMM_PFN_VALID)) {
1049b2ef9f5aSRalph Campbell *perm = HMM_DMIRROR_PROT_NONE;
1050b2ef9f5aSRalph Campbell return;
1051b2ef9f5aSRalph Campbell }
1052b2ef9f5aSRalph Campbell
1053b2ef9f5aSRalph Campbell page = hmm_pfn_to_page(entry);
1054b2ef9f5aSRalph Campbell if (is_device_private_page(page)) {
1055b2ef9f5aSRalph Campbell /* Is the page migrated to this device or some other? */
1056b2ef9f5aSRalph Campbell if (dmirror->mdevice == dmirror_page_to_device(page))
1057b2ef9f5aSRalph Campbell *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
1058b2ef9f5aSRalph Campbell else
1059b2ef9f5aSRalph Campbell *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
10604c2e0f76SAlex Sierra } else if (is_device_coherent_page(page)) {
10614c2e0f76SAlex Sierra /* Is the page migrated to this device or some other? */
10624c2e0f76SAlex Sierra if (dmirror->mdevice == dmirror_page_to_device(page))
10634c2e0f76SAlex Sierra *perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
10644c2e0f76SAlex Sierra else
10654c2e0f76SAlex Sierra *perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
1066b2ef9f5aSRalph Campbell } else if (is_zero_pfn(page_to_pfn(page)))
1067b2ef9f5aSRalph Campbell *perm = HMM_DMIRROR_PROT_ZERO;
1068b2ef9f5aSRalph Campbell else
1069b2ef9f5aSRalph Campbell *perm = HMM_DMIRROR_PROT_NONE;
1070b2ef9f5aSRalph Campbell if (entry & HMM_PFN_WRITE)
1071b2ef9f5aSRalph Campbell *perm |= HMM_DMIRROR_PROT_WRITE;
1072b2ef9f5aSRalph Campbell else
1073b2ef9f5aSRalph Campbell *perm |= HMM_DMIRROR_PROT_READ;
1074e478425bSRalph Campbell if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT)
1075e478425bSRalph Campbell *perm |= HMM_DMIRROR_PROT_PMD;
1076e478425bSRalph Campbell else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT)
1077e478425bSRalph Campbell *perm |= HMM_DMIRROR_PROT_PUD;
1078b2ef9f5aSRalph Campbell }
1079b2ef9f5aSRalph Campbell
static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
					const struct mmu_notifier_range *range,
					unsigned long cur_seq)
{
	struct dmirror_interval *dmi =
		container_of(mni, struct dmirror_interval, notifier);
	struct dmirror *dmirror = dmi->dmirror;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	/*
	 * Snapshots only need to set the sequence number since any
	 * invalidation in the interval invalidates the whole snapshot.
	 */
	mmu_interval_set_seq(mni, cur_seq);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
	.invalidate = dmirror_snapshot_invalidate,
};

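/*
 * Fault in and snapshot one range using the standard hmm_range_fault()
 * pattern: sample the notifier sequence, fault the range under the mmap
 * read lock, then take dmirror->mutex and retry if the range was
 * invalidated in the meantime. The entries are encoded while still holding
 * the mutex so they form a consistent snapshot.
 */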
static int dmirror_range_snapshot(struct dmirror *dmirror,
				  struct hmm_range *range,
				  unsigned char *perm)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	struct dmirror_interval notifier;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long i;
	unsigned long n;
	int ret = 0;

	notifier.dmirror = dmirror;
	range->notifier = &notifier.notifier;

	ret = mmu_interval_notifier_insert(range->notifier, mm,
			range->start, range->end - range->start,
			&dmirror_mrn_ops);
	if (ret)
		return ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);

		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	n = (range->end - range->start) >> PAGE_SHIFT;
	for (i = 0; i < n; i++)
		dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);

	mutex_unlock(&dmirror->mutex);
out:
	mmu_interval_notifier_remove(range->notifier);
	return ret;
}

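/*
 * HMM_DMIRROR_SNAPSHOT: report the state of each page in the requested
 * range to userspace. The range is walked in chunks sized to the on-stack
 * pfns[]/perm[] arrays, i.e. 64 pages at a time.
 */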
static int dmirror_snapshot(struct dmirror *dmirror,
			    struct hmm_dmirror_cmd *cmd)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	unsigned long addr;
	unsigned long next;
	unsigned long pfns[64];
	unsigned char perm[64];
	char __user *uptr;
	struct hmm_range range = {
		.hmm_pfns = pfns,
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	/*
	 * Register a temporary notifier to detect invalidations even if it
	 * overlaps with other mmu_interval_notifiers.
	 */
	uptr = u64_to_user_ptr(cmd->ptr);
	for (addr = start; addr < end; addr = next) {
		unsigned long n;

		next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
		range.start = addr;
		range.end = next;

		ret = dmirror_range_snapshot(dmirror, &range, perm);
		if (ret)
			break;

		n = (range.end - range.start) >> PAGE_SHIFT;
		if (copy_to_user(uptr, perm, n)) {
			ret = -EFAULT;
			break;
		}

		cmd->cpages += n;
		uptr += n;
	}
	mmput(mm);

	return ret;
}

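/*
 * Evict every page mapping a device memory chunk back to system memory so
 * that the chunk itself can be removed. Failure cannot be handled at this
 * point, so __GFP_NOFAIL is used for both the pfn arrays and the
 * destination pages.
 */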
static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
{
	unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT;
	unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT;
	unsigned long npages = end_pfn - start_pfn + 1;
	unsigned long i;
	unsigned long *src_pfns;
	unsigned long *dst_pfns;

	src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
	dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);

	migrate_device_range(src_pfns, start_pfn, npages);
	for (i = 0; i < npages; i++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(src_pfns[i]);
		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
			continue;

		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
		spage = BACKING_PAGE(spage);
		dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
		lock_page(dpage);
		copy_highpage(dpage, spage);
		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
		if (src_pfns[i] & MIGRATE_PFN_WRITE)
			dst_pfns[i] |= MIGRATE_PFN_WRITE;
	}
	migrate_device_pages(src_pfns, dst_pfns, npages);
	migrate_device_finalize(src_pfns, dst_pfns, npages);
	kvfree(src_pfns);
	kvfree(dst_pfns);
}

/* Removes free pages from the free list so they can't be re-allocated */
static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
{
	struct dmirror_device *mdevice = devmem->mdevice;
	struct page *page;

	for (page = mdevice->free_pages; page; page = page->zone_device_data)
		if (dmirror_page_to_chunk(page) == devmem)
			mdevice->free_pages = page->zone_device_data;
}

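/*
 * Tear down all device memory chunks. Each chunk is flagged for removal and
 * unlinked from the free list under mdevice->lock first, so that a
 * concurrent page_free() cannot hand pages back to the allocator while the
 * chunk is being evicted and unmapped.
 */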
static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
{
	unsigned int i;

	mutex_lock(&mdevice->devmem_lock);
	if (mdevice->devmem_chunks) {
		for (i = 0; i < mdevice->devmem_count; i++) {
			struct dmirror_chunk *devmem =
				mdevice->devmem_chunks[i];

			spin_lock(&mdevice->lock);
			devmem->remove = true;
			dmirror_remove_free_pages(devmem);
			spin_unlock(&mdevice->lock);

			dmirror_device_evict_chunk(devmem);
			memunmap_pages(&devmem->pagemap);
			if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
				release_mem_region(devmem->pagemap.range.start,
						   range_len(&devmem->pagemap.range));
			kfree(devmem);
		}
		mdevice->devmem_count = 0;
		mdevice->devmem_capacity = 0;
		mdevice->free_pages = NULL;
		kfree(mdevice->devmem_chunks);
		mdevice->devmem_chunks = NULL;
	}
	mutex_unlock(&mdevice->devmem_lock);
}

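/*
 * Dispatch the driver's ioctl commands. The command block is copied back to
 * userspace on success so the caller can observe the cpages/faults counters
 * updated by the individual handlers.
 *
 * An illustrative (not verbatim) sketch of a userspace caller; the real
 * consumer is the hmm-tests selftest, and the "/dev/hmm_dmirror0" path
 * assumes a devtmpfs node named after the "hmm_dmirror%u" device created
 * below:
 *
 *	struct hmm_dmirror_cmd cmd = { 0 };
 *	unsigned char perm[NPAGES];
 *	int fd = open("/dev/hmm_dmirror0", O_RDWR);
 *
 *	cmd.addr = (uintptr_t)buf;	// page-aligned, NPAGES pages long
 *	cmd.ptr = (uintptr_t)perm;	// one HMM_DMIRROR_PROT_* byte per page
 *	cmd.npages = NPAGES;
 *	ioctl(fd, HMM_DMIRROR_SNAPSHOT, &cmd);
 */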
static long dmirror_fops_unlocked_ioctl(struct file *filp,
					unsigned int command,
					unsigned long arg)
{
	void __user *uarg = (void __user *)arg;
	struct hmm_dmirror_cmd cmd;
	struct dmirror *dmirror;
	int ret;

	dmirror = filp->private_data;
	if (!dmirror)
		return -EINVAL;

	if (copy_from_user(&cmd, uarg, sizeof(cmd)))
		return -EFAULT;

	if (cmd.addr & ~PAGE_MASK)
		return -EINVAL;
	if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
		return -EINVAL;

	cmd.cpages = 0;
	cmd.faults = 0;

	switch (command) {
	case HMM_DMIRROR_READ:
		ret = dmirror_read(dmirror, &cmd);
		break;

	case HMM_DMIRROR_WRITE:
		ret = dmirror_write(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE_TO_DEV:
		ret = dmirror_migrate_to_device(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE_TO_SYS:
		ret = dmirror_migrate_to_system(dmirror, &cmd);
		break;

	case HMM_DMIRROR_EXCLUSIVE:
		ret = dmirror_exclusive(dmirror, &cmd);
		break;

	case HMM_DMIRROR_CHECK_EXCLUSIVE:
		ret = dmirror_check_atomic(dmirror, cmd.addr,
					   cmd.addr + (cmd.npages << PAGE_SHIFT));
		break;

	case HMM_DMIRROR_SNAPSHOT:
		ret = dmirror_snapshot(dmirror, &cmd);
		break;

	case HMM_DMIRROR_RELEASE:
		dmirror_device_remove_chunks(dmirror->mdevice);
		ret = 0;
		break;

	default:
		return -EINVAL;
	}
	if (ret)
		return ret;

	if (copy_to_user(uarg, &cmd, sizeof(cmd)))
		return -EFAULT;

	return 0;
}

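/*
 * Back each page of the mapping with a freshly allocated, zeroed kernel
 * page. vm_insert_page() takes its own reference on the page, so the
 * allocation reference is dropped with put_page() once the page is mapped.
 */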
static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
		struct page *page;
		int ret;

		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			return -ENOMEM;

		ret = vm_insert_page(vma, addr, page);
		if (ret) {
			__free_page(page);
			return ret;
		}
		put_page(page);
	}

	return 0;
}

static const struct file_operations dmirror_fops = {
	.open = dmirror_fops_open,
	.release = dmirror_fops_release,
	.mmap = dmirror_fops_mmap,
	.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
	.llseek = default_llseek,
	.owner = THIS_MODULE,
};

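/*
 * page_free() callback: a device-private page is backed by a real system
 * page, which is freed here; the ZONE_DEVICE page itself is returned to the
 * device's free list unless its chunk is being removed.
 */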
static void dmirror_devmem_free(struct page *page)
{
	struct page *rpage = BACKING_PAGE(page);
	struct dmirror_device *mdevice;

	if (rpage != page)
		__free_page(rpage);

	mdevice = dmirror_page_to_device(page);
	spin_lock(&mdevice->lock);

	/* Return page to our allocator if not freeing the chunk */
	if (!dmirror_page_to_chunk(page)->remove) {
		mdevice->cfree++;
		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	spin_unlock(&mdevice->lock);
}

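/*
 * migrate_to_ram() callback: the CPU faulted on a device page, so migrate
 * the single faulting page back to system memory.
 */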
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
	struct migrate_vma args = { 0 };
	unsigned long src_pfns = 0;
	unsigned long dst_pfns = 0;
	struct page *rpage;
	struct dmirror *dmirror;
	vm_fault_t ret;

	/*
	 * Normally, a device would use the page->zone_device_data to point to
	 * the mirror but here we use it to hold the page for the simulated
	 * device memory and that page holds the pointer to the mirror.
	 */
	rpage = vmf->page->zone_device_data;
	dmirror = rpage->zone_device_data;

	/* FIXME demonstrate how we can adjust migrate range */
	args.vma = vmf->vma;
	args.start = vmf->address;
	args.end = args.start + PAGE_SIZE;
	args.src = &src_pfns;
	args.dst = &dst_pfns;
	args.pgmap_owner = dmirror->mdevice;
	args.flags = dmirror_select_device(dmirror);
	args.fault_page = vmf->page;

	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;

	ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
	if (ret)
		return ret;
	migrate_vma_pages(&args);
	/*
	 * No device finalize step is needed since
	 * dmirror_devmem_fault_alloc_and_copy() will have already
	 * invalidated the device page table.
	 */
	migrate_vma_finalize(&args);
	return 0;
}

static const struct dev_pagemap_ops dmirror_devmem_ops = {
	.page_free = dmirror_devmem_free,
	.migrate_to_ram = dmirror_devmem_fault,
};

static int dmirror_device_init(struct dmirror_device *mdevice, int id)
{
	dev_t dev;
	int ret;

	dev = MKDEV(MAJOR(dmirror_dev), id);
	mutex_init(&mdevice->devmem_lock);
	spin_lock_init(&mdevice->lock);

	cdev_init(&mdevice->cdevice, &dmirror_fops);
	mdevice->cdevice.owner = THIS_MODULE;
	device_initialize(&mdevice->device);
	mdevice->device.devt = dev;

	ret = dev_set_name(&mdevice->device, "hmm_dmirror%u", id);
	if (ret)
		return ret;

	ret = cdev_device_add(&mdevice->cdevice, &mdevice->device);
	if (ret)
		return ret;

	/* Build a list of free ZONE_DEVICE struct pages */
	return dmirror_allocate_chunk(mdevice, NULL);
}

static void dmirror_device_remove(struct dmirror_device *mdevice)
{
	dmirror_device_remove_chunks(mdevice);
	cdev_device_del(&mdevice->cdevice, &mdevice->device);
}

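/*
 * Devices 0 and 1 always use device private memory; devices 2 and 3 use
 * device coherent memory and are only created when both the spm_addr_dev0
 * and spm_addr_dev1 module parameters are given.
 */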
static int __init hmm_dmirror_init(void)
{
	int ret;
	int id = 0;
	int ndevices = 0;

	ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
				  "HMM_DMIRROR");
	if (ret)
		goto err_unreg;

	memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0]));
	dmirror_devices[ndevices++].zone_device_type =
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
	dmirror_devices[ndevices++].zone_device_type =
				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
	if (spm_addr_dev0 && spm_addr_dev1) {
		dmirror_devices[ndevices++].zone_device_type =
					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
		dmirror_devices[ndevices++].zone_device_type =
					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
	}
	for (id = 0; id < ndevices; id++) {
		ret = dmirror_device_init(dmirror_devices + id, id);
		if (ret)
			goto err_chrdev;
	}

	pr_info("HMM test module loaded. This is only for testing HMM.\n");
	return 0;

err_chrdev:
	while (--id >= 0)
		dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
err_unreg:
	return ret;
}

static void __exit hmm_dmirror_exit(void)
{
	int id;

	for (id = 0; id < DMIRROR_NDEVICES; id++)
		if (dmirror_devices[id].zone_device_type)
			dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}

module_init(hmm_dmirror_init);
module_exit(hmm_dmirror_exit);
MODULE_LICENSE("GPL");