// SPDX-License-Identifier: GPL-2.0
/*
 * This is a module to test the HMM (Heterogeneous Memory Management)
 * mirror and zone device private memory migration APIs of the kernel.
 * Userspace programs can register with the driver to mirror their own address
 * space and can use the device to read/write any valid virtual address.
 */
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/delay.h>
#include <linux/pagemap.h>
#include <linux/hmm.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sched/mm.h>
#include <linux/platform_device.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>

#include "test_hmm_uapi.h"

#define DMIRROR_NDEVICES		4
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
#define DEVMEM_CHUNKS_RESERVE		16

/*
 * For device_private pages, dpage is just a dummy struct page
 * representing a piece of device memory. dmirror_devmem_alloc_page
 * allocates a real system memory page as backing storage to fake a
 * real device. zone_device_data points to that backing page. But
 * for device_coherent memory, the struct page represents real
 * physical CPU-accessible memory that we can use directly.
 */
#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
			   (page)->zone_device_data : (page))

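/*
 * Usage sketch (illustrative only; "buf" is a hypothetical buffer):
 * CPU access to a device-private dpage must go through the backing
 * system page,
 *
 *	struct page *rpage = BACKING_PAGE(dpage);
 *	memcpy_from_page(buf, rpage, 0, PAGE_SIZE);
 *
 * while for device-coherent memory BACKING_PAGE(dpage) == dpage and
 * the CPU can access the page directly.
 */
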
static unsigned long spm_addr_dev0;
module_param(spm_addr_dev0, long, 0644);
MODULE_PARM_DESC(spm_addr_dev0,
		"Specify start address for SPM (special purpose memory) used for device 0. Setting this selects the coherent device type. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static unsigned long spm_addr_dev1;
module_param(spm_addr_dev1, long, 0644);
MODULE_PARM_DESC(spm_addr_dev1,
		"Specify start address for SPM (special purpose memory) used for device 1. Setting this selects the coherent device type. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static const struct dev_pagemap_ops dmirror_devmem_ops;
static const struct mmu_interval_notifier_ops dmirror_min_ops;
static dev_t dmirror_dev;

struct dmirror_device;

struct dmirror_bounce {
	void			*ptr;
	unsigned long		size;
	unsigned long		addr;
	unsigned long		cpages;
};

#define DPT_XA_TAG_ATOMIC 1UL
#define DPT_XA_TAG_WRITE 3UL

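/*
 * The mirror's "page table" (dmirror->pt) stores struct page pointers
 * with one of the tags above encoded in the entry's low bits, using
 * the stock <linux/xarray.h> tagged-pointer API:
 *
 *	void *entry = xa_tag_pointer(page, DPT_XA_TAG_WRITE);
 *	struct page *p = xa_untag_pointer(entry);	the page again
 *	unsigned long t = xa_pointer_tag(entry);	DPT_XA_TAG_WRITE
 */
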
/*
 * Data structure to track address ranges and register for mmu interval
 * notifier updates.
 */
struct dmirror_interval {
	struct mmu_interval_notifier	notifier;
	struct dmirror			*dmirror;
};

/*
 * Data attached to the open device file.
 * Note that it might be shared after a fork().
 */
struct dmirror {
	struct dmirror_device		*mdevice;
	struct xarray			pt;
	struct mmu_interval_notifier	notifier;
	struct mutex			mutex;
};

/*
 * ZONE_DEVICE pages for migration and simulating device memory.
 */
struct dmirror_chunk {
	struct dev_pagemap	pagemap;
	struct dmirror_device	*mdevice;
	bool			remove;
};

/*
 * Per device data.
 */
struct dmirror_device {
	struct cdev		cdevice;
	unsigned int		zone_device_type;
	struct device		device;

	unsigned int		devmem_capacity;
	unsigned int		devmem_count;
	struct dmirror_chunk	**devmem_chunks;
	struct mutex		devmem_lock;	/* protects the above */

	unsigned long		calloc;
	unsigned long		cfree;
	struct page		*free_pages;
	spinlock_t		lock;		/* protects the above */
};

static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];

static int dmirror_bounce_init(struct dmirror_bounce *bounce,
			       unsigned long addr,
			       unsigned long size)
{
	bounce->addr = addr;
	bounce->size = size;
	bounce->cpages = 0;
	bounce->ptr = vmalloc(size);
	if (!bounce->ptr)
		return -ENOMEM;
	return 0;
}

static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
{
	return (mdevice->zone_device_type ==
		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false;
}

static enum migrate_vma_direction
dmirror_select_device(struct dmirror *dmirror)
{
	return (dmirror->mdevice->zone_device_type ==
		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
		MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
		MIGRATE_VMA_SELECT_DEVICE_COHERENT;
}

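/*
 * A note on the helper above (descriptive aside, not new behavior):
 * the returned direction flag tells migrate_vma_setup() which source
 * pages to collect when migrating back to system memory, e.g.
 *
 *	args.flags = dmirror_select_device(dmirror);
 *
 * selects device-private or device-coherent pages owned by this
 * device, while the opposite direction uses MIGRATE_VMA_SELECT_SYSTEM.
 */
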
static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
{
	vfree(bounce->ptr);
}

static int dmirror_fops_open(struct inode *inode, struct file *filp)
{
	struct cdev *cdev = inode->i_cdev;
	struct dmirror *dmirror;
	int ret;

	/* Mirror this process address space */
	dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
	if (dmirror == NULL)
		return -ENOMEM;

	dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
	mutex_init(&dmirror->mutex);
	xa_init(&dmirror->pt);

	ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
				0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
	if (ret) {
		kfree(dmirror);
		return ret;
	}

	filp->private_data = dmirror;
	return 0;
}

static int dmirror_fops_release(struct inode *inode, struct file *filp)
{
	struct dmirror *dmirror = filp->private_data;

	mmu_interval_notifier_remove(&dmirror->notifier);
	xa_destroy(&dmirror->pt);
	kfree(dmirror);
	return 0;
}

static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
{
	return container_of(page->pgmap, struct dmirror_chunk, pagemap);
}

static struct dmirror_device *dmirror_page_to_device(struct page *page)
{
	return dmirror_page_to_chunk(page)->mdevice;
}

static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
{
	unsigned long *pfns = range->hmm_pfns;
	unsigned long pfn;

	for (pfn = (range->start >> PAGE_SHIFT);
	     pfn < (range->end >> PAGE_SHIFT);
	     pfn++, pfns++) {
		struct page *page;
		void *entry;

		/*
		 * Since we asked hmm_range_fault() to populate pages,
		 * it shouldn't return an error entry on success.
		 */
		WARN_ON(*pfns & HMM_PFN_ERROR);
		WARN_ON(!(*pfns & HMM_PFN_VALID));

		page = hmm_pfn_to_page(*pfns);
		WARN_ON(!page);

		entry = page;
		if (*pfns & HMM_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
			return -EFAULT;
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry))
			return xa_err(entry);
	}

	return 0;
}

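/*
 * Indexing note (descriptive only): dmirror->pt is keyed by the
 * mirrored process's *virtual* PFN, i.e. vaddr >> PAGE_SHIFT, not by a
 * physical PFN. For example, the entry for vaddr 0x7f0000001000 lives
 * at XArray index 0x7f0000001.
 */
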
static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
			      unsigned long end)
{
	unsigned long pfn;
	void *entry;

	/*
	 * The XArray doesn't hold references to pages since it relies on
	 * the mmu notifier to clear page pointers when they become stale.
	 * Therefore, it is OK to just clear the entry.
	 */
	xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
			  end >> PAGE_SHIFT)
		xa_erase(&dmirror->pt, pfn);
}

static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);

	/*
	 * Ignore invalidation callbacks for device private pages since
	 * the invalidation is handled as part of the migration process.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->owner == dmirror->mdevice)
		return true;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	mmu_interval_set_seq(mni, cur_seq);
	dmirror_do_update(dmirror, range->start, range->end);

	mutex_unlock(&dmirror->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops dmirror_min_ops = {
	.invalidate = dmirror_interval_invalidate,
};

static int dmirror_range_fault(struct dmirror *dmirror,
				struct hmm_range *range)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	int ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	ret = dmirror_do_fault(dmirror, range);

	mutex_unlock(&dmirror->mutex);
out:
	return ret;
}

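/*
 * The loop above is the standard hmm_range_fault() retry pattern,
 * sketched here for reference (the real locking is in the function):
 *
 *	again:
 *		seq = mmu_interval_read_begin(notifier);
 *		hmm_range_fault(range);		fault under mmap read lock
 *		mutex_lock(&dmirror->mutex);
 *		if (mmu_interval_read_retry(notifier, seq))
 *			goto again;		invalidation raced in
 *						(dropping the mutex first)
 *		...use the pfns while still holding the driver lock...
 *
 * The driver lock must be held from the retry check until the device
 * page tables (here, dmirror->pt) have been updated.
 */
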
static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
			 unsigned long end, bool write)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long addr;
	unsigned long pfns[64];
	struct hmm_range range = {
		.notifier = &dmirror->notifier,
		.hmm_pfns = pfns,
		.pfn_flags_mask = 0,
		.default_flags =
			HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return 0;

	for (addr = start; addr < end; addr = range.end) {
		range.start = addr;
		range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);

		ret = dmirror_range_fault(dmirror, &range);
		if (ret)
			break;
	}

	mmput(mm);
	return ret;
}

static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
			   unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page)
			return -ENOENT;

		memcpy_from_page(ptr, page, 0, PAGE_SIZE);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_read(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, false);
		if (ret)
			break;
		cmd->faults++;
	}

	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

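/*
 * dmirror_read() above and dmirror_write() below share one retry
 * shape: try the copy against the mirror, and when dmirror_do_read()
 * or dmirror_do_write() returns -ENOENT (page absent, or lacking
 * write permission), fault the missing pages with dmirror_fault() and
 * try again, counting each round trip in cmd->faults.
 */
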
static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
			    unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
			return -ENOENT;

		memcpy_to_page(page, 0, ptr, PAGE_SIZE);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}

static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
			   bounce.size)) {
		ret = -EFAULT;
		goto fini;
	}

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_write(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, true);
		if (ret)
			break;
		cmd->faults++;
	}

fini:
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
				   struct page **ppage)
{
	struct dmirror_chunk *devmem;
	struct resource *res = NULL;
	unsigned long pfn;
	unsigned long pfn_first;
	unsigned long pfn_last;
	void *ptr;
	int ret = -ENOMEM;

	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
	if (!devmem)
		return ret;

	switch (mdevice->zone_device_type) {
	case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE:
		res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
					      "hmm_dmirror");
		if (IS_ERR_OR_NULL(res))
			goto err_devmem;
		devmem->pagemap.range.start = res->start;
		devmem->pagemap.range.end = res->end;
		devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
		break;
	case HMM_DMIRROR_MEMORY_DEVICE_COHERENT:
		devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ?
							spm_addr_dev0 :
							spm_addr_dev1;
		devmem->pagemap.range.end = devmem->pagemap.range.start +
					    DEVMEM_CHUNK_SIZE - 1;
		devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
		break;
	default:
		ret = -EINVAL;
		goto err_devmem;
	}

	devmem->pagemap.nr_range = 1;
	devmem->pagemap.ops = &dmirror_devmem_ops;
	devmem->pagemap.owner = mdevice;

	mutex_lock(&mdevice->devmem_lock);

	if (mdevice->devmem_count == mdevice->devmem_capacity) {
		struct dmirror_chunk **new_chunks;
		unsigned int new_capacity;

		new_capacity = mdevice->devmem_capacity +
				DEVMEM_CHUNKS_RESERVE;
		new_chunks = krealloc(mdevice->devmem_chunks,
				sizeof(new_chunks[0]) * new_capacity,
				GFP_KERNEL);
		if (!new_chunks)
			goto err_release;
		mdevice->devmem_capacity = new_capacity;
		mdevice->devmem_chunks = new_chunks;
	}
	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
	if (IS_ERR_OR_NULL(ptr)) {
		if (ptr)
			ret = PTR_ERR(ptr);
		else
			ret = -EFAULT;
		goto err_release;
	}

	devmem->mdevice = mdevice;
	pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
	pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT);
	mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;

	mutex_unlock(&mdevice->devmem_lock);

	pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
		DEVMEM_CHUNK_SIZE / (1024 * 1024),
		mdevice->devmem_count,
		mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
		pfn_first, pfn_last);

	spin_lock(&mdevice->lock);
	for (pfn = pfn_first; pfn < pfn_last; pfn++) {
		struct page *page = pfn_to_page(pfn);

		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	if (ppage) {
		*ppage = mdevice->free_pages;
		mdevice->free_pages = (*ppage)->zone_device_data;
		mdevice->calloc++;
	}
	spin_unlock(&mdevice->lock);

	return 0;

err_release:
	mutex_unlock(&mdevice->devmem_lock);
	if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
		release_mem_region(devmem->pagemap.range.start,
				   range_len(&devmem->pagemap.range));
err_devmem:
	kfree(devmem);

	return ret;
}

static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
	struct page *dpage = NULL;
	struct page *rpage = NULL;

	/*
	 * For the ZONE_DEVICE private type, this is a fake device, so we
	 * allocate real system memory to store our device memory.
	 * For the ZONE_DEVICE coherent type, we use the actual dpage to
	 * store the data and ignore rpage.
	 */
	if (dmirror_is_private_zone(mdevice)) {
		rpage = alloc_page(GFP_HIGHUSER);
		if (!rpage)
			return NULL;
	}
	spin_lock(&mdevice->lock);

	if (mdevice->free_pages) {
		dpage = mdevice->free_pages;
		mdevice->free_pages = dpage->zone_device_data;
		mdevice->calloc++;
		spin_unlock(&mdevice->lock);
	} else {
		spin_unlock(&mdevice->lock);
		if (dmirror_allocate_chunk(mdevice, &dpage))
			goto error;
	}

	zone_device_page_init(dpage);
	dpage->zone_device_data = rpage;
	return dpage;

error:
	if (rpage)
		__free_page(rpage);
	return NULL;
}

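/*
 * Free-list note (descriptive only): while a ZONE_DEVICE page sits on
 * mdevice->free_pages, its zone_device_data field doubles as the
 * "next" pointer of a singly linked list (threaded together in
 * dmirror_allocate_chunk()). Once allocated, zone_device_data is
 * repurposed: it points at the backing system page for device-private
 * memory, or is left NULL for device-coherent memory.
 */
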
static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
					   struct dmirror *dmirror)
{
	struct dmirror_device *mdevice = dmirror->mdevice;
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long addr;

	for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
						   src++, dst++) {
		struct page *spage;
		struct page *dpage;
		struct page *rpage;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		/*
		 * Note that spage might be NULL which is OK since it is an
		 * unallocated pte_none() or read-only zero page.
		 */
		spage = migrate_pfn_to_page(*src);
		if (WARN(spage && is_zone_device_page(spage),
		     "page already in device spage pfn: 0x%lx\n",
		     page_to_pfn(spage)))
			continue;

		dpage = dmirror_devmem_alloc_page(mdevice);
		if (!dpage)
			continue;

		rpage = BACKING_PAGE(dpage);
		if (spage)
			copy_highpage(rpage, spage);
		else
			clear_highpage(rpage);

		/*
		 * Normally, a device would use the page->zone_device_data to
		 * point to the mirror but here we use it to hold the page for
		 * the simulated device memory and that page holds the pointer
		 * to the mirror.
		 */
		rpage->zone_device_data = dmirror;

		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));
		*dst = migrate_pfn(page_to_pfn(dpage));
		if ((*src & MIGRATE_PFN_WRITE) ||
		    (!spage && args->vma->vm_flags & VM_WRITE))
			*dst |= MIGRATE_PFN_WRITE;
	}
}

static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
			     unsigned long end)
{
	unsigned long pfn;

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;

		entry = xa_load(&dmirror->pt, pfn);
		if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC)
			return -EPERM;
	}

	return 0;
}

static int dmirror_atomic_map(unsigned long start, unsigned long end,
			      struct page **pages, struct dmirror *dmirror)
{
	unsigned long pfn, mapped = 0;
	int i;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) {
		void *entry;

		if (!pages[i])
			continue;

		entry = pages[i];
		entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}

		mapped++;
	}

	mutex_unlock(&dmirror->mutex);
	return mapped;
}

static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
					    struct dmirror *dmirror)
{
	unsigned long start = args->start;
	unsigned long end = args->end;
	const unsigned long *src = args->src;
	const unsigned long *dst = args->dst;
	unsigned long pfn;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
								src++, dst++) {
		struct page *dpage;
		void *entry;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = migrate_pfn_to_page(*dst);
		if (!dpage)
			continue;

		entry = BACKING_PAGE(dpage);
		if (*dst & MIGRATE_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}
	}

	mutex_unlock(&dmirror->mutex);
	return 0;
}

static int dmirror_exclusive(struct dmirror *dmirror,
			     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct page *pages[64];
	struct dmirror_bounce bounce;
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		unsigned long mapped = 0;
		int i;

		if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
			next = end;
		else
			next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);

		ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
		/*
		 * Do dmirror_atomic_map() iff all pages are marked for
		 * exclusive access to avoid accessing uninitialized
		 * fields of pages.
		 */
		if (ret == (next - addr) >> PAGE_SHIFT)
			mapped = dmirror_atomic_map(addr, next, pages, dmirror);
		for (i = 0; i < ret; i++) {
			if (pages[i]) {
				unlock_page(pages[i]);
				put_page(pages[i]);
			}
		}

		if (addr + (mapped << PAGE_SHIFT) < next) {
			mmap_read_unlock(mm);
			mmput(mm);
			return -EBUSY;
		}
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/* Return the migrated data for verification. */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}

	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}

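/*
 * As used above, make_device_exclusive_range() returns the number of
 * pages it processed, with pages[i] non-NULL (locked and referenced)
 * only for entries successfully marked for exclusive access; hence the
 * caller unlocks/puts every non-NULL page and treats anything short of
 * a full batch as -EBUSY.
 */
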
static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						      struct dmirror *dmirror)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;

		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
		spage = BACKING_PAGE(spage);
		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			continue;
		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));

		lock_page(dpage);
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage));
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}

static unsigned long
dmirror_successful_migrated_pages(struct migrate_vma *migrate)
{
	unsigned long cpages = 0;
	unsigned long i;

	for (i = 0; i < migrate->npages; i++) {
		if (migrate->src[i] & MIGRATE_PFN_VALID &&
		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
			cpages++;
	}
	return cpages;
}

static int dmirror_migrate_to_system(struct dmirror *dmirror,
				     struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64] = { 0 };
	unsigned long dst_pfns[64] = { 0 };
	struct migrate_vma args = { 0 };
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	cmd->cpages = 0;
	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
		if (!vma || !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = dmirror_select_device(dmirror);

		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		pr_debug("Migrating from device mem to sys mem\n");
		dmirror_devmem_fault_alloc_and_copy(&args, dmirror);

		migrate_vma_pages(&args);
		cmd->cpages += dmirror_successful_migrated_pages(&args);
		migrate_vma_finalize(&args);
	}
out:
	mmap_read_unlock(mm);
	mmput(mm);

	return ret;
}

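/*
 * Both migration directions follow the three-phase migrate_vma
 * protocol, sketched here per batch (see the function above and
 * dmirror_migrate_to_device() below):
 *
 *	migrate_vma_setup(&args);	collect and unmap source pages
 *	...allocate destination pages, copy data, fill args.dst...
 *	migrate_vma_pages(&args);	install the destination pages
 *	...update the device's page tables (dmirror->pt)...
 *	migrate_vma_finalize(&args);	unlock and release the pages
 */
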
static int dmirror_migrate_to_device(struct dmirror *dmirror,
				struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64] = { 0 };
	unsigned long dst_pfns[64] = { 0 };
	struct dmirror_bounce bounce;
	struct migrate_vma args = { 0 };
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
		if (!vma || !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = MIGRATE_VMA_SELECT_SYSTEM;
		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		pr_debug("Migrating from sys mem to device mem\n");
		dmirror_migrate_alloc_and_copy(&args, dmirror);
		migrate_vma_pages(&args);
		dmirror_migrate_finalize_and_map(&args, dmirror);
		migrate_vma_finalize(&args);
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/*
	 * Return the migrated data for verification.
	 * This applies only to pages in the device zone.
	 */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;

out:
	mmap_read_unlock(mm);
	mmput(mm);
	return ret;
}

dmirror_mkentry(struct dmirror * dmirror,struct hmm_range * range,unsigned char * perm,unsigned long entry)1039b2ef9f5aSRalph Campbell static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
1040b2ef9f5aSRalph Campbell 			    unsigned char *perm, unsigned long entry)
1041b2ef9f5aSRalph Campbell {
1042b2ef9f5aSRalph Campbell 	struct page *page;
1043b2ef9f5aSRalph Campbell 
1044b2ef9f5aSRalph Campbell 	if (entry & HMM_PFN_ERROR) {
1045b2ef9f5aSRalph Campbell 		*perm = HMM_DMIRROR_PROT_ERROR;
1046b2ef9f5aSRalph Campbell 		return;
1047b2ef9f5aSRalph Campbell 	}
1048b2ef9f5aSRalph Campbell 	if (!(entry & HMM_PFN_VALID)) {
1049b2ef9f5aSRalph Campbell 		*perm = HMM_DMIRROR_PROT_NONE;
1050b2ef9f5aSRalph Campbell 		return;
1051b2ef9f5aSRalph Campbell 	}
1052b2ef9f5aSRalph Campbell 
1053b2ef9f5aSRalph Campbell 	page = hmm_pfn_to_page(entry);
1054b2ef9f5aSRalph Campbell 	if (is_device_private_page(page)) {
1055b2ef9f5aSRalph Campbell 		/* Is the page migrated to this device or some other? */
1056b2ef9f5aSRalph Campbell 		if (dmirror->mdevice == dmirror_page_to_device(page))
1057b2ef9f5aSRalph Campbell 			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
1058b2ef9f5aSRalph Campbell 		else
1059b2ef9f5aSRalph Campbell 			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
10604c2e0f76SAlex Sierra 	} else if (is_device_coherent_page(page)) {
10614c2e0f76SAlex Sierra 		/* Is the page migrated to this device or some other? */
10624c2e0f76SAlex Sierra 		if (dmirror->mdevice == dmirror_page_to_device(page))
10634c2e0f76SAlex Sierra 			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
10644c2e0f76SAlex Sierra 		else
10654c2e0f76SAlex Sierra 			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
1066b2ef9f5aSRalph Campbell 	} else if (is_zero_pfn(page_to_pfn(page)))
1067b2ef9f5aSRalph Campbell 		*perm = HMM_DMIRROR_PROT_ZERO;
1068b2ef9f5aSRalph Campbell 	else
1069b2ef9f5aSRalph Campbell 		*perm = HMM_DMIRROR_PROT_NONE;
1070b2ef9f5aSRalph Campbell 	if (entry & HMM_PFN_WRITE)
1071b2ef9f5aSRalph Campbell 		*perm |= HMM_DMIRROR_PROT_WRITE;
1072b2ef9f5aSRalph Campbell 	else
1073b2ef9f5aSRalph Campbell 		*perm |= HMM_DMIRROR_PROT_READ;
1074e478425bSRalph Campbell 	if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT)
1075e478425bSRalph Campbell 		*perm |= HMM_DMIRROR_PROT_PMD;
1076e478425bSRalph Campbell 	else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT)
1077e478425bSRalph Campbell 		*perm |= HMM_DMIRROR_PROT_PUD;
1078b2ef9f5aSRalph Campbell }
1079b2ef9f5aSRalph Campbell 
1080b2ef9f5aSRalph Campbell static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
1081b2ef9f5aSRalph Campbell 				const struct mmu_notifier_range *range,
1082b2ef9f5aSRalph Campbell 				unsigned long cur_seq)
1083b2ef9f5aSRalph Campbell {
1084b2ef9f5aSRalph Campbell 	struct dmirror_interval *dmi =
1085b2ef9f5aSRalph Campbell 		container_of(mni, struct dmirror_interval, notifier);
1086b2ef9f5aSRalph Campbell 	struct dmirror *dmirror = dmi->dmirror;
1087b2ef9f5aSRalph Campbell 
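	/*
	 * A non-blockable invalidation must not sleep: just try the
	 * mutex and report failure to the caller if it is contended.
	 */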
1088b2ef9f5aSRalph Campbell 	if (mmu_notifier_range_blockable(range))
1089b2ef9f5aSRalph Campbell 		mutex_lock(&dmirror->mutex);
1090b2ef9f5aSRalph Campbell 	else if (!mutex_trylock(&dmirror->mutex))
1091b2ef9f5aSRalph Campbell 		return false;
1092b2ef9f5aSRalph Campbell 
1093b2ef9f5aSRalph Campbell 	/*
1094b2ef9f5aSRalph Campbell 	 * Snapshots only need to set the sequence number since any
1095b2ef9f5aSRalph Campbell 	 * invalidation in the interval invalidates the whole snapshot.
1096b2ef9f5aSRalph Campbell 	 */
1097b2ef9f5aSRalph Campbell 	mmu_interval_set_seq(mni, cur_seq);
1098b2ef9f5aSRalph Campbell 
1099b2ef9f5aSRalph Campbell 	mutex_unlock(&dmirror->mutex);
1100b2ef9f5aSRalph Campbell 	return true;
1101b2ef9f5aSRalph Campbell }
1102b2ef9f5aSRalph Campbell 
1103b2ef9f5aSRalph Campbell static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
1104b2ef9f5aSRalph Campbell 	.invalidate = dmirror_snapshot_invalidate,
1105b2ef9f5aSRalph Campbell };
1106b2ef9f5aSRalph Campbell 
1107b2ef9f5aSRalph Campbell static int dmirror_range_snapshot(struct dmirror *dmirror,
1108b2ef9f5aSRalph Campbell 				  struct hmm_range *range,
1109b2ef9f5aSRalph Campbell 				  unsigned char *perm)
1110b2ef9f5aSRalph Campbell {
1111b2ef9f5aSRalph Campbell 	struct mm_struct *mm = dmirror->notifier.mm;
1112b2ef9f5aSRalph Campbell 	struct dmirror_interval notifier;
1113b2ef9f5aSRalph Campbell 	unsigned long timeout =
1114b2ef9f5aSRalph Campbell 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1115b2ef9f5aSRalph Campbell 	unsigned long i;
1116b2ef9f5aSRalph Campbell 	unsigned long n;
1117b2ef9f5aSRalph Campbell 	int ret = 0;
1118b2ef9f5aSRalph Campbell 
1119b2ef9f5aSRalph Campbell 	notifier.dmirror = dmirror;
1120b2ef9f5aSRalph Campbell 	range->notifier = &notifier.notifier;
1121b2ef9f5aSRalph Campbell 
1122b2ef9f5aSRalph Campbell 	ret = mmu_interval_notifier_insert(range->notifier, mm,
1123b2ef9f5aSRalph Campbell 			range->start, range->end - range->start,
1124b2ef9f5aSRalph Campbell 			&dmirror_mrn_ops);
1125b2ef9f5aSRalph Campbell 	if (ret)
1126b2ef9f5aSRalph Campbell 		return ret;
1127b2ef9f5aSRalph Campbell 
1128b2ef9f5aSRalph Campbell 	while (true) {
1129b2ef9f5aSRalph Campbell 		if (time_after(jiffies, timeout)) {
1130b2ef9f5aSRalph Campbell 			ret = -EBUSY;
1131b2ef9f5aSRalph Campbell 			goto out;
1132b2ef9f5aSRalph Campbell 		}
1133b2ef9f5aSRalph Campbell 
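		/*
		 * Classic HMM fault-and-retry sequence: sample the
		 * notifier sequence, fault the range in without the
		 * driver lock held, then retry if an invalidation
		 * raced with the walk.
		 */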
1134b2ef9f5aSRalph Campbell 		range->notifier_seq = mmu_interval_read_begin(range->notifier);
1135b2ef9f5aSRalph Campbell 
113689154dd5SMichel Lespinasse 		mmap_read_lock(mm);
1137b2ef9f5aSRalph Campbell 		ret = hmm_range_fault(range);
113889154dd5SMichel Lespinasse 		mmap_read_unlock(mm);
1139b2ef9f5aSRalph Campbell 		if (ret) {
1140b2ef9f5aSRalph Campbell 			if (ret == -EBUSY)
1141b2ef9f5aSRalph Campbell 				continue;
1142b2ef9f5aSRalph Campbell 			goto out;
1143b2ef9f5aSRalph Campbell 		}
1144b2ef9f5aSRalph Campbell 
1145b2ef9f5aSRalph Campbell 		mutex_lock(&dmirror->mutex);
1146b2ef9f5aSRalph Campbell 		if (mmu_interval_read_retry(range->notifier,
1147b2ef9f5aSRalph Campbell 					    range->notifier_seq)) {
1148b2ef9f5aSRalph Campbell 			mutex_unlock(&dmirror->mutex);
1149b2ef9f5aSRalph Campbell 			continue;
1150b2ef9f5aSRalph Campbell 		}
1151b2ef9f5aSRalph Campbell 		break;
1152b2ef9f5aSRalph Campbell 	}
1153b2ef9f5aSRalph Campbell 
1154b2ef9f5aSRalph Campbell 	n = (range->end - range->start) >> PAGE_SHIFT;
1155b2ef9f5aSRalph Campbell 	for (i = 0; i < n; i++)
1156b2ef9f5aSRalph Campbell 		dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);
1157b2ef9f5aSRalph Campbell 
1158b2ef9f5aSRalph Campbell 	mutex_unlock(&dmirror->mutex);
1159b2ef9f5aSRalph Campbell out:
1160b2ef9f5aSRalph Campbell 	mmu_interval_notifier_remove(range->notifier);
1161b2ef9f5aSRalph Campbell 	return ret;
1162b2ef9f5aSRalph Campbell }
1163b2ef9f5aSRalph Campbell 
1164b2ef9f5aSRalph Campbell static int dmirror_snapshot(struct dmirror *dmirror,
1165b2ef9f5aSRalph Campbell 			    struct hmm_dmirror_cmd *cmd)
1166b2ef9f5aSRalph Campbell {
1167b2ef9f5aSRalph Campbell 	struct mm_struct *mm = dmirror->notifier.mm;
1168b2ef9f5aSRalph Campbell 	unsigned long start, end;
1169b2ef9f5aSRalph Campbell 	unsigned long size = cmd->npages << PAGE_SHIFT;
1170b2ef9f5aSRalph Campbell 	unsigned long addr;
1171b2ef9f5aSRalph Campbell 	unsigned long next;
1172b2ef9f5aSRalph Campbell 	unsigned long pfns[64];
1173b2ef9f5aSRalph Campbell 	unsigned char perm[64];
1174b2ef9f5aSRalph Campbell 	char __user *uptr;
1175b2ef9f5aSRalph Campbell 	struct hmm_range range = {
1176b2ef9f5aSRalph Campbell 		.hmm_pfns = pfns,
1177b2ef9f5aSRalph Campbell 		.dev_private_owner = dmirror->mdevice,
1178b2ef9f5aSRalph Campbell 	};
1179b2ef9f5aSRalph Campbell 	int ret = 0;
1180b2ef9f5aSRalph Campbell 
1181b2ef9f5aSRalph Campbell 	start = cmd->addr;
1182b2ef9f5aSRalph Campbell 	end = start + size;
1183b2ef9f5aSRalph Campbell 	if (end < start)
1184b2ef9f5aSRalph Campbell 		return -EINVAL;
1185b2ef9f5aSRalph Campbell 
1186b2ef9f5aSRalph Campbell 	/* Since the mm is for the mirrored process, get a reference first. */
1187b2ef9f5aSRalph Campbell 	if (!mmget_not_zero(mm))
1188b2ef9f5aSRalph Campbell 		return -EINVAL;
1189b2ef9f5aSRalph Campbell 
1190b2ef9f5aSRalph Campbell 	/*
1191b2ef9f5aSRalph Campbell 	 * Register a temporary notifier to detect invalidations even if it
1192b2ef9f5aSRalph Campbell 	 * overlaps with other mmu_interval_notifiers.
1193b2ef9f5aSRalph Campbell 	 */
1194b2ef9f5aSRalph Campbell 	uptr = u64_to_user_ptr(cmd->ptr);
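	/* Snapshot the range in pfns[]-sized (64 page) batches. */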
1195b2ef9f5aSRalph Campbell 	for (addr = start; addr < end; addr = next) {
1196b2ef9f5aSRalph Campbell 		unsigned long n;
1197b2ef9f5aSRalph Campbell 
1198b2ef9f5aSRalph Campbell 		next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
1199b2ef9f5aSRalph Campbell 		range.start = addr;
1200b2ef9f5aSRalph Campbell 		range.end = next;
1201b2ef9f5aSRalph Campbell 
1202b2ef9f5aSRalph Campbell 		ret = dmirror_range_snapshot(dmirror, &range, perm);
1203b2ef9f5aSRalph Campbell 		if (ret)
1204b2ef9f5aSRalph Campbell 			break;
1205b2ef9f5aSRalph Campbell 
1206b2ef9f5aSRalph Campbell 		n = (range.end - range.start) >> PAGE_SHIFT;
1207b2ef9f5aSRalph Campbell 		if (copy_to_user(uptr, perm, n)) {
1208b2ef9f5aSRalph Campbell 			ret = -EFAULT;
1209b2ef9f5aSRalph Campbell 			break;
1210b2ef9f5aSRalph Campbell 		}
1211b2ef9f5aSRalph Campbell 
1212b2ef9f5aSRalph Campbell 		cmd->cpages += n;
1213b2ef9f5aSRalph Campbell 		uptr += n;
1214b2ef9f5aSRalph Campbell 	}
1215b2ef9f5aSRalph Campbell 	mmput(mm);
1216b2ef9f5aSRalph Campbell 
1217b2ef9f5aSRalph Campbell 	return ret;
1218b2ef9f5aSRalph Campbell }
1219b2ef9f5aSRalph Campbell 
1220ad4c3652SAlistair Popple static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
1221ad4c3652SAlistair Popple {
1222ad4c3652SAlistair Popple 	unsigned long start_pfn = chunk->pagemap.range.start >> PAGE_SHIFT;
1223ad4c3652SAlistair Popple 	unsigned long end_pfn = chunk->pagemap.range.end >> PAGE_SHIFT;
1224ad4c3652SAlistair Popple 	unsigned long npages = end_pfn - start_pfn + 1;
1225ad4c3652SAlistair Popple 	unsigned long i;
1226ad4c3652SAlistair Popple 	unsigned long *src_pfns;
1227ad4c3652SAlistair Popple 	unsigned long *dst_pfns;
1228ad4c3652SAlistair Popple 
	/* Eviction must not fail midway; retry the allocations until they succeed. */
1229ad4c3652SAlistair Popple 	src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
1230ad4c3652SAlistair Popple 	dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
1231ad4c3652SAlistair Popple 
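	/*
	 * Collect every device page in the chunk; any page still in use
	 * is copied back to a freshly allocated system page below.
	 */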
1232ad4c3652SAlistair Popple 	migrate_device_range(src_pfns, start_pfn, npages);
1233ad4c3652SAlistair Popple 	for (i = 0; i < npages; i++) {
1234ad4c3652SAlistair Popple 		struct page *dpage, *spage;
1235ad4c3652SAlistair Popple 
1236ad4c3652SAlistair Popple 		spage = migrate_pfn_to_page(src_pfns[i]);
1237ad4c3652SAlistair Popple 		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
1238ad4c3652SAlistair Popple 			continue;
1239ad4c3652SAlistair Popple 
1240ad4c3652SAlistair Popple 		if (WARN_ON(!is_device_private_page(spage) &&
1241ad4c3652SAlistair Popple 			    !is_device_coherent_page(spage)))
1242ad4c3652SAlistair Popple 			continue;
1243ad4c3652SAlistair Popple 		spage = BACKING_PAGE(spage);
1244ad4c3652SAlistair Popple 		dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
1245ad4c3652SAlistair Popple 		lock_page(dpage);
1246ad4c3652SAlistair Popple 		copy_highpage(dpage, spage);
1247ad4c3652SAlistair Popple 		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
1248ad4c3652SAlistair Popple 		if (src_pfns[i] & MIGRATE_PFN_WRITE)
1249ad4c3652SAlistair Popple 			dst_pfns[i] |= MIGRATE_PFN_WRITE;
1250ad4c3652SAlistair Popple 	}
1251ad4c3652SAlistair Popple 	migrate_device_pages(src_pfns, dst_pfns, npages);
1252ad4c3652SAlistair Popple 	migrate_device_finalize(src_pfns, dst_pfns, npages);
1253ad4c3652SAlistair Popple 	kfree(src_pfns);
1254ad4c3652SAlistair Popple 	kfree(dst_pfns);
1255ad4c3652SAlistair Popple }
1256ad4c3652SAlistair Popple 
1257ad4c3652SAlistair Popple /* Removes free pages from the free list so they can't be re-allocated */
1258ad4c3652SAlistair Popple static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
1259ad4c3652SAlistair Popple {
1260ad4c3652SAlistair Popple 	struct dmirror_device *mdevice = devmem->mdevice;
1261ad4c3652SAlistair Popple 	struct page *page;
1262ad4c3652SAlistair Popple 
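	/* The free list is threaded through page->zone_device_data. */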
1263ad4c3652SAlistair Popple 	for (page = mdevice->free_pages; page; page = page->zone_device_data)
1264ad4c3652SAlistair Popple 		if (dmirror_page_to_chunk(page) == devmem)
1265ad4c3652SAlistair Popple 			mdevice->free_pages = page->zone_device_data;
1266ad4c3652SAlistair Popple }
1267ad4c3652SAlistair Popple 
1268ad4c3652SAlistair Popple static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
1269ad4c3652SAlistair Popple {
1270ad4c3652SAlistair Popple 	unsigned int i;
1271ad4c3652SAlistair Popple 
1272ad4c3652SAlistair Popple 	mutex_lock(&mdevice->devmem_lock);
1273ad4c3652SAlistair Popple 	if (mdevice->devmem_chunks) {
1274ad4c3652SAlistair Popple 		for (i = 0; i < mdevice->devmem_count; i++) {
1275ad4c3652SAlistair Popple 			struct dmirror_chunk *devmem =
1276ad4c3652SAlistair Popple 				mdevice->devmem_chunks[i];
1277ad4c3652SAlistair Popple 
1278ad4c3652SAlistair Popple 			spin_lock(&mdevice->lock);
1279ad4c3652SAlistair Popple 			devmem->remove = true;
1280ad4c3652SAlistair Popple 			dmirror_remove_free_pages(devmem);
1281ad4c3652SAlistair Popple 			spin_unlock(&mdevice->lock);
1282ad4c3652SAlistair Popple 
1283ad4c3652SAlistair Popple 			dmirror_device_evict_chunk(devmem);
1284ad4c3652SAlistair Popple 			memunmap_pages(&devmem->pagemap);
1285ad4c3652SAlistair Popple 			if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
1286ad4c3652SAlistair Popple 				release_mem_region(devmem->pagemap.range.start,
1287ad4c3652SAlistair Popple 						   range_len(&devmem->pagemap.range));
1288ad4c3652SAlistair Popple 			kfree(devmem);
1289ad4c3652SAlistair Popple 		}
1290ad4c3652SAlistair Popple 		mdevice->devmem_count = 0;
1291ad4c3652SAlistair Popple 		mdevice->devmem_capacity = 0;
1292ad4c3652SAlistair Popple 		mdevice->free_pages = NULL;
1293ad4c3652SAlistair Popple 		kfree(mdevice->devmem_chunks);
1294ad4c3652SAlistair Popple 		mdevice->devmem_chunks = NULL;
1295ad4c3652SAlistair Popple 	}
1296ad4c3652SAlistair Popple 	mutex_unlock(&mdevice->devmem_lock);
1297ad4c3652SAlistair Popple }
1298ad4c3652SAlistair Popple 
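/*
 * A minimal sketch of driving these commands from userspace, assuming
 * the command layout from test_hmm_uapi.h (the fd comes from opening
 * one of the /dev/hmm_dmirror* nodes; "buf" and "out" are hypothetical
 * page-aligned buffers):
 *
 *	struct hmm_dmirror_cmd cmd = {
 *		.addr	= (__u64)(uintptr_t)buf,  // start of range to mirror
 *		.ptr	= (__u64)(uintptr_t)out,  // where results are copied
 *		.npages	= 1,
 *	};
 *	if (ioctl(fd, HMM_DMIRROR_READ, &cmd) == 0)
 *		printf("%llu device pages read\n", cmd.cpages);
 */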
1299b2ef9f5aSRalph Campbell static long dmirror_fops_unlocked_ioctl(struct file *filp,
1300b2ef9f5aSRalph Campbell 					unsigned int command,
1301b2ef9f5aSRalph Campbell 					unsigned long arg)
1302b2ef9f5aSRalph Campbell {
1303b2ef9f5aSRalph Campbell 	void __user *uarg = (void __user *)arg;
1304b2ef9f5aSRalph Campbell 	struct hmm_dmirror_cmd cmd;
1305b2ef9f5aSRalph Campbell 	struct dmirror *dmirror;
1306b2ef9f5aSRalph Campbell 	int ret;
1307b2ef9f5aSRalph Campbell 
1308b2ef9f5aSRalph Campbell 	dmirror = filp->private_data;
1309b2ef9f5aSRalph Campbell 	if (!dmirror)
1310b2ef9f5aSRalph Campbell 		return -EINVAL;
1311b2ef9f5aSRalph Campbell 
1312b2ef9f5aSRalph Campbell 	if (copy_from_user(&cmd, uarg, sizeof(cmd)))
1313b2ef9f5aSRalph Campbell 		return -EFAULT;
1314b2ef9f5aSRalph Campbell 
1315b2ef9f5aSRalph Campbell 	if (cmd.addr & ~PAGE_MASK)
1316b2ef9f5aSRalph Campbell 		return -EINVAL;
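	/* Reject empty ranges and ranges that wrap the address space. */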
1317b2ef9f5aSRalph Campbell 	if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
1318b2ef9f5aSRalph Campbell 		return -EINVAL;
1319b2ef9f5aSRalph Campbell 
1320b2ef9f5aSRalph Campbell 	cmd.cpages = 0;
1321b2ef9f5aSRalph Campbell 	cmd.faults = 0;
1322b2ef9f5aSRalph Campbell 
1323b2ef9f5aSRalph Campbell 	switch (command) {
1324b2ef9f5aSRalph Campbell 	case HMM_DMIRROR_READ:
1325b2ef9f5aSRalph Campbell 		ret = dmirror_read(dmirror, &cmd);
1326b2ef9f5aSRalph Campbell 		break;
1327b2ef9f5aSRalph Campbell 
1328b2ef9f5aSRalph Campbell 	case HMM_DMIRROR_WRITE:
1329b2ef9f5aSRalph Campbell 		ret = dmirror_write(dmirror, &cmd);
1330b2ef9f5aSRalph Campbell 		break;
1331b2ef9f5aSRalph Campbell 
13324c2e0f76SAlex Sierra 	case HMM_DMIRROR_MIGRATE_TO_DEV:
13334c2e0f76SAlex Sierra 		ret = dmirror_migrate_to_device(dmirror, &cmd);
13344c2e0f76SAlex Sierra 		break;
13354c2e0f76SAlex Sierra 
13364c2e0f76SAlex Sierra 	case HMM_DMIRROR_MIGRATE_TO_SYS:
13374c2e0f76SAlex Sierra 		ret = dmirror_migrate_to_system(dmirror, &cmd);
1338b2ef9f5aSRalph Campbell 		break;
1339b2ef9f5aSRalph Campbell 
1340b659baeaSAlistair Popple 	case HMM_DMIRROR_EXCLUSIVE:
1341b659baeaSAlistair Popple 		ret = dmirror_exclusive(dmirror, &cmd);
1342b659baeaSAlistair Popple 		break;
1343b659baeaSAlistair Popple 
1344b659baeaSAlistair Popple 	case HMM_DMIRROR_CHECK_EXCLUSIVE:
1345b659baeaSAlistair Popple 		ret = dmirror_check_atomic(dmirror, cmd.addr,
1346b659baeaSAlistair Popple 					cmd.addr + (cmd.npages << PAGE_SHIFT));
1347b659baeaSAlistair Popple 		break;
1348b659baeaSAlistair Popple 
1349b2ef9f5aSRalph Campbell 	case HMM_DMIRROR_SNAPSHOT:
1350b2ef9f5aSRalph Campbell 		ret = dmirror_snapshot(dmirror, &cmd);
1351b2ef9f5aSRalph Campbell 		break;
1352b2ef9f5aSRalph Campbell 
1353ad4c3652SAlistair Popple 	case HMM_DMIRROR_RELEASE:
1354ad4c3652SAlistair Popple 		dmirror_device_remove_chunks(dmirror->mdevice);
1355ad4c3652SAlistair Popple 		ret = 0;
1356ad4c3652SAlistair Popple 		break;
1357ad4c3652SAlistair Popple 
1358b2ef9f5aSRalph Campbell 	default:
1359b2ef9f5aSRalph Campbell 		return -EINVAL;
1360b2ef9f5aSRalph Campbell 	}
1361b2ef9f5aSRalph Campbell 	if (ret)
1362b2ef9f5aSRalph Campbell 		return ret;
1363b2ef9f5aSRalph Campbell 
1364b2ef9f5aSRalph Campbell 	if (copy_to_user(uarg, &cmd, sizeof(cmd)))
1365b2ef9f5aSRalph Campbell 		return -EFAULT;
1366b2ef9f5aSRalph Campbell 
1367b2ef9f5aSRalph Campbell 	return 0;
1368b2ef9f5aSRalph Campbell }
1369b2ef9f5aSRalph Campbell 
137087c01d57SAlistair Popple static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
137187c01d57SAlistair Popple {
137287c01d57SAlistair Popple 	unsigned long addr;
137387c01d57SAlistair Popple 
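	/*
	 * No fault handler here: pre-populate the whole VMA with
	 * freshly zeroed pages at mmap() time.
	 */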
137487c01d57SAlistair Popple 	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
137587c01d57SAlistair Popple 		struct page *page;
137687c01d57SAlistair Popple 		int ret;
137787c01d57SAlistair Popple 
137887c01d57SAlistair Popple 		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
137987c01d57SAlistair Popple 		if (!page)
138087c01d57SAlistair Popple 			return -ENOMEM;
138187c01d57SAlistair Popple 
138287c01d57SAlistair Popple 		ret = vm_insert_page(vma, addr, page);
138387c01d57SAlistair Popple 		if (ret) {
138487c01d57SAlistair Popple 			__free_page(page);
138587c01d57SAlistair Popple 			return ret;
138687c01d57SAlistair Popple 		}
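		/* vm_insert_page() took its own reference; drop ours. */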
138787c01d57SAlistair Popple 		put_page(page);
138887c01d57SAlistair Popple 	}
138987c01d57SAlistair Popple 
139087c01d57SAlistair Popple 	return 0;
139187c01d57SAlistair Popple }
139287c01d57SAlistair Popple 
1393b2ef9f5aSRalph Campbell static const struct file_operations dmirror_fops = {
1394b2ef9f5aSRalph Campbell 	.open		= dmirror_fops_open,
1395b2ef9f5aSRalph Campbell 	.release	= dmirror_fops_release,
139687c01d57SAlistair Popple 	.mmap		= dmirror_fops_mmap,
1397b2ef9f5aSRalph Campbell 	.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
1398b2ef9f5aSRalph Campbell 	.llseek		= default_llseek,
1399b2ef9f5aSRalph Campbell 	.owner		= THIS_MODULE,
1400b2ef9f5aSRalph Campbell };
1401b2ef9f5aSRalph Campbell 
1402b2ef9f5aSRalph Campbell static void dmirror_devmem_free(struct page *page)
1403b2ef9f5aSRalph Campbell {
14044c2e0f76SAlex Sierra 	struct page *rpage = BACKING_PAGE(page);
1405b2ef9f5aSRalph Campbell 	struct dmirror_device *mdevice;
1406b2ef9f5aSRalph Campbell 
14074c2e0f76SAlex Sierra 	if (rpage != page)
1408b2ef9f5aSRalph Campbell 		__free_page(rpage);
1409b2ef9f5aSRalph Campbell 
1410b2ef9f5aSRalph Campbell 	mdevice = dmirror_page_to_device(page);
1411b2ef9f5aSRalph Campbell 	spin_lock(&mdevice->lock);
1412ad4c3652SAlistair Popple 
1413ad4c3652SAlistair Popple 	/* Return page to our allocator if not freeing the chunk */
1414ad4c3652SAlistair Popple 	if (!dmirror_page_to_chunk(page)->remove) {
1415b2ef9f5aSRalph Campbell 		mdevice->cfree++;
1416b2ef9f5aSRalph Campbell 		page->zone_device_data = mdevice->free_pages;
1417b2ef9f5aSRalph Campbell 		mdevice->free_pages = page;
1418ad4c3652SAlistair Popple 	}
1419b2ef9f5aSRalph Campbell 	spin_unlock(&mdevice->lock);
1420b2ef9f5aSRalph Campbell }
1421b2ef9f5aSRalph Campbell 
1422b2ef9f5aSRalph Campbell static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
1423b2ef9f5aSRalph Campbell {
142416ce101dSAlistair Popple 	struct migrate_vma args = { 0 };
14254c2e0f76SAlex Sierra 	unsigned long src_pfns = 0;
14264c2e0f76SAlex Sierra 	unsigned long dst_pfns = 0;
1427b2ef9f5aSRalph Campbell 	struct page *rpage;
1428b2ef9f5aSRalph Campbell 	struct dmirror *dmirror;
1429b2ef9f5aSRalph Campbell 	vm_fault_t ret;
1430b2ef9f5aSRalph Campbell 
1431b2ef9f5aSRalph Campbell 	/*
1432b2ef9f5aSRalph Campbell 	 * Normally, a device would use the page->zone_device_data to point to
1433b2ef9f5aSRalph Campbell 	 * the mirror but here we use it to hold the page for the simulated
1434b2ef9f5aSRalph Campbell 	 * device memory and that page holds the pointer to the mirror.
1435b2ef9f5aSRalph Campbell 	 */
1436b2ef9f5aSRalph Campbell 	rpage = vmf->page->zone_device_data;
1437b2ef9f5aSRalph Campbell 	dmirror = rpage->zone_device_data;
1438b2ef9f5aSRalph Campbell 
1439b2ef9f5aSRalph Campbell 	/* FIXME demonstrate how we can adjust migrate range */
1440b2ef9f5aSRalph Campbell 	args.vma = vmf->vma;
1441b2ef9f5aSRalph Campbell 	args.start = vmf->address;
1442b2ef9f5aSRalph Campbell 	args.end = args.start + PAGE_SIZE;
1443b2ef9f5aSRalph Campbell 	args.src = &src_pfns;
1444b2ef9f5aSRalph Campbell 	args.dst = &dst_pfns;
14455143192cSRalph Campbell 	args.pgmap_owner = dmirror->mdevice;
14464c2e0f76SAlex Sierra 	args.flags = dmirror_select_device(dmirror);
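	/* vmf->page is already locked by the CPU fault path; let migrate_vma know. */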
144716ce101dSAlistair Popple 	args.fault_page = vmf->page;
1448b2ef9f5aSRalph Campbell 
1449b2ef9f5aSRalph Campbell 	if (migrate_vma_setup(&args))
1450b2ef9f5aSRalph Campbell 		return VM_FAULT_SIGBUS;
1451b2ef9f5aSRalph Campbell 
14527d17e83aSRalph Campbell 	ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
1453b2ef9f5aSRalph Campbell 	if (ret)
1454b2ef9f5aSRalph Campbell 		return ret;
1455b2ef9f5aSRalph Campbell 	migrate_vma_pages(&args);
14567d17e83aSRalph Campbell 	/*
14577d17e83aSRalph Campbell 	 * No device finalize step is needed since
14587d17e83aSRalph Campbell 	 * dmirror_devmem_fault_alloc_and_copy() will have already
14597d17e83aSRalph Campbell 	 * invalidated the device page table.
14607d17e83aSRalph Campbell 	 */
1461b2ef9f5aSRalph Campbell 	migrate_vma_finalize(&args);
1462b2ef9f5aSRalph Campbell 	return 0;
1463b2ef9f5aSRalph Campbell }
1464b2ef9f5aSRalph Campbell 
1465b2ef9f5aSRalph Campbell static const struct dev_pagemap_ops dmirror_devmem_ops = {
1466b2ef9f5aSRalph Campbell 	.page_free	= dmirror_devmem_free,
1467b2ef9f5aSRalph Campbell 	.migrate_to_ram	= dmirror_devmem_fault,
1468b2ef9f5aSRalph Campbell };
1469b2ef9f5aSRalph Campbell 
1470b2ef9f5aSRalph Campbell static int dmirror_device_init(struct dmirror_device *mdevice, int id)
1471b2ef9f5aSRalph Campbell {
1472b2ef9f5aSRalph Campbell 	dev_t dev;
1473b2ef9f5aSRalph Campbell 	int ret;
1474b2ef9f5aSRalph Campbell 
1475b2ef9f5aSRalph Campbell 	dev = MKDEV(MAJOR(dmirror_dev), id);
1476b2ef9f5aSRalph Campbell 	mutex_init(&mdevice->devmem_lock);
1477b2ef9f5aSRalph Campbell 	spin_lock_init(&mdevice->lock);
1478b2ef9f5aSRalph Campbell 
1479b2ef9f5aSRalph Campbell 	cdev_init(&mdevice->cdevice, &dmirror_fops);
1480b2ef9f5aSRalph Campbell 	mdevice->cdevice.owner = THIS_MODULE;
14816a760f58SMika Penttilä 	device_initialize(&mdevice->device);
14826a760f58SMika Penttilä 	mdevice->device.devt = dev;
14836a760f58SMika Penttilä 
14846a760f58SMika Penttilä 	ret = dev_set_name(&mdevice->device, "hmm_dmirror%u", id);
14856a760f58SMika Penttilä 	if (ret)
14866a760f58SMika Penttilä 		return ret;
14876a760f58SMika Penttilä 
14886a760f58SMika Penttilä 	ret = cdev_device_add(&mdevice->cdevice, &mdevice->device);
1489b2ef9f5aSRalph Campbell 	if (ret)
1490b2ef9f5aSRalph Campbell 		return ret;
1491b2ef9f5aSRalph Campbell 
149225b80162SAlex Sierra 	/* Build a list of free ZONE_DEVICE struct pages */
149325b80162SAlex Sierra 	return dmirror_allocate_chunk(mdevice, NULL);
1494b2ef9f5aSRalph Campbell }
1495b2ef9f5aSRalph Campbell 
1496b2ef9f5aSRalph Campbell static void dmirror_device_remove(struct dmirror_device *mdevice)
1497b2ef9f5aSRalph Campbell {
1498ad4c3652SAlistair Popple 	dmirror_device_remove_chunks(mdevice);
14996a760f58SMika Penttilä 	cdev_device_del(&mdevice->cdevice, &mdevice->device);
1500b2ef9f5aSRalph Campbell }
1501b2ef9f5aSRalph Campbell 
1502b2ef9f5aSRalph Campbell static int __init hmm_dmirror_init(void)
1503b2ef9f5aSRalph Campbell {
1504b2ef9f5aSRalph Campbell 	int ret;
1505188f4826SAlex Sierra 	int id = 0;
1506188f4826SAlex Sierra 	int ndevices = 0;
1507b2ef9f5aSRalph Campbell 
1508b2ef9f5aSRalph Campbell 	ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
1509b2ef9f5aSRalph Campbell 				  "HMM_DMIRROR");
1510b2ef9f5aSRalph Campbell 	if (ret)
1511b2ef9f5aSRalph Campbell 		goto err_unreg;
1512b2ef9f5aSRalph Campbell 
1513188f4826SAlex Sierra 	memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0]));
1514188f4826SAlex Sierra 	dmirror_devices[ndevices++].zone_device_type =
1515188f4826SAlex Sierra 				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
1516188f4826SAlex Sierra 	dmirror_devices[ndevices++].zone_device_type =
1517188f4826SAlex Sierra 				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
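	/*
	 * The third and fourth devices use device-coherent memory and
	 * are created only if both SPM addresses were supplied.
	 */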
15184c2e0f76SAlex Sierra 	if (spm_addr_dev0 && spm_addr_dev1) {
15194c2e0f76SAlex Sierra 		dmirror_devices[ndevices++].zone_device_type =
15204c2e0f76SAlex Sierra 					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
15214c2e0f76SAlex Sierra 		dmirror_devices[ndevices++].zone_device_type =
15224c2e0f76SAlex Sierra 					HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
15234c2e0f76SAlex Sierra 	}
1524188f4826SAlex Sierra 	for (id = 0; id < ndevices; id++) {
1525b2ef9f5aSRalph Campbell 		ret = dmirror_device_init(dmirror_devices + id, id);
1526b2ef9f5aSRalph Campbell 		if (ret)
1527b2ef9f5aSRalph Campbell 			goto err_chrdev;
1528b2ef9f5aSRalph Campbell 	}
1529b2ef9f5aSRalph Campbell 
1530b2ef9f5aSRalph Campbell 	pr_info("HMM test module loaded. This is only for testing HMM.\n");
1531b2ef9f5aSRalph Campbell 	return 0;
1532b2ef9f5aSRalph Campbell 
1533b2ef9f5aSRalph Campbell err_chrdev:
1534b2ef9f5aSRalph Campbell 	while (--id >= 0)
1535b2ef9f5aSRalph Campbell 		dmirror_device_remove(dmirror_devices + id);
1536b2ef9f5aSRalph Campbell 	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
1537b2ef9f5aSRalph Campbell err_unreg:
1538b2ef9f5aSRalph Campbell 	return ret;
1539b2ef9f5aSRalph Campbell }
1540b2ef9f5aSRalph Campbell 
1541b2ef9f5aSRalph Campbell static void __exit hmm_dmirror_exit(void)
1542b2ef9f5aSRalph Campbell {
1543b2ef9f5aSRalph Campbell 	int id;
1544b2ef9f5aSRalph Campbell 
1545b2ef9f5aSRalph Campbell 	for (id = 0; id < DMIRROR_NDEVICES; id++)
15464c2e0f76SAlex Sierra 		if (dmirror_devices[id].zone_device_type)
1547b2ef9f5aSRalph Campbell 			dmirror_device_remove(dmirror_devices + id);
1548b2ef9f5aSRalph Campbell 	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
1549b2ef9f5aSRalph Campbell }
1550b2ef9f5aSRalph Campbell 
1551b2ef9f5aSRalph Campbell module_init(hmm_dmirror_init);
1552b2ef9f5aSRalph Campbell module_exit(hmm_dmirror_exit);
1553b2ef9f5aSRalph Campbell MODULE_LICENSE("GPL");
1554