// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020-2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/types.h>
#include <linux/hmm.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_mn.h"
#include "amdgpu_res_cursor.h"
#include "kfd_priv.h"
#include "kfd_svm.h"
#include "kfd_migrate.h"

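/*
 * Return the GPU address of a VRAM offset: the offset plus the start of the
 * TTM VRAM domain, i.e. the direct mapping the copy engine uses for VRAM.
 */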
static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
	return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}

static int
svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
		     dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	unsigned int num_dw, num_bytes;
	struct dma_fence *fence;
	uint64_t src_addr, dst_addr;
	uint64_t pte_flags;
	void *cpu_addr;
	int r;

	/* use gart window 0 */
	*gart_addr = adev->gmc.gart_start;

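	/* The IB holds the copy command (num_dw dwords, aligned to 8) followed
	 * by one 8-byte GART PTE per page. The copy command writes those PTEs
	 * from the IB into the GART table, mapping the system pages into
	 * window 0.
	 */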
	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
	num_bytes = npages * 8;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, false);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

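	/* PTE flags for the temporary GART mapping of the system pages:
	 * valid, snooped system memory, writable unless the caller asked for
	 * a read-only mapping with KFD_IOCTL_SVM_FLAG_GPU_RO.
	 */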
	pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
	pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
	if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
		pte_flags |= AMDGPU_PTE_WRITEABLE;
	pte_flags |= adev->gart.gart_pte_flags;

	cpu_addr = &job->ibs[0].ptr[num_dw];

	r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
	if (r)
		goto error_free;

	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_free;

	dma_fence_put(fence);

	return r;

error_free:
	amdgpu_job_free(job);
	return r;
}

/**
 * svm_migrate_copy_memory_gart - use sdma to copy data between ram and vram
 *
 * @adev: amdgpu device the sdma ring is running on
 * @sys: system memory dma address array
 * @vram: vram address array
 * @npages: number of pages to copy
 * @direction: enum MIGRATION_COPY_DIR
 * @mfence: output, sdma fence to signal after sdma is done
 *
 * The ram side is addressed through contiguous GART table entries that map
 * the ram pages, while the vram side uses the direct mapping of the vram
 * pages, which must be npages contiguous pages.
 * The GART update and the buffer copy share the same sdma ring. The copy is
 * split into chunks of at most GTT_MAX_PAGES and all sdma operations are
 * serialized, so waiting for the fence of the last sdma operation is enough
 * to know the whole copy is done.
 *
 * Context: Process context, takes and releases gtt_window_lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
			     uint64_t *vram, uint64_t npages,
			     enum MIGRATION_COPY_DIR direction,
			     struct dma_fence **mfence)
{
	const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	uint64_t gart_s, gart_d;
	struct dma_fence *next;
	uint64_t size;
	int r = 0;

	mutex_lock(&adev->mman.gtt_window_lock);

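	/* Copy in chunks of at most GTT_MAX_PAGES: for each chunk the system
	 * pages are mapped through GART window 0 and the VRAM pages are
	 * addressed directly, then one SDMA copy is issued. Only the fence of
	 * the most recent copy is kept in *mfence.
	 */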
	while (npages) {
		size = min(GTT_MAX_PAGES, npages);

		if (direction == FROM_VRAM_TO_RAM) {
			gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
			r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
		} else if (direction == FROM_RAM_TO_VRAM) {
			r = svm_migrate_gart_map(ring, size, sys, &gart_s,
						 KFD_IOCTL_SVM_FLAG_GPU_RO);
			gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
		}
		if (r) {
			pr_debug("failed %d to create gart mapping\n", r);
			goto out_unlock;
		}

		r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
				       NULL, &next, false, true, false);
		if (r) {
			pr_debug("failed %d to copy memory\n", r);
			goto out_unlock;
		}

		dma_fence_put(*mfence);
		*mfence = next;
		npages -= size;
		if (npages) {
			sys += size;
			vram += size;
		}
	}

out_unlock:
	mutex_unlock(&adev->mman.gtt_window_lock);

	return r;
}

/**
 * svm_migrate_copy_done - wait for the sdma memory copy to finish
 *
 * @adev: amdgpu device the sdma memory copy is executing on
 * @mfence: migrate fence
 *
 * Wait for the dma fence to be signaled. If the copy was split into multiple
 * sdma operations, this is the fence of the last sdma operation.
 *
 * Context: called after svm_migrate_copy_memory_gart
 *
 * Return:
 * 0		- success
 * otherwise	- error code from dma fence signal
 */
static int
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
{
	int r = 0;

	if (mfence) {
		r = dma_fence_wait(mfence, false);
		dma_fence_put(mfence);
		pr_debug("sdma copy memory fence done\n");
	}

	return r;
}

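/*
 * VRAM addresses used below are byte offsets into the device memory range
 * registered with devm_memremap_pages() in svm_migrate_init(), so
 * pgmap.range.start converts between such an offset and the pfn of the
 * corresponding device page.
 */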
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
{
	return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
}

static void
svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
{
	struct page *page;

	page = pfn_to_page(pfn);
	page->zone_device_data = prange;
	get_page(page);
	lock_page(page);
}

static void
svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
{
	struct page *page;

	page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
	unlock_page(page);
	put_page(page);
}

static unsigned long
svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
{
	unsigned long addr;

	addr = page_to_pfn(page) << PAGE_SHIFT;
	return (addr - adev->kfd.dev->pgmap.range.start);
}

static struct page *
svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;

	page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
	if (page)
		lock_page(page);

	return page;
}

static void svm_migrate_put_sys_page(unsigned long addr)
{
	struct page *page;

	page = pfn_to_page(addr >> PAGE_SHIFT);
	unlock_page(page);
	put_page(page);
}

static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
			 struct migrate_vma *migrate, struct dma_fence **mfence,
			 dma_addr_t *scratch)
{
	uint64_t npages = migrate->cpages;
	struct device *dev = adev->dev;
	struct amdgpu_res_cursor cursor;
	dma_addr_t *src;
	uint64_t *dst;
	uint64_t i, j;
	int r;

	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
		 prange->last);

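	/* Scratch buffer layout: npages dma_addr_t entries for the system
	 * (source) pages followed by npages uint64_t VRAM (destination)
	 * offsets.
	 */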
	src = scratch;
	dst = (uint64_t *)(scratch + npages);

	r = svm_range_vram_node_new(adev, prange, true);
	if (r) {
		pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
		goto out;
	}

	amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
			 npages << PAGE_SHIFT, &cursor);
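	/* Walk the VRAM cursor and the collected source pages in parallel,
	 * batching contiguous pages and flushing a batch to SDMA whenever a
	 * source page is missing or the current VRAM chunk is exhausted.
	 */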
	for (i = j = 0; i < npages; i++) {
		struct page *spage;

		dst[i] = cursor.start + (j << PAGE_SHIFT);
		migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
		svm_migrate_get_vram_page(prange, migrate->dst[i]);

		migrate->dst[i] = migrate_pfn(migrate->dst[i]);
		migrate->dst[i] |= MIGRATE_PFN_LOCKED;

		if (migrate->src[i] & MIGRATE_PFN_VALID) {
			spage = migrate_pfn_to_page(migrate->src[i]);
			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
					      DMA_TO_DEVICE);
			r = dma_mapping_error(dev, src[i]);
			if (r) {
				pr_debug("failed %d dma_map_page\n", r);
				goto out_free_vram_pages;
			}
		} else {
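			/* Source page was not collected for migration: flush
			 * any pending batch and skip over this VRAM page.
			 */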
			if (j) {
				r = svm_migrate_copy_memory_gart(
						adev, src + i - j,
						dst + i - j, j,
						FROM_RAM_TO_VRAM,
						mfence);
				if (r)
					goto out_free_vram_pages;
				amdgpu_res_next(&cursor, j << PAGE_SHIFT);
				j = 0;
			} else {
				amdgpu_res_next(&cursor, PAGE_SIZE);
			}
			continue;
		}

		pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
			 src[i] >> PAGE_SHIFT, page_to_pfn(spage));

		if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
			r = svm_migrate_copy_memory_gart(adev, src + i - j,
							 dst + i - j, j + 1,
							 FROM_RAM_TO_VRAM,
							 mfence);
			if (r)
				goto out_free_vram_pages;
			amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
			j = 0;
		} else {
			j++;
		}
	}

	r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
					 FROM_RAM_TO_VRAM, mfence);

out_free_vram_pages:
	if (r) {
		pr_debug("failed %d to copy memory to vram\n", r);
		while (i--) {
			svm_migrate_put_vram_page(adev, dst[i]);
			migrate->dst[i] = 0;
		}
	}

out:
	return r;
}

static int
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
			struct vm_area_struct *vma, uint64_t start,
			uint64_t end)
{
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate;
	dma_addr_t *scratch;
	size_t size;
	void *buf;
	int r = -ENOMEM;
	int retry = 0;

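	/* Collect the system pages of [start, end) in this VMA for migration
	 * to VRAM. One kvmalloc buffer backs migrate.src, migrate.dst and the
	 * scratch array used for DMA and VRAM addresses.
	 */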
	memset(&migrate, 0, sizeof(migrate));
	migrate.vma = vma;
	migrate.start = start;
	migrate.end = end;
	migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
	migrate.pgmap_owner = adev;

	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
	size *= npages;
	buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
	if (!buf)
		goto out;

	migrate.src = buf;
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

retry:
	r = migrate_vma_setup(&migrate);
	if (r) {
		pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
			 r, prange->svms, prange->start, prange->last);
		goto out_free;
	}
	if (migrate.cpages != npages) {
		pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages,
			 npages);
		migrate_vma_finalize(&migrate);
		if (retry++ >= 3) {
			r = -ENOMEM;
			pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n",
				 r, prange->svms, prange->start, prange->last);
			goto out_free;
		}

		goto retry;
	}

	if (migrate.cpages) {
		r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
					     scratch);
		migrate_vma_pages(&migrate);
		svm_migrate_copy_done(adev, mfence);
		migrate_vma_finalize(&migrate);
	}

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
	svm_range_free_dma_mappings(prange);

out_free:
	kvfree(buf);
out:
	return r;
}

/**
 * svm_migrate_ram_to_vram - migrate svm range from system to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: the process mm structure
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
			struct mm_struct *mm)
{
	unsigned long addr, start, end;
	struct vm_area_struct *vma;
	struct amdgpu_device *adev;
	int r = 0;

	if (prange->actual_loc == best_loc) {
		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
			 prange->svms, prange->start, prange->last, best_loc);
		return 0;
	}

	adev = svm_range_get_adev_by_id(prange, best_loc);
	if (!adev) {
		pr_debug("failed to get device by id 0x%x\n", best_loc);
		return -ENODEV;
	}

	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
		 prange->start, prange->last, best_loc);

	/* FIXME: workaround for page locking bug with invalid pages */
	svm_range_prefault(prange, mm);

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	for (addr = start; addr < end;) {
		unsigned long next;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
			break;

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
		if (r) {
			pr_debug("failed to migrate\n");
			break;
		}
		addr = next;
	}

	if (!r)
		prange->actual_loc = best_loc;

	return r;
}

static void svm_migrate_page_free(struct page *page)
{
	/* Empty on purpose: device-private memory requires a page_free
	 * callback, but there is nothing to do per page here.
	 */
}

static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
			struct migrate_vma *migrate, struct dma_fence **mfence,
			dma_addr_t *scratch)
{
	uint64_t npages = migrate->cpages;
	struct device *dev = adev->dev;
	uint64_t *src;
	dma_addr_t *dst;
	struct page *dpage;
	uint64_t i = 0, j;
	uint64_t addr;
	int r = 0;

	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
		 prange->last);

	addr = prange->start << PAGE_SHIFT;

	src = (uint64_t *)(scratch + npages);
	dst = scratch;

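	/* Batch contiguous VRAM source pages and flush each batch with one
	 * SDMA copy. The system destination pages are freshly allocated and
	 * DMA-mapped one page at a time.
	 */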
	for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
		struct page *spage;

		spage = migrate_pfn_to_page(migrate->src[i]);
		if (!spage) {
			pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);
			r = -ENOMEM;
			goto out_oom;
		}
		src[i] = svm_migrate_addr(adev, spage);
		if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
			r = svm_migrate_copy_memory_gart(adev, dst + i - j,
							 src + i - j, j,
							 FROM_VRAM_TO_RAM,
							 mfence);
			if (r)
				goto out_oom;
			j = 0;
		}

		dpage = svm_migrate_get_sys_page(migrate->vma, addr);
		if (!dpage) {
			pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
				 prange->svms, prange->start, prange->last);
			r = -ENOMEM;
			goto out_oom;
		}

		dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
		r = dma_mapping_error(dev, dst[i]);
		if (r) {
			pr_debug("failed %d dma_map_page\n", r);
			goto out_oom;
		}

		pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
			 dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));

		migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
		migrate->dst[i] |= MIGRATE_PFN_LOCKED;
	}

	r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
					 FROM_VRAM_TO_RAM, mfence);

out_oom:
	if (r) {
		pr_debug("failed %d copy to ram\n", r);
		while (i--) {
			svm_migrate_put_sys_page(dst[i]);
			migrate->dst[i] = 0;
		}
	}

	return r;
}

static int
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
		       struct vm_area_struct *vma, uint64_t start, uint64_t end)
{
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate;
	dma_addr_t *scratch;
	size_t size;
	void *buf;
	int r = -ENOMEM;

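	/* Only this device's device-private pages are collected here:
	 * MIGRATE_VMA_SELECT_DEVICE_PRIVATE with pgmap_owner == adev.
	 */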
	memset(&migrate, 0, sizeof(migrate));
	migrate.vma = vma;
	migrate.start = start;
	migrate.end = end;
	migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
	migrate.pgmap_owner = adev;

	size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
	size *= npages;
	buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
	if (!buf)
		goto out;

	migrate.src = buf;
	migrate.dst = migrate.src + npages;
	scratch = (dma_addr_t *)(migrate.dst + npages);

	r = migrate_vma_setup(&migrate);
	if (r) {
		pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
			 r, prange->svms, prange->start, prange->last);
		goto out_free;
	}

	pr_debug("cpages %ld\n", migrate.cpages);

	if (migrate.cpages) {
		r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
					    scratch);
		migrate_vma_pages(&migrate);
		svm_migrate_copy_done(adev, mfence);
		migrate_vma_finalize(&migrate);
	} else {
		pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
			 prange->start, prange->last);
	}

	svm_range_dma_unmap(adev->dev, scratch, 0, npages);

out_free:
	kvfree(buf);
out:
	return r;
}

/**
 * svm_migrate_vram_to_ram - migrate svm range from device to system
 * @prange: range structure
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
{
	struct amdgpu_device *adev;
	struct vm_area_struct *vma;
	unsigned long addr;
	unsigned long start;
	unsigned long end;
	int r = 0;

	if (!prange->actual_loc) {
		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
			 prange->start, prange->last);
		return 0;
	}

	adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
	if (!adev) {
		pr_debug("failed to get device by id 0x%x\n",
			 prange->actual_loc);
		return -ENODEV;
	}

	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
		 prange->svms, prange, prange->start, prange->last,
		 prange->actual_loc);

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	for (addr = start; addr < end;) {
		unsigned long next;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
			break;

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
		if (r) {
			pr_debug("failed %d to migrate\n", r);
			break;
		}
		addr = next;
	}

	if (!r) {
		svm_range_vram_node_free(prange);
		prange->actual_loc = 0;
	}
	return r;
}

/**
 * svm_migrate_vram_to_vram - migrate svm range from device to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller holds mmap read lock, svms lock, prange lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
			 struct mm_struct *mm)
{
	int r;

	/*
	 * TODO: for devices with a PCIe large BAR or on the same XGMI hive,
	 * skip using system memory as the migration bridge
	 */

	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

	r = svm_migrate_vram_to_ram(prange, mm);
	if (r)
		return r;

	return svm_migrate_ram_to_vram(prange, best_loc, mm);
}

int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
		    struct mm_struct *mm)
{
	if (!prange->actual_loc)
		return svm_migrate_ram_to_vram(prange, best_loc, mm);
	else
		return svm_migrate_vram_to_vram(prange, best_loc, mm);
}

/**
 * svm_migrate_to_ram - CPU page fault handler
 * @vmf: CPU vm fault vma, address
 *
 * Context: vm fault handler, caller holds the mmap read lock
 *
 * Return:
 * 0 - OK
 * VM_FAULT_SIGBUS - notify the application of the failed fault with SIGBUS
 */
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
	unsigned long addr = vmf->address;
	struct vm_area_struct *vma;
	enum svm_work_list_ops op;
	struct svm_range *parent;
	struct svm_range *prange;
	struct kfd_process *p;
	struct mm_struct *mm;
	int r = 0;

	vma = vmf->vma;
	mm = vma->vm_mm;

	p = kfd_lookup_process_by_mm(vma->vm_mm);
	if (!p) {
		pr_debug("failed find process at fault address 0x%lx\n", addr);
		return VM_FAULT_SIGBUS;
	}
	addr >>= PAGE_SHIFT;
	pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);

	mutex_lock(&p->svms.lock);

	prange = svm_range_from_addr(&p->svms, addr, &parent);
	if (!prange) {
		pr_debug("cannot find svm range at 0x%lx\n", addr);
		r = -EFAULT;
		goto out;
	}

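	/* Take the parent range's migrate_mutex first; if the faulting range
	 * is a child of the parent, lock it nested under the parent.
	 */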
	mutex_lock(&parent->migrate_mutex);
	if (prange != parent)
		mutex_lock_nested(&prange->migrate_mutex, 1);

	if (!prange->actual_loc)
		goto out_unlock_prange;

	svm_range_lock(parent);
	if (prange != parent)
		mutex_lock_nested(&prange->lock, 1);
	r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
	if (prange != parent)
		mutex_unlock(&prange->lock);
	svm_range_unlock(parent);
	if (r) {
		pr_debug("failed %d to split range by granularity\n", r);
		goto out_unlock_prange;
	}

	r = svm_migrate_vram_to_ram(prange, mm);
	if (r)
		pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
			 prange, prange->start, prange->last);

	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
	if (p->xnack_enabled && parent == prange)
		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
	else
		op = SVM_OP_UPDATE_RANGE_NOTIFIER;
	svm_range_add_list_work(&p->svms, parent, mm, op);
	schedule_deferred_list_work(&p->svms);

out_unlock_prange:
	if (prange != parent)
		mutex_unlock(&prange->migrate_mutex);
	mutex_unlock(&parent->migrate_mutex);
out:
	mutex_unlock(&p->svms.lock);
	kfd_unref_process(p);

	pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);

	return r ? VM_FAULT_SIGBUS : 0;
}

static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
	.page_free		= svm_migrate_page_free,
	.migrate_to_ram		= svm_migrate_to_ram,
};

/* Each VRAM page uses sizeof(struct page) on system memory */
#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))

int svm_migrate_init(struct amdgpu_device *adev)
{
	struct kfd_dev *kfddev = adev->kfd.dev;
	struct dev_pagemap *pgmap;
	struct resource *res;
	unsigned long size;
	void *r;

	/* Page migration works on Vega10 or newer */
	if (kfddev->device_info->asic_family < CHIP_VEGA10)
		return -EINVAL;

	pgmap = &kfddev->pgmap;
	memset(pgmap, 0, sizeof(*pgmap));

	/* TODO: register all VRAM with HMM for now.
	 * The reserved portion of VRAM should be excluded from this size.
	 */
	size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
	res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
	if (IS_ERR(res))
		return -ENOMEM;

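	/* The region reserved above is an otherwise unused physical address
	 * range; devm_memremap_pages() below creates one device-private
	 * struct page per VRAM page within it.
	 */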
	pgmap->type = MEMORY_DEVICE_PRIVATE;
	pgmap->nr_range = 1;
	pgmap->range.start = res->start;
	pgmap->range.end = res->end;
	pgmap->ops = &svm_migrate_pgmap_ops;
	pgmap->owner = adev;
	pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
	r = devm_memremap_pages(adev->dev, pgmap);
	if (IS_ERR(r)) {
		pr_err("failed to register HMM device memory\n");
		devm_release_mem_region(adev->dev, res->start,
					res->end - res->start + 1);
		return PTR_ERR(r);
	}

	pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
		 SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);

	amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));

	pr_info("HMM registered %ldMB device memory\n", size >> 20);

	return 0;
}

void svm_migrate_fini(struct amdgpu_device *adev)
{
	struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap;

	devm_memunmap_pages(adev->dev, pgmap);
	devm_release_mem_region(adev->dev, pgmap->range.start,
				pgmap->range.end - pgmap->range.start + 1);
}