/*
 * Copyright 2014-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#define pr_fmt(fmt) "kfd2kgd: " fmt

#include <linux/list.h>
#include <drm/drmP.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_amdkfd.h"

/* Special VM and GART address alignment needed for VI pre-Fiji due to
 * a HW bug.
 */
#define VI_BO_SIZE_ALIGN (0x8000)

/* Impose limit on how much memory KFD can use */
static struct {
	uint64_t max_system_mem_limit;
	int64_t system_mem_used;
	spinlock_t mem_limit_lock;
} kfd_mem_limit;

/* Struct used for amdgpu_amdkfd_bo_validate */
struct amdgpu_vm_parser {
	uint32_t        domain;
	bool            wait;
};

static const char * const domain_bit_to_string[] = {
		"CPU",
		"GTT",
		"VRAM",
		"GDS",
		"GWS",
		"OA"
};

#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

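/* check_if_add_bo_to_vm - Check whether this BO still needs to be added
 * to the given VM, i.e. no kfd_bo_va_list entry for that VM exists yet
 * in the kgd_mem's bo_va_list.
 */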
static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
		struct kgd_mem *mem)
{
	struct kfd_bo_va_list *entry;

	list_for_each_entry(entry, &mem->bo_va_list, bo_list)
		if (entry->bo_va->base.vm == avm)
			return false;

	return true;
}

/* Set memory usage limits. Currently, the limit is:
 *  System (kernel) memory - 3/8th of system RAM
 */
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
	struct sysinfo si;
	uint64_t mem;

	si_meminfo(&si);
	mem = si.totalram - si.totalhigh;
	mem *= si.mem_unit;

	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
	pr_debug("Kernel memory limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20));
}

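/* Account a new allocation against the KFD system memory limit. Only GTT
 * allocations are limited; the TTM accounting size is charged in addition
 * to the BO size itself.
 */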
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
					      uint64_t size, u32 domain)
{
	size_t acc_size;
	int ret = 0;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	spin_lock(&kfd_mem_limit.mem_limit_lock);
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		if (kfd_mem_limit.system_mem_used + (acc_size + size) >
			kfd_mem_limit.max_system_mem_limit) {
			ret = -ENOMEM;
			goto err_no_mem;
		}
		kfd_mem_limit.system_mem_used += (acc_size + size);
	}
err_no_mem:
	spin_unlock(&kfd_mem_limit.mem_limit_lock);
	return ret;
}

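/* Undo a reservation made by amdgpu_amdkfd_reserve_system_mem_limit(),
 * e.g. when BO creation fails after the limit was already charged.
 */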
static void unreserve_system_mem_limit(struct amdgpu_device *adev,
				       uint64_t size, u32 domain)
{
	size_t acc_size;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	spin_lock(&kfd_mem_limit.mem_limit_lock);
	if (domain == AMDGPU_GEM_DOMAIN_GTT)
		kfd_mem_limit.system_mem_used -= (acc_size + size);
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}

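/* Drop the accounted system memory of a GTT BO that was charged against
 * the KFD limit when the BO itself is freed.
 */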
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}

/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
 *  reservation object.
 *
 * @bo: [IN] Remove eviction fence(s) from this BO
 * @ef: [IN] If ef is specified, then this eviction fence is removed if it
 *  is present in the shared list.
 * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
 *  from BO's reservation object shared list.
 * @ef_count: [OUT] Number of fences in ef_list.
 *
 * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
 *  called to restore the eviction fences and to avoid memory leak. This is
 *  useful for shared BOs.
 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
 */
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
					struct amdgpu_amdkfd_fence *ef,
					struct amdgpu_amdkfd_fence ***ef_list,
					unsigned int *ef_count)
{
	struct reservation_object_list *fobj;
	struct reservation_object *resv;
	unsigned int i = 0, j = 0, k = 0, shared_count;
	unsigned int count = 0;
	struct amdgpu_amdkfd_fence **fence_list;

	if (!ef && !ef_list)
		return -EINVAL;

	if (ef_list) {
		*ef_list = NULL;
		*ef_count = 0;
	}

	resv = bo->tbo.resv;
	fobj = reservation_object_get_list(resv);

	if (!fobj)
		return 0;

	preempt_disable();
	write_seqcount_begin(&resv->seq);

	/* Go through all the shared fences in the reservation object. If
	 * ef is specified and it exists in the list, remove it and reduce the
	 * count. If ef is not specified, then get the count of eviction fences
	 * present.
	 */
	shared_count = fobj->shared_count;
	for (i = 0; i < shared_count; ++i) {
		struct dma_fence *f;

		f = rcu_dereference_protected(fobj->shared[i],
					      reservation_object_held(resv));

		if (ef) {
			if (f->context == ef->base.context) {
				dma_fence_put(f);
				fobj->shared_count--;
			} else {
				RCU_INIT_POINTER(fobj->shared[j++], f);
			}
		} else if (to_amdgpu_amdkfd_fence(f))
			count++;
	}
	write_seqcount_end(&resv->seq);
	preempt_enable();

	if (ef || !count)
		return 0;

	/* Alloc memory for count number of eviction fence pointers. Fill the
	 * ef_list array and ef_count
	 */
	fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
			     GFP_KERNEL);
	if (!fence_list)
		return -ENOMEM;

	preempt_disable();
	write_seqcount_begin(&resv->seq);

	j = 0;
	for (i = 0; i < shared_count; ++i) {
		struct dma_fence *f;
		struct amdgpu_amdkfd_fence *efence;

		f = rcu_dereference_protected(fobj->shared[i],
			reservation_object_held(resv));

		efence = to_amdgpu_amdkfd_fence(f);
		if (efence) {
			fence_list[k++] = efence;
			fobj->shared_count--;
		} else {
			RCU_INIT_POINTER(fobj->shared[j++], f);
		}
	}

	write_seqcount_end(&resv->seq);
	preempt_enable();

	*ef_list = fence_list;
	*ef_count = k;

	return 0;
}

/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
 *  reservation object.
 *
 * @bo: [IN] Add eviction fences to this BO
 * @ef_list: [IN] List of eviction fences to be added
 * @ef_count: [IN] Number of fences in ef_list.
 *
 * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
 *  function.
 */
static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
				struct amdgpu_amdkfd_fence **ef_list,
				unsigned int ef_count)
{
	int i;

	if (!ef_list || !ef_count)
		return;

	for (i = 0; i < ef_count; i++) {
		amdgpu_bo_fence(bo, &ef_list[i]->base, true);
		/* Re-adding the fence takes an additional reference. Drop that
		 * reference.
		 */
		dma_fence_put(&ef_list[i]->base);
	}

	kfree(ef_list);
}

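/* amdgpu_amdkfd_bo_validate - Move a BO into the requested memory domain.
 *
 * If @wait is true, also wait for the move to complete. The BO's eviction
 * fences are temporarily removed around the wait so that waiting on the
 * move fence does not trigger an eviction.
 */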
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
				     bool wait)
{
	struct ttm_operation_ctx ctx = { false, false };
	int ret;

	if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
		 "Called with userptr BO"))
		return -EINVAL;

	amdgpu_ttm_placement_from_domain(bo, domain);

	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (ret)
		goto validate_fail;
	if (wait) {
		struct amdgpu_amdkfd_fence **ef_list;
		unsigned int ef_count;

		ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
							  &ef_count);
		if (ret)
			goto validate_fail;

		ttm_bo_wait(&bo->tbo, false, false);
		amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
	}

validate_fail:
	return ret;
}

static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_vm_parser *p = param;

	return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
}

/* vm_validate_pt_pd_bos - Validate page table and directory BOs
 *
 * Page directories are not updated here because huge page handling
 * during page table updates can invalidate page directory entries
 * again. Page directories are only updated after updating page
 * tables.
 */
static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
{
	struct amdgpu_bo *pd = vm->base.root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
	struct amdgpu_vm_parser param;
	uint64_t addr, flags = AMDGPU_PTE_VALID;
	int ret;

	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
	param.wait = false;

	ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate,
					&param);
	if (ret) {
		pr_err("amdgpu: failed to validate PT BOs\n");
		return ret;
	}

	ret = amdgpu_amdkfd_validate(&param, pd);
	if (ret) {
		pr_err("amdgpu: failed to validate PD\n");
		return ret;
	}

	addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo);
	amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
	vm->pd_phys_addr = addr;

	if (vm->base.use_cpu_for_update) {
		ret = amdgpu_bo_kmap(pd, NULL);
		if (ret) {
			pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
			 struct dma_fence *f)
{
	int ret = amdgpu_sync_fence(adev, sync, f, false);

	/* Sync objects can't handle multiple GPUs (contexts) updating
	 * sync->last_vm_update. Fortunately we don't need it for
	 * KFD's purposes, so we can just drop that fence.
	 */
	if (sync->last_vm_update) {
		dma_fence_put(sync->last_vm_update);
		sync->last_vm_update = NULL;
	}

	return ret;
}

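/* vm_update_pds - Commit pending page directory updates for a VM and add
 * the resulting fence to the given sync object.
 */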
static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
	struct amdgpu_bo *pd = vm->root.base.bo;
	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
	int ret;

	ret = amdgpu_vm_update_directories(adev, vm);
	if (ret)
		return ret;

	return sync_vm_fence(adev, sync, vm->last_update);
}

/* add_bo_to_vm - Add a BO to a VM
 *
 * Everything that needs to be done only once when a BO is first added
 * to a VM. It can later be mapped and unmapped many times without
 * repeating these steps.
 *
 * 1. Allocate and initialize BO VA entry data structure
 * 2. Add BO to the VM
 * 3. Determine ASIC-specific PTE flags
 * 4. Alloc page tables and directories if needed
 * 4a.  Validate new page tables and directories
 */
static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
		struct amdgpu_vm *avm, bool is_aql,
		struct kfd_bo_va_list **p_bo_va_entry)
{
	int ret;
	struct kfd_bo_va_list *bo_va_entry;
	struct amdkfd_vm *kvm = container_of(avm,
					     struct amdkfd_vm, base);
	struct amdgpu_bo *pd = avm->root.base.bo;
	struct amdgpu_bo *bo = mem->bo;
	uint64_t va = mem->va;
	struct list_head *list_bo_va = &mem->bo_va_list;
	unsigned long bo_size = bo->tbo.mem.size;

	if (!va) {
		pr_err("Invalid VA when adding BO to VM\n");
		return -EINVAL;
	}

	if (is_aql)
		va += bo_size;

	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
	if (!bo_va_entry)
		return -ENOMEM;

	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
			va + bo_size, avm);

	/* Add BO to VM internal data structures */
	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
	if (!bo_va_entry->bo_va) {
		ret = -EINVAL;
		pr_err("Failed to add BO object to VM. ret == %d\n",
				ret);
		goto err_vmadd;
	}

	bo_va_entry->va = va;
	bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
							 mem->mapping_flags);
	bo_va_entry->kgd_dev = (void *)adev;
	list_add(&bo_va_entry->bo_list, list_bo_va);

	if (p_bo_va_entry)
		*p_bo_va_entry = bo_va_entry;

	/* Allocate new page tables if needed and validate them. Clearing
	 * new page tables and validating them both need to wait on move
	 * fences. We don't want that to trigger the eviction fence, so
	 * remove it temporarily.
	 */
	amdgpu_amdkfd_remove_eviction_fence(pd,
					kvm->process_info->eviction_fence,
					NULL, NULL);

	ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo));
	if (ret) {
		pr_err("Failed to allocate pts, err=%d\n", ret);
		goto err_alloc_pts;
	}

	ret = vm_validate_pt_pd_bos(kvm);
	if (ret) {
		pr_err("validate_pt_pd_bos() failed\n");
		goto err_alloc_pts;
	}

	/* Add the eviction fence back */
	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);

	return 0;

err_alloc_pts:
	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
	list_del(&bo_va_entry->bo_list);
err_vmadd:
	kfree(bo_va_entry);
	return ret;
}

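/* remove_bo_from_vm - Undo add_bo_to_vm: remove the bo_va mapping from the
 * VM and free the kfd_bo_va_list entry.
 */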
static void remove_bo_from_vm(struct amdgpu_device *adev,
		struct kfd_bo_va_list *entry, unsigned long size)
{
	pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
			entry->va,
			entry->va + size, entry);
	amdgpu_vm_bo_rmv(adev, entry->bo_va);
	list_del(&entry->bo_list);
	kfree(entry);
}

static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
				struct amdkfd_process_info *process_info)
{
	struct ttm_validate_buffer *entry = &mem->validate_list;
	struct amdgpu_bo *bo = mem->bo;

	INIT_LIST_HEAD(&entry->head);
	entry->shared = true;
	entry->bo = &bo->tbo;
	mutex_lock(&process_info->lock);
	list_add_tail(&entry->head, &process_info->kfd_bo_list);
	mutex_unlock(&process_info->lock);
}

/* Reserving a BO and its page table BOs must happen atomically to
 * avoid deadlocks. Some operations update multiple VMs at once. Track
 * all the reservation info in a context structure. Optionally a sync
 * object can track VM updates.
 */
struct bo_vm_reservation_context {
	struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
	unsigned int n_vms;		    /* Number of VMs reserved	    */
	struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
	struct ww_acquire_ctx ticket;	    /* Reservation ticket	    */
	struct list_head list, duplicates;  /* BO lists			    */
	struct amdgpu_sync *sync;	    /* Pointer to sync object	    */
	bool reserved;			    /* Whether BOs are reserved	    */
};

enum bo_vm_match {
	BO_VM_NOT_MAPPED = 0,	/* Match VMs where a BO is not mapped */
	BO_VM_MAPPED,		/* Match VMs where a BO is mapped     */
	BO_VM_ALL,		/* Match all VMs a BO was added to    */
};

/**
 * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
 * @mem: KFD BO structure.
 * @vm: the VM to reserve.
 * @ctx: the struct that will be used in unreserve_bo_and_vms().
 */
static int reserve_bo_and_vm(struct kgd_mem *mem,
			      struct amdgpu_vm *vm,
			      struct bo_vm_reservation_context *ctx)
{
	struct amdgpu_bo *bo = mem->bo;
	int ret;

	WARN_ON(!vm);

	ctx->reserved = false;
	ctx->n_vms = 1;
	ctx->sync = &mem->sync;

	INIT_LIST_HEAD(&ctx->list);
	INIT_LIST_HEAD(&ctx->duplicates);

	ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
	if (!ctx->vm_pd)
		return -ENOMEM;

	ctx->kfd_bo.robj = bo;
	ctx->kfd_bo.priority = 0;
	ctx->kfd_bo.tv.bo = &bo->tbo;
	ctx->kfd_bo.tv.shared = true;
	ctx->kfd_bo.user_pages = NULL;
	list_add(&ctx->kfd_bo.tv.head, &ctx->list);

	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);

	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
				     false, &ctx->duplicates);
	if (!ret)
		ctx->reserved = true;
	else {
		pr_err("Failed to reserve buffers in ttm\n");
		kfree(ctx->vm_pd);
		ctx->vm_pd = NULL;
	}

	return ret;
}

/**
 * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
 * @mem: KFD BO structure.
 * @vm: the VM to reserve. If NULL, all VMs associated with the BO are
 * considered. Otherwise, only the given VM.
 * @map_type: the mapping status that will be used to filter the VMs.
 * @ctx: the struct that will be used in unreserve_bo_and_vms().
 *
 * Returns 0 for success, negative for failure.
 */
static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
				struct amdgpu_vm *vm, enum bo_vm_match map_type,
				struct bo_vm_reservation_context *ctx)
{
	struct amdgpu_bo *bo = mem->bo;
	struct kfd_bo_va_list *entry;
	unsigned int i;
	int ret;

	ctx->reserved = false;
	ctx->n_vms = 0;
	ctx->vm_pd = NULL;
	ctx->sync = &mem->sync;

	INIT_LIST_HEAD(&ctx->list);
	INIT_LIST_HEAD(&ctx->duplicates);

	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if ((vm && vm != entry->bo_va->base.vm) ||
			(entry->is_mapped != map_type
			&& map_type != BO_VM_ALL))
			continue;

		ctx->n_vms++;
	}

	if (ctx->n_vms != 0) {
		ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
				     GFP_KERNEL);
		if (!ctx->vm_pd)
			return -ENOMEM;
	}

	ctx->kfd_bo.robj = bo;
	ctx->kfd_bo.priority = 0;
	ctx->kfd_bo.tv.bo = &bo->tbo;
	ctx->kfd_bo.tv.shared = true;
	ctx->kfd_bo.user_pages = NULL;
	list_add(&ctx->kfd_bo.tv.head, &ctx->list);

	i = 0;
	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if ((vm && vm != entry->bo_va->base.vm) ||
			(entry->is_mapped != map_type
			&& map_type != BO_VM_ALL))
			continue;

		amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
				&ctx->vm_pd[i]);
		i++;
	}

	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
				     false, &ctx->duplicates);
	if (!ret)
		ctx->reserved = true;
	else
		pr_err("Failed to reserve buffers in ttm.\n");

	if (ret) {
		kfree(ctx->vm_pd);
		ctx->vm_pd = NULL;
	}

	return ret;
}

669 
670 /**
671  * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
672  * @ctx: Reservation context to unreserve
673  * @wait: Optionally wait for a sync object representing pending VM updates
674  * @intr: Whether the wait is interruptible
675  *
676  * Also frees any resources allocated in
677  * reserve_bo_and_(cond_)vm(s). Returns the status from
678  * amdgpu_sync_wait.
679  */
680 static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
681 				 bool wait, bool intr)
682 {
683 	int ret = 0;
684 
685 	if (wait)
686 		ret = amdgpu_sync_wait(ctx->sync, intr);
687 
688 	if (ctx->reserved)
689 		ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
690 	kfree(ctx->vm_pd);
691 
692 	ctx->sync = NULL;
693 
694 	ctx->reserved = false;
695 	ctx->vm_pd = NULL;
696 
697 	return ret;
698 }
699 
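/* unmap_bo_from_gpuvm - Remove the GPUVM mapping of a BO from one VM and
 * clear the freed mappings in the page tables. The resulting page table
 * update fence is added to the sync object.
 */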
static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
				struct kfd_bo_va_list *entry,
				struct amdgpu_sync *sync)
{
	struct amdgpu_bo_va *bo_va = entry->bo_va;
	struct amdgpu_vm *vm = bo_va->base.vm;
	struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base);
	struct amdgpu_bo *pd = vm->root.base.bo;

	/* Remove eviction fence from PD (and thereby from PTs too as
	 * they share the resv. object). Otherwise during PT update
	 * job (see amdgpu_vm_bo_update_mapping), eviction fence would
	 * get added to job->sync object and job execution would
	 * trigger the eviction fence.
	 */
	amdgpu_amdkfd_remove_eviction_fence(pd,
					    kvm->process_info->eviction_fence,
					    NULL, NULL);
	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);

	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);

	/* Add the eviction fence back */
	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);

	sync_vm_fence(adev, sync, bo_va->last_pt_update);

	return 0;
}

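/* update_gpuvm_pte - Write the page table entries for an existing mapping
 * of a BO and add the page table update fence to the sync object.
 */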
static int update_gpuvm_pte(struct amdgpu_device *adev,
		struct kfd_bo_va_list *entry,
		struct amdgpu_sync *sync)
{
	int ret;
	struct amdgpu_vm *vm;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;

	bo_va = entry->bo_va;
	vm = bo_va->base.vm;
	bo = bo_va->base.bo;

	/* Update the page tables  */
	ret = amdgpu_vm_bo_update(adev, bo_va, false);
	if (ret) {
		pr_err("amdgpu_vm_bo_update failed\n");
		return ret;
	}

	return sync_vm_fence(adev, sync, bo_va->last_pt_update);
}

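/* map_bo_to_gpuvm - Create the GPUVM mapping for a BO VA entry and fill
 * the page tables for it. On failure the mapping is removed again.
 */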
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
{
	int ret;

	/* Set virtual address for the allocation */
	ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
			       amdgpu_bo_size(entry->bo_va->base.bo),
			       entry->pte_flags);
	if (ret) {
		pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
				entry->va, ret);
		return ret;
	}

	ret = update_gpuvm_pte(adev, entry, sync);
	if (ret) {
		pr_err("update_gpuvm_pte() failed\n");
		goto update_gpuvm_pte_failed;
	}

	return 0;

update_gpuvm_pte_failed:
	unmap_bo_from_gpuvm(adev, entry, sync);
	return ret;
}

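/* process_validate_vms - Validate the PD and PT BOs of all VMs belonging
 * to a KFD process.
 */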
static int process_validate_vms(struct amdkfd_process_info *process_info)
{
	struct amdkfd_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		ret = vm_validate_pt_pd_bos(peer_vm);
		if (ret)
			return ret;
	}

	return 0;
}

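/* process_update_pds - Update the page directories of all VMs belonging
 * to a KFD process and collect the update fences in the sync object.
 */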
static int process_update_pds(struct amdkfd_process_info *process_info,
			      struct amdgpu_sync *sync)
{
	struct amdkfd_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		ret = vm_update_pds(&peer_vm->base, sync);
		if (ret)
			return ret;
	}

	return 0;
}

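/* amdgpu_amdkfd_gpuvm_create_process_vm - Create a compute VM for a KFD
 * process on this GPU. The first VM created for a process also allocates
 * the shared amdkfd_process_info and the process eviction fence.
 */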
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
					  void **process_info,
					  struct dma_fence **ef)
{
	int ret;
	struct amdkfd_vm *new_vm;
	struct amdkfd_process_info *info;
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
	if (!new_vm)
		return -ENOMEM;

	/* Initialize the VM context, allocate the page directory and zero it */
	ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0);
	if (ret) {
		pr_err("Failed init vm ret %d\n", ret);
		goto vm_init_fail;
	}
	new_vm->adev = adev;

	if (!*process_info) {
		info = kzalloc(sizeof(*info), GFP_KERNEL);
		if (!info) {
			ret = -ENOMEM;
			goto alloc_process_info_fail;
		}

		mutex_init(&info->lock);
		INIT_LIST_HEAD(&info->vm_list_head);
		INIT_LIST_HEAD(&info->kfd_bo_list);

		info->eviction_fence =
			amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
						   current->mm);
		if (!info->eviction_fence) {
			pr_err("Failed to create eviction fence\n");
			ret = -ENOMEM;
			goto create_evict_fence_fail;
		}

		*process_info = info;
		*ef = dma_fence_get(&info->eviction_fence->base);
	}

	new_vm->process_info = *process_info;

	mutex_lock(&new_vm->process_info->lock);
	list_add_tail(&new_vm->vm_list_node,
			&(new_vm->process_info->vm_list_head));
	new_vm->process_info->n_vms++;
	mutex_unlock(&new_vm->process_info->lock);

	*vm = (void *) new_vm;

	pr_debug("Created process vm %p\n", *vm);

	return ret;

create_evict_fence_fail:
	mutex_destroy(&info->lock);
	kfree(info);
alloc_process_info_fail:
	amdgpu_vm_fini(adev, &new_vm->base);
vm_init_fail:
	kfree(new_vm);
	return ret;
}

void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm;
	struct amdgpu_vm *avm = &kfd_vm->base;
	struct amdgpu_bo *pd;
	struct amdkfd_process_info *process_info;

	if (WARN_ON(!kgd || !vm))
		return;

	pr_debug("Destroying process vm %p\n", vm);
	/* Release eviction fence from PD */
	pd = avm->root.base.bo;
	amdgpu_bo_reserve(pd, false);
	amdgpu_bo_fence(pd, NULL, false);
	amdgpu_bo_unreserve(pd);

	process_info = kfd_vm->process_info;

	mutex_lock(&process_info->lock);
	process_info->n_vms--;
	list_del(&kfd_vm->vm_list_node);
	mutex_unlock(&process_info->lock);

	/* Release per-process resources */
	if (!process_info->n_vms) {
		WARN_ON(!list_empty(&process_info->kfd_bo_list));

		dma_fence_put(&process_info->eviction_fence->base);
		mutex_destroy(&process_info->lock);
		kfree(process_info);
	}

	/* Release the VM context */
	amdgpu_vm_fini(adev, avm);
	kfree(vm);
}

uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
{
	struct amdkfd_vm *avm = (struct amdkfd_vm *)vm;

	return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
}

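/* amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu - Allocate a BO for a KFD process
 * in VRAM or GTT, account it against the KFD memory limit and add it to the
 * process BO list. The BO is not mapped into any GPUVM yet.
 */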
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		struct kgd_dev *kgd, uint64_t va, uint64_t size,
		void *vm, struct kgd_mem **mem,
		uint64_t *offset, uint32_t flags)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
	struct amdgpu_bo *bo;
	int byte_align;
	u32 alloc_domain;
	u64 alloc_flags;
	uint32_t mapping_flags;
	int ret;

	/*
	 * Check on which domain to allocate BO
	 */
	if (flags & ALLOC_MEM_FLAGS_VRAM) {
		alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
		alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	} else if (flags & ALLOC_MEM_FLAGS_GTT) {
		alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
		alloc_flags = 0;
	} else {
		return -EINVAL;
	}

	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if (!*mem)
		return -ENOMEM;
	INIT_LIST_HEAD(&(*mem)->bo_va_list);
	mutex_init(&(*mem)->lock);
	(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);

	/* Workaround for AQL queue wraparound bug. Map the same
	 * memory twice. That means we only actually allocate half
	 * the memory.
	 */
	if ((*mem)->aql_queue)
		size = size >> 1;

	/* Workaround for TLB bug on older VI chips */
	byte_align = (adev->family == AMDGPU_FAMILY_VI &&
			adev->asic_type != CHIP_FIJI &&
			adev->asic_type != CHIP_POLARIS10 &&
			adev->asic_type != CHIP_POLARIS11) ?
			VI_BO_SIZE_ALIGN : 1;

	mapping_flags = AMDGPU_VM_PAGE_READABLE;
	if (flags & ALLOC_MEM_FLAGS_WRITABLE)
		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
	if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
	if (flags & ALLOC_MEM_FLAGS_COHERENT)
		mapping_flags |= AMDGPU_VM_MTYPE_UC;
	else
		mapping_flags |= AMDGPU_VM_MTYPE_NC;
	(*mem)->mapping_flags = mapping_flags;

	amdgpu_sync_create(&(*mem)->sync);

	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
	if (ret) {
		pr_debug("Insufficient system memory\n");
		goto err_reserve_system_mem;
	}

	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
			va, size, domain_string(alloc_domain));

	ret = amdgpu_bo_create(adev, size, byte_align, alloc_domain,
			       alloc_flags, ttm_bo_type_device, NULL, &bo);
	if (ret) {
		pr_debug("Failed to create BO on domain %s. ret %d\n",
				domain_string(alloc_domain), ret);
		goto err_bo_create;
	}
	bo->kfd_bo = *mem;
	(*mem)->bo = bo;

	(*mem)->va = va;
	(*mem)->domain = alloc_domain;
	(*mem)->mapped_to_gpu_memory = 0;
	(*mem)->process_info = kfd_vm->process_info;
	add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info);

	if (offset)
		*offset = amdgpu_bo_mmap_offset(bo);

	return 0;

err_bo_create:
	unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
	mutex_destroy(&(*mem)->lock);
	kfree(*mem);
	return ret;
}

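/* amdgpu_amdkfd_gpuvm_free_memory_of_gpu - Free a KFD BO that is no longer
 * mapped on any GPU: remove it from the process BO list, drop its VM
 * attachments and release the BO itself.
 */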
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem)
{
	struct amdkfd_process_info *process_info = mem->process_info;
	unsigned long bo_size = mem->bo->tbo.mem.size;
	struct kfd_bo_va_list *entry, *tmp;
	struct bo_vm_reservation_context ctx;
	struct ttm_validate_buffer *bo_list_entry;
	int ret;

	mutex_lock(&mem->lock);

	if (mem->mapped_to_gpu_memory > 0) {
		pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
				mem->va, bo_size);
		mutex_unlock(&mem->lock);
		return -EBUSY;
	}

	mutex_unlock(&mem->lock);
	/* lock is not needed after this, since mem is unused and will
	 * be freed anyway
	 */

	/* Make sure restore workers don't access the BO any more */
	bo_list_entry = &mem->validate_list;
	mutex_lock(&process_info->lock);
	list_del(&bo_list_entry->head);
	mutex_unlock(&process_info->lock);

	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
	if (unlikely(ret))
		return ret;

	/* The eviction fence should be removed by the last unmap.
	 * TODO: Log an error condition if the bo still has the eviction fence
	 * attached
	 */
	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
					process_info->eviction_fence,
					NULL, NULL);
	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
		mem->va + bo_size * (1 + mem->aql_queue));

	/* Remove from VM internal data structures */
	list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
		remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
				entry, bo_size);

	ret = unreserve_bo_and_vms(&ctx, false, false);

	/* Free the sync object */
	amdgpu_sync_free(&mem->sync);

	/* Free the BO */
	amdgpu_bo_unref(&mem->bo);
	mutex_destroy(&mem->lock);
	kfree(mem);

	return ret;
}

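/* amdgpu_amdkfd_gpuvm_map_memory_to_gpu - Map a KFD BO into the given VM.
 * On the first mapping the BO is validated into its allocation domain and
 * the process eviction fence is attached to it. AQL queue BOs are mapped
 * a second time at an offset of the BO size.
 */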
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
	int ret;
	struct amdgpu_bo *bo;
	uint32_t domain;
	struct kfd_bo_va_list *entry;
	struct bo_vm_reservation_context ctx;
	struct kfd_bo_va_list *bo_va_entry = NULL;
	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
	unsigned long bo_size;

	/* Make sure restore is not running concurrently */
	mutex_lock(&mem->process_info->lock);

	mutex_lock(&mem->lock);

	bo = mem->bo;

	if (!bo) {
		pr_err("Invalid BO when mapping memory to GPU\n");
		ret = -EINVAL;
		goto out;
	}

	domain = mem->domain;
	bo_size = bo->tbo.mem.size;

	pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
			mem->va,
			mem->va + bo_size * (1 + mem->aql_queue),
			vm, domain_string(domain));

	ret = reserve_bo_and_vm(mem, vm, &ctx);
	if (unlikely(ret))
		goto out;

	if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) {
		ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false,
				&bo_va_entry);
		if (ret)
			goto add_bo_to_vm_failed;
		if (mem->aql_queue) {
			ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm,
					true, &bo_va_entry_aql);
			if (ret)
				goto add_bo_to_vm_failed_aql;
		}
	} else {
		ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
		if (unlikely(ret))
			goto add_bo_to_vm_failed;
	}

	if (mem->mapped_to_gpu_memory == 0) {
		/* Validate BO only once. The eviction fence gets added to BO
		 * the first time it is mapped. Validate will wait for all
		 * background evictions to complete.
		 */
		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
		if (ret) {
			pr_debug("Validate failed\n");
			goto map_bo_to_gpuvm_failed;
		}
	}

	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
			pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
					entry->va, entry->va + bo_size,
					entry);

			ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
			if (ret) {
				pr_err("Failed to map bo to gpuvm\n");
				goto map_bo_to_gpuvm_failed;
			}

			ret = vm_update_pds(vm, ctx.sync);
			if (ret) {
				pr_err("Failed to update page directories\n");
				goto map_bo_to_gpuvm_failed;
			}

			entry->is_mapped = true;
			mem->mapped_to_gpu_memory++;
			pr_debug("\t INC mapping count %d\n",
					mem->mapped_to_gpu_memory);
		}
	}

	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
		amdgpu_bo_fence(bo,
				&kfd_vm->process_info->eviction_fence->base,
				true);
	ret = unreserve_bo_and_vms(&ctx, false, false);

	goto out;

map_bo_to_gpuvm_failed:
	if (bo_va_entry_aql)
		remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
add_bo_to_vm_failed_aql:
	if (bo_va_entry)
		remove_bo_from_vm(adev, bo_va_entry, bo_size);
add_bo_to_vm_failed:
	unreserve_bo_and_vms(&ctx, false, false);
out:
	mutex_unlock(&mem->process_info->lock);
	mutex_unlock(&mem->lock);
	return ret;
}

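/* amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu - Unmap a KFD BO from the given
 * VM. When the BO is no longer mapped in any VM, the process eviction fence
 * is removed from it so the BO can be evicted.
 */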
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct amdkfd_process_info *process_info =
		((struct amdkfd_vm *)vm)->process_info;
	unsigned long bo_size = mem->bo->tbo.mem.size;
	struct kfd_bo_va_list *entry;
	struct bo_vm_reservation_context ctx;
	int ret;

	mutex_lock(&mem->lock);

	ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
	if (unlikely(ret))
		goto out;
	/* If no VMs were reserved, it means the BO wasn't actually mapped */
	if (ctx.n_vms == 0) {
		ret = -EINVAL;
		goto unreserve_out;
	}

	ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm);
	if (unlikely(ret))
		goto unreserve_out;

	pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
		mem->va,
		mem->va + bo_size * (1 + mem->aql_queue),
		vm);

	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
		if (entry->bo_va->base.vm == vm && entry->is_mapped) {
			pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
					entry->va,
					entry->va + bo_size,
					entry);

			ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
			if (ret == 0) {
				entry->is_mapped = false;
			} else {
				pr_err("failed to unmap VA 0x%llx\n",
						mem->va);
				goto unreserve_out;
			}

			mem->mapped_to_gpu_memory--;
			pr_debug("\t DEC mapping count %d\n",
					mem->mapped_to_gpu_memory);
		}
	}

	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
	 * required.
	 */
	if (mem->mapped_to_gpu_memory == 0 &&
	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
		amdgpu_amdkfd_remove_eviction_fence(mem->bo,
						process_info->eviction_fence,
						    NULL, NULL);

unreserve_out:
	unreserve_bo_and_vms(&ctx, false, false);
out:
	mutex_unlock(&mem->lock);
	return ret;
}

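/* amdgpu_amdkfd_gpuvm_sync_memory - Wait for all fences collected in the
 * BO's sync object, e.g. pending page table updates for this BO.
 */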
int amdgpu_amdkfd_gpuvm_sync_memory(
		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
{
	struct amdgpu_sync sync;
	int ret;

	amdgpu_sync_create(&sync);

	mutex_lock(&mem->lock);
	amdgpu_sync_clone(&mem->sync, &sync);
	mutex_unlock(&mem->lock);

	ret = amdgpu_sync_wait(&sync, intr);
	amdgpu_sync_free(&sync);
	return ret;
}

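/* amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel - Pin a GTT BO and map it into
 * the kernel address space so the driver can access its contents. The BO
 * is removed from the KFD BO list so it is not restored after evictions.
 */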
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
		struct kgd_mem *mem, void **kptr, uint64_t *size)
{
	int ret;
	struct amdgpu_bo *bo = mem->bo;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		pr_err("userptr can't be mapped to kernel\n");
		return -EINVAL;
	}

	/* delete kgd_mem from kfd_bo_list to avoid re-validating
	 * this BO when it is restored after an eviction
	 */
	mutex_lock(&mem->process_info->lock);

	ret = amdgpu_bo_reserve(bo, true);
	if (ret) {
		pr_err("Failed to reserve bo. ret %d\n", ret);
		goto bo_reserve_failed;
	}

	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
	if (ret) {
		pr_err("Failed to pin bo. ret %d\n", ret);
		goto pin_failed;
	}

	ret = amdgpu_bo_kmap(bo, kptr);
	if (ret) {
		pr_err("Failed to map bo to kernel. ret %d\n", ret);
		goto kmap_failed;
	}

	amdgpu_amdkfd_remove_eviction_fence(
		bo, mem->process_info->eviction_fence, NULL, NULL);
	list_del_init(&mem->validate_list.head);

	if (size)
		*size = amdgpu_bo_size(bo);

	amdgpu_bo_unreserve(bo);

	mutex_unlock(&mem->process_info->lock);
	return 0;

kmap_failed:
	amdgpu_bo_unpin(bo);
pin_failed:
	amdgpu_bo_unreserve(bo);
bo_reserve_failed:
	mutex_unlock(&mem->process_info->lock);

	return ret;
}

/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
 *   KFD process identified by process_info
 *
 * @process_info: amdkfd_process_info of the KFD process
 *
 * After memory eviction, the restore thread calls this function. The
 * function should be called while the process is still valid. BO restore
 * involves:
 *
 * 1.  Release the old eviction fence and create a new one
 * 2.  Get two copies of the PD BO list from all the VMs. Keep one copy as
 *     pd_bo_list.
 * 3.  Use the second PD list and kfd_bo_list to create a list (ctx.list) of
 *     BOs that need to be reserved.
 * 4.  Reserve all the BOs
 * 5.  Validate the PD and PT BOs.
 * 6.  Validate all KFD BOs using kfd_bo_list, map them and add the new fence
 * 7.  Add the fence to all PD and PT BOs.
 * 8.  Unreserve all BOs
 */
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
{
	struct amdgpu_bo_list_entry *pd_bo_list;
	struct amdkfd_process_info *process_info = info;
	struct amdkfd_vm *peer_vm;
	struct kgd_mem *mem;
	struct bo_vm_reservation_context ctx;
	struct amdgpu_amdkfd_fence *new_fence;
	int ret = 0, i;
	struct list_head duplicate_save;
	struct amdgpu_sync sync_obj;

	INIT_LIST_HEAD(&duplicate_save);
	INIT_LIST_HEAD(&ctx.list);
	INIT_LIST_HEAD(&ctx.duplicates);

	pd_bo_list = kcalloc(process_info->n_vms,
			     sizeof(struct amdgpu_bo_list_entry),
			     GFP_KERNEL);
	if (!pd_bo_list)
		return -ENOMEM;

	i = 0;
	mutex_lock(&process_info->lock);
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			vm_list_node)
		amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list,
				    &pd_bo_list[i++]);

	/* Reserve all BOs and page tables/directory. Add all BOs from
	 * kfd_bo_list to ctx.list
	 */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		list_add_tail(&mem->resv_list.head, &ctx.list);
		mem->resv_list.bo = mem->validate_list.bo;
		mem->resv_list.shared = mem->validate_list.shared;
	}

	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
				     false, &duplicate_save);
	if (ret) {
		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
		goto ttm_reserve_fail;
	}

	amdgpu_sync_create(&sync_obj);

	/* Validate PDs and PTs */
	ret = process_validate_vms(process_info);
	if (ret)
		goto validate_map_fail;

	/* Wait for PD/PTs validate to finish */
	/* FIXME: I think this isn't needed */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *bo = peer_vm->base.root.base.bo;

		ttm_bo_wait(&bo->tbo, false, false);
	}

	/* Validate BOs and map them to GPUVM (update VM page tables). */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		struct amdgpu_bo *bo = mem->bo;
		uint32_t domain = mem->domain;
		struct kfd_bo_va_list *bo_va_entry;

		ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
		if (ret) {
			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
			goto validate_map_fail;
		}

		list_for_each_entry(bo_va_entry, &mem->bo_va_list,
				    bo_list) {
			ret = update_gpuvm_pte((struct amdgpu_device *)
					      bo_va_entry->kgd_dev,
					      bo_va_entry,
					      &sync_obj);
			if (ret) {
				pr_debug("Memory eviction: update PTE failed. Try again\n");
				goto validate_map_fail;
			}
		}
	}

	/* Update page directories */
	ret = process_update_pds(process_info, &sync_obj);
	if (ret) {
		pr_debug("Memory eviction: update PDs failed. Try again\n");
		goto validate_map_fail;
	}

	amdgpu_sync_wait(&sync_obj, false);

	/* Release old eviction fence and create new one, because fence only
	 * goes from unsignaled to signaled, fence cannot be reused.
	 * Use context and mm from the old fence.
	 */
	new_fence = amdgpu_amdkfd_fence_create(
				process_info->eviction_fence->base.context,
				process_info->eviction_fence->mm);
	if (!new_fence) {
		pr_err("Failed to create eviction fence\n");
		ret = -ENOMEM;
		goto validate_map_fail;
	}
	dma_fence_put(&process_info->eviction_fence->base);
	process_info->eviction_fence = new_fence;
	*ef = dma_fence_get(&new_fence->base);

	/* Wait for validate to finish and attach new eviction fence */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
		validate_list.head)
		ttm_bo_wait(&mem->bo->tbo, false, false);
	list_for_each_entry(mem, &process_info->kfd_bo_list,
		validate_list.head)
		amdgpu_bo_fence(mem->bo,
			&process_info->eviction_fence->base, true);

	/* Attach eviction fence to PD / PT BOs */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *bo = peer_vm->base.root.base.bo;

		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
	}

validate_map_fail:
	ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
	amdgpu_sync_free(&sync_obj);
ttm_reserve_fail:
	mutex_unlock(&process_info->lock);
	kfree(pd_bo_list);
	return ret;
}