1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/dma-fence-array.h>
29 #include <linux/interval_tree_generic.h>
30 #include <drm/drmP.h>
31 #include <drm/amdgpu_drm.h>
32 #include "amdgpu.h"
33 #include "amdgpu_trace.h"
34 
35 /*
36  * GPUVM
37  * GPUVM is similar to the legacy gart on older asics, however
38  * rather than there being a single global gart table
39  * for the entire GPU, there are multiple VM page tables active
40  * at any given time.  The VM page tables can contain a mix of
41  * vram pages and system memory pages, and system memory pages
42  * can be mapped as snooped (cached system pages) or unsnooped
43  * (uncached system pages).
44  * Each VM has an ID associated with it and there is a page table
45  * associated with each VMID.  When executing a command buffer,
46  * the kernel tells the ring what VMID to use for that command
47  * buffer.  VMIDs are allocated dynamically as commands are submitted.
48  * The userspace drivers maintain their own address space and the kernel
49  * sets up their page tables accordingly when they submit their
50  * command buffers and a VMID is assigned.
51  * Cayman/Trinity support up to 8 active VMs at any given time;
52  * SI supports 16.
53  */
54 
55 #define START(node) ((node)->start)
56 #define LAST(node) ((node)->last)
57 
58 INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
59 		     START, LAST, static, amdgpu_vm_it)
60 
61 #undef START
62 #undef LAST
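
/* INTERVAL_TREE_DEFINE() above generates the static helpers
 * amdgpu_vm_it_insert(), amdgpu_vm_it_remove(), amdgpu_vm_it_iter_first()
 * and amdgpu_vm_it_iter_next(), which the mapping management code in this
 * file uses to look up the amdgpu_bo_va_mapping covering an address range.
 */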
63 
64 /* Local structure. Encapsulate some VM table update parameters to reduce
65  * the number of function parameters
66  */
67 struct amdgpu_pte_update_params {
68 	/* amdgpu device we do this update for */
69 	struct amdgpu_device *adev;
70 	/* optional amdgpu_vm we do this update for */
71 	struct amdgpu_vm *vm;
72 	/* address where to copy page table entries from */
73 	uint64_t src;
74 	/* indirect buffer to fill with commands */
75 	struct amdgpu_ib *ib;
76 	/* Function which actually does the update */
77 	void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
78 		     uint64_t addr, unsigned count, uint32_t incr,
79 		     uint64_t flags);
80 	/* The next two are used during VM update by CPU
81 	 *  DMA addresses to use for mapping
82 	 *  Kernel pointer of PD/PT BO that needs to be updated
83 	 */
84 	dma_addr_t *pages_addr;
85 	void *kptr;
86 };
87 
88 /* Helper to disable the partially resident texture feature from a fence callback */
89 struct amdgpu_prt_cb {
90 	struct amdgpu_device *adev;
91 	struct dma_fence_cb cb;
92 };
93 
94 /**
95  * amdgpu_vm_num_entries - return the number of entries in a PD/PT
96  *
97  * @adev: amdgpu_device pointer
 * @level: level in the page table hierarchy to count entries for
98  *
99  * Calculate the number of entries in a page directory or page table.
100  */
101 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
102 				      unsigned level)
103 {
104 	if (level == 0)
105 		/* For the root directory */
106 		return adev->vm_manager.max_pfn >>
107 			(adev->vm_manager.block_size *
108 			 adev->vm_manager.num_level);
109 	else if (level == adev->vm_manager.num_level)
110 		/* For the page tables on the leaves */
111 		return AMDGPU_VM_PTE_COUNT(adev);
112 	else
113 		/* Everything in between */
114 		return 1 << adev->vm_manager.block_size;
115 }
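
/* Example: with a 9 bit block_size and num_level == 3, a leaf page table
 * holds AMDGPU_VM_PTE_COUNT() == 512 entries, every intermediate directory
 * holds 1 << 9 == 512 entries, and the root directory holds
 * max_pfn >> (9 * 3) entries.
 */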
116 
117 /**
118  * amdgpu_vm_bo_size - returns the size of the BOs in bytes
119  *
120  * @adev: amdgpu_device pointer
 * @level: level in the page table hierarchy to size the BO for
121  *
122  * Calculate the size of the BO for a page directory or page table in bytes.
123  */
124 static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
125 {
126 	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
127 }
128 
129 /**
130  * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
131  *
132  * @vm: vm providing the BOs
133  * @validated: head of validation list
134  * @entry: entry to add
135  *
136  * Add the page directory to the list of BOs to
137  * validate for command submission.
138  */
139 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
140 			 struct list_head *validated,
141 			 struct amdgpu_bo_list_entry *entry)
142 {
143 	entry->robj = vm->root.bo;
144 	entry->priority = 0;
145 	entry->tv.bo = &entry->robj->tbo;
146 	entry->tv.shared = true;
147 	entry->user_pages = NULL;
148 	list_add(&entry->tv.head, validated);
149 }
150 
151 /**
152  * amdgpu_vm_validate_level - validate a single page table level
153  *
154  * @parent: parent page table level
155  * @validate: callback to do the validation
156  * @param: parameter for the validation callback
157  *
158  * Validate the page table BOs on command submission if necessary.
159  */
160 static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
161 				    int (*validate)(void *, struct amdgpu_bo *),
162 				    void *param, bool use_cpu_for_update,
163 				    struct ttm_bo_global *glob)
164 {
165 	unsigned i;
166 	int r;
167 
168 	if (parent->bo->shadow) {
169 		struct amdgpu_bo *shadow = parent->bo->shadow;
170 
171 		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
172 		if (r)
173 			return r;
174 	}
175 
176 	if (use_cpu_for_update) {
177 		r = amdgpu_bo_kmap(parent->bo, NULL);
178 		if (r)
179 			return r;
180 	}
181 
182 	if (!parent->entries)
183 		return 0;
184 
185 	for (i = 0; i <= parent->last_entry_used; ++i) {
186 		struct amdgpu_vm_pt *entry = &parent->entries[i];
187 
188 		if (!entry->bo)
189 			continue;
190 
191 		r = validate(param, entry->bo);
192 		if (r)
193 			return r;
194 
195 		spin_lock(&glob->lru_lock);
196 		ttm_bo_move_to_lru_tail(&entry->bo->tbo);
197 		if (entry->bo->shadow)
198 			ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
199 		spin_unlock(&glob->lru_lock);
200 
201 		/*
202 		 * Recurse into the sub directory. This is harmless because we
203 		 * have only a maximum of 5 layers.
204 		 */
205 		r = amdgpu_vm_validate_level(entry, validate, param,
206 					     use_cpu_for_update, glob);
207 		if (r)
208 			return r;
209 	}
210 
211 	return 0;
212 }
213 
214 /**
215  * amdgpu_vm_validate_pt_bos - validate the page table BOs
216  *
217  * @adev: amdgpu device pointer
218  * @vm: vm providing the BOs
219  * @validate: callback to do the validation
220  * @param: parameter for the validation callback
221  *
222  * Validate the page table BOs on command submission if necessary.
223  */
224 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
225 			      int (*validate)(void *p, struct amdgpu_bo *bo),
226 			      void *param)
227 {
228 	uint64_t num_evictions;
229 
230 	/* We only need to validate the page tables
231 	 * if they aren't already valid.
232 	 */
233 	num_evictions = atomic64_read(&adev->num_evictions);
234 	if (num_evictions == vm->last_eviction_counter)
235 		return 0;
236 
237 	return amdgpu_vm_validate_level(&vm->root, validate, param,
238 					vm->use_cpu_for_update,
239 					adev->mman.bdev.glob);
240 }
241 
242 /**
243  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
244  *
245  * @adev: amdgpu_device pointer
246  * @vm: requested vm
247  * @saddr: start of the address range
248  * @eaddr: end of the address range
249  *
250  * Make sure the page directories and page tables are allocated
251  */
252 static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
253 				  struct amdgpu_vm *vm,
254 				  struct amdgpu_vm_pt *parent,
255 				  uint64_t saddr, uint64_t eaddr,
256 				  unsigned level)
257 {
258 	unsigned shift = (adev->vm_manager.num_level - level) *
259 		adev->vm_manager.block_size;
260 	unsigned pt_idx, from, to;
261 	int r;
262 	u64 flags;
263 	uint64_t init_value = 0;
264 
265 	if (!parent->entries) {
266 		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
267 
268 		parent->entries = kvmalloc_array(num_entries,
269 						   sizeof(struct amdgpu_vm_pt),
270 						   GFP_KERNEL | __GFP_ZERO);
271 		if (!parent->entries)
272 			return -ENOMEM;
274 	}
275 
276 	from = saddr >> shift;
277 	to = eaddr >> shift;
278 	if (from >= amdgpu_vm_num_entries(adev, level) ||
279 	    to >= amdgpu_vm_num_entries(adev, level))
280 		return -EINVAL;
281 
282 	if (to > parent->last_entry_used)
283 		parent->last_entry_used = to;
284 
285 	++level;
286 	saddr = saddr & ((1 << shift) - 1);
287 	eaddr = eaddr & ((1 << shift) - 1);
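	/* saddr/eaddr now only contain the part of the address handled by the
	 * lower levels; the recursion at the end of the loop uses them as the
	 * sub range for the first and the last child that is descended into.
	 */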
288 
289 	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
290 			AMDGPU_GEM_CREATE_VRAM_CLEARED;
291 	if (vm->use_cpu_for_update)
292 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
293 	else
294 		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
295 				AMDGPU_GEM_CREATE_SHADOW);
296 
297 	if (vm->pte_support_ats) {
298 		init_value = AMDGPU_PTE_SYSTEM;
299 		if (level != adev->vm_manager.num_level - 1)
300 			init_value |= AMDGPU_PDE_PTE;
301 	}
302 
303 	/* walk over the address space and allocate the page tables */
304 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
305 		struct reservation_object *resv = vm->root.bo->tbo.resv;
306 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
307 		struct amdgpu_bo *pt;
308 
309 		if (!entry->bo) {
310 			r = amdgpu_bo_create(adev,
311 					     amdgpu_vm_bo_size(adev, level),
312 					     AMDGPU_GPU_PAGE_SIZE, true,
313 					     AMDGPU_GEM_DOMAIN_VRAM,
314 					     flags,
315 					     NULL, resv, init_value, &pt);
316 			if (r)
317 				return r;
318 
319 			if (vm->use_cpu_for_update) {
320 				r = amdgpu_bo_kmap(pt, NULL);
321 				if (r) {
322 					amdgpu_bo_unref(&pt);
323 					return r;
324 				}
325 			}
326 
327 			/* Keep a reference to the root directory to avoid
328 			 * freeing them up in the wrong order.
329 			 */
330 			pt->parent = amdgpu_bo_ref(vm->root.bo);
331 
332 			entry->bo = pt;
333 			entry->addr = 0;
334 		}
335 
336 		if (level < adev->vm_manager.num_level) {
337 			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
338 			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
339 				((1 << shift) - 1);
340 			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
341 						   sub_eaddr, level);
342 			if (r)
343 				return r;
344 		}
345 	}
346 
347 	return 0;
348 }
349 
350 /**
351  * amdgpu_vm_alloc_pts - Allocate page tables.
352  *
353  * @adev: amdgpu_device pointer
354  * @vm: VM to allocate page tables for
355  * @saddr: Start address which needs to be allocated
356  * @size: Size from start address we need.
357  *
358  * Make sure the page tables are allocated.
359  */
360 int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
361 			struct amdgpu_vm *vm,
362 			uint64_t saddr, uint64_t size)
363 {
364 	uint64_t last_pfn;
365 	uint64_t eaddr;
366 
367 	/* validate the parameters */
368 	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
369 		return -EINVAL;
370 
371 	eaddr = saddr + size - 1;
372 	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
373 	if (last_pfn >= adev->vm_manager.max_pfn) {
374 		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
375 			last_pfn, adev->vm_manager.max_pfn);
376 		return -EINVAL;
377 	}
378 
379 	saddr /= AMDGPU_GPU_PAGE_SIZE;
380 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
381 
382 	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0);
383 }
384 
385 /**
386  * amdgpu_vm_had_gpu_reset - check if reset occurred since last use
387  *
388  * @adev: amdgpu_device pointer
389  * @id: VMID structure
390  *
391  * Check if GPU reset occurred since last use of the VMID.
392  */
393 static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,
394 				    struct amdgpu_vm_id *id)
395 {
396 	return id->current_gpu_reset_count !=
397 		atomic_read(&adev->gpu_reset_counter);
398 }
399 
400 static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
401 {
402 	return !!vm->reserved_vmid[vmhub];
403 }
404 
405 /* id_mgr->lock must be held */
406 static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
407 					       struct amdgpu_ring *ring,
408 					       struct amdgpu_sync *sync,
409 					       struct dma_fence *fence,
410 					       struct amdgpu_job *job)
411 {
412 	struct amdgpu_device *adev = ring->adev;
413 	unsigned vmhub = ring->funcs->vmhub;
414 	uint64_t fence_context = adev->fence_context + ring->idx;
415 	struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
416 	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
417 	struct dma_fence *updates = sync->last_vm_update;
418 	int r = 0;
419 	struct dma_fence *flushed, *tmp;
420 	bool needs_flush = vm->use_cpu_for_update;
421 
422 	flushed = id->flushed_updates;
423 	if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
424 	    (atomic64_read(&id->owner) != vm->client_id) ||
425 	    (job->vm_pd_addr != id->pd_gpu_addr) ||
426 	    (updates && (!flushed || updates->context != flushed->context ||
427 			dma_fence_is_later(updates, flushed))) ||
428 	    (!id->last_flush || (id->last_flush->context != fence_context &&
429 				 !dma_fence_is_signaled(id->last_flush)))) {
430 		needs_flush = true;
431 		/* to prevent one context from being starved by another context */
432 		id->pd_gpu_addr = 0;
433 		tmp = amdgpu_sync_peek_fence(&id->active, ring);
434 		if (tmp) {
435 			r = amdgpu_sync_fence(adev, sync, tmp);
436 			return r;
437 		}
438 	}
439 
440 	/* Good we can use this VMID. Remember this submission as
441 	 * user of the VMID.
442 	 */
443 	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
444 	if (r)
445 		goto out;
446 
447 	if (updates && (!flushed || updates->context != flushed->context ||
448 			dma_fence_is_later(updates, flushed))) {
449 		dma_fence_put(id->flushed_updates);
450 		id->flushed_updates = dma_fence_get(updates);
451 	}
452 	id->pd_gpu_addr = job->vm_pd_addr;
453 	atomic64_set(&id->owner, vm->client_id);
454 	job->vm_needs_flush = needs_flush;
455 	if (needs_flush) {
456 		dma_fence_put(id->last_flush);
457 		id->last_flush = NULL;
458 	}
459 	job->vm_id = id - id_mgr->ids;
460 	trace_amdgpu_vm_grab_id(vm, ring, job);
461 out:
462 	return r;
463 }
464 
465 /**
466  * amdgpu_vm_grab_id - allocate the next free VMID
467  *
468  * @vm: vm to allocate id for
469  * @ring: ring we want to submit job to
470  * @sync: sync object where we add dependencies
471  * @fence: fence protecting ID from reuse
 * @job: job which will use the allocated VMID
472  *
473  * Allocate an id for the vm, adding fences to the sync obj as necessary.
474  */
475 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
476 		      struct amdgpu_sync *sync, struct dma_fence *fence,
477 		      struct amdgpu_job *job)
478 {
479 	struct amdgpu_device *adev = ring->adev;
480 	unsigned vmhub = ring->funcs->vmhub;
481 	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
482 	uint64_t fence_context = adev->fence_context + ring->idx;
483 	struct dma_fence *updates = sync->last_vm_update;
484 	struct amdgpu_vm_id *id, *idle;
485 	struct dma_fence **fences;
486 	unsigned i;
487 	int r = 0;
488 
489 	mutex_lock(&id_mgr->lock);
490 	if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
491 		r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job);
492 		mutex_unlock(&id_mgr->lock);
493 		return r;
494 	}
495 	fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
496 	if (!fences) {
497 		mutex_unlock(&id_mgr->lock);
498 		return -ENOMEM;
499 	}
500 	/* Check if we have an idle VMID */
501 	i = 0;
502 	list_for_each_entry(idle, &id_mgr->ids_lru, list) {
503 		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
504 		if (!fences[i])
505 			break;
506 		++i;
507 	}
508 
509 	/* If we can't find an idle VMID to use, wait till one becomes available */
510 	if (&idle->list == &id_mgr->ids_lru) {
511 		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
512 		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
513 		struct dma_fence_array *array;
514 		unsigned j;
515 
516 		for (j = 0; j < i; ++j)
517 			dma_fence_get(fences[j]);
518 
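		/* Collect the fences into a dma_fence_array that, with the
		 * signal_on_any flag set below, signals as soon as any of the
		 * in-flight VMIDs becomes idle; the array takes over the
		 * references acquired above.
		 */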
519 		array = dma_fence_array_create(i, fences, fence_context,
520 					   seqno, true);
521 		if (!array) {
522 			for (j = 0; j < i; ++j)
523 				dma_fence_put(fences[j]);
524 			kfree(fences);
525 			r = -ENOMEM;
526 			goto error;
527 		}
528 
530 		r = amdgpu_sync_fence(ring->adev, sync, &array->base);
531 		dma_fence_put(&array->base);
532 		if (r)
533 			goto error;
534 
535 		mutex_unlock(&id_mgr->lock);
536 		return 0;
537 
538 	}
539 	kfree(fences);
540 
541 	job->vm_needs_flush = vm->use_cpu_for_update;
542 	/* Check if we can use a VMID already assigned to this VM */
543 	list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
544 		struct dma_fence *flushed;
545 		bool needs_flush = vm->use_cpu_for_update;
546 
547 		/* Check all the prerequisites to using this VMID */
548 		if (amdgpu_vm_had_gpu_reset(adev, id))
549 			continue;
550 
551 		if (atomic64_read(&id->owner) != vm->client_id)
552 			continue;
553 
554 		if (job->vm_pd_addr != id->pd_gpu_addr)
555 			continue;
556 
557 		if (!id->last_flush ||
558 		    (id->last_flush->context != fence_context &&
559 		     !dma_fence_is_signaled(id->last_flush)))
560 			needs_flush = true;
561 
562 		flushed = id->flushed_updates;
563 		if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
564 			needs_flush = true;
565 
566 		/* Concurrent flushes are only possible starting with Vega10 */
567 		if (adev->asic_type < CHIP_VEGA10 && needs_flush)
568 			continue;
569 
570 		/* Good we can use this VMID. Remember this submission as
571 		 * user of the VMID.
572 		 */
573 		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
574 		if (r)
575 			goto error;
576 
577 		if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
578 			dma_fence_put(id->flushed_updates);
579 			id->flushed_updates = dma_fence_get(updates);
580 		}
581 
582 		if (needs_flush)
583 			goto needs_flush;
584 		else
585 			goto no_flush_needed;
586 
587 	}
588 
589 	/* Still no ID to use? Then use the idle one found earlier */
590 	id = idle;
591 
592 	/* Remember this submission as user of the VMID */
593 	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
594 	if (r)
595 		goto error;
596 
597 	id->pd_gpu_addr = job->vm_pd_addr;
598 	dma_fence_put(id->flushed_updates);
599 	id->flushed_updates = dma_fence_get(updates);
600 	atomic64_set(&id->owner, vm->client_id);
601 
602 needs_flush:
603 	job->vm_needs_flush = true;
604 	dma_fence_put(id->last_flush);
605 	id->last_flush = NULL;
606 
607 no_flush_needed:
608 	list_move_tail(&id->list, &id_mgr->ids_lru);
609 
610 	job->vm_id = id - id_mgr->ids;
611 	trace_amdgpu_vm_grab_id(vm, ring, job);
612 
613 error:
614 	mutex_unlock(&id_mgr->lock);
615 	return r;
616 }
617 
618 static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev,
619 					  struct amdgpu_vm *vm,
620 					  unsigned vmhub)
621 {
622 	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
623 
624 	mutex_lock(&id_mgr->lock);
625 	if (vm->reserved_vmid[vmhub]) {
626 		list_add(&vm->reserved_vmid[vmhub]->list,
627 			&id_mgr->ids_lru);
628 		vm->reserved_vmid[vmhub] = NULL;
629 		atomic_dec(&id_mgr->reserved_vmid_num);
630 	}
631 	mutex_unlock(&id_mgr->lock);
632 }
633 
634 static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
635 					 struct amdgpu_vm *vm,
636 					 unsigned vmhub)
637 {
638 	struct amdgpu_vm_id_manager *id_mgr;
639 	struct amdgpu_vm_id *idle;
640 	int r = 0;
641 
642 	id_mgr = &adev->vm_manager.id_mgr[vmhub];
643 	mutex_lock(&id_mgr->lock);
644 	if (vm->reserved_vmid[vmhub])
645 		goto unlock;
646 	if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
647 	    AMDGPU_VM_MAX_RESERVED_VMID) {
648 		DRM_ERROR("Over limit of reserved VMIDs\n");
649 		atomic_dec(&id_mgr->reserved_vmid_num);
650 		r = -EINVAL;
651 		goto unlock;
652 	}
653 	/* Take the first VMID from the LRU, i.e. the least recently used one */
654 	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
655 	list_del_init(&idle->list);
656 	vm->reserved_vmid[vmhub] = idle;
657 	mutex_unlock(&id_mgr->lock);
658 
659 	return 0;
660 unlock:
661 	mutex_unlock(&id_mgr->lock);
662 	return r;
663 }
664 
665 /**
666  * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
667  *
668  * @adev: amdgpu_device pointer
669  */
670 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
671 {
672 	const struct amdgpu_ip_block *ip_block;
673 	bool has_compute_vm_bug;
674 	struct amdgpu_ring *ring;
675 	int i;
676 
677 	has_compute_vm_bug = false;
678 
679 	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
680 	if (ip_block) {
681 		/* Compute has a VM bug for GFX version < 7.
682 		 * Compute has a VM bug for GFX 8 MEC firmware version < 673. */
683 		if (ip_block->version->major <= 7)
684 			has_compute_vm_bug = true;
685 		else if (ip_block->version->major == 8)
686 			if (adev->gfx.mec_fw_version < 673)
687 				has_compute_vm_bug = true;
688 	}
689 
690 	for (i = 0; i < adev->num_rings; i++) {
691 		ring = adev->rings[i];
692 		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
693 			/* only compute rings */
694 			ring->has_compute_vm_bug = has_compute_vm_bug;
695 		else
696 			ring->has_compute_vm_bug = false;
697 	}
698 }
699 
700 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
701 				  struct amdgpu_job *job)
702 {
703 	struct amdgpu_device *adev = ring->adev;
704 	unsigned vmhub = ring->funcs->vmhub;
705 	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
706 	struct amdgpu_vm_id *id;
707 	bool gds_switch_needed;
708 	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
709 
710 	if (job->vm_id == 0)
711 		return false;
712 	id = &id_mgr->ids[job->vm_id];
713 	gds_switch_needed = ring->funcs->emit_gds_switch && (
714 		id->gds_base != job->gds_base ||
715 		id->gds_size != job->gds_size ||
716 		id->gws_base != job->gws_base ||
717 		id->gws_size != job->gws_size ||
718 		id->oa_base != job->oa_base ||
719 		id->oa_size != job->oa_size);
720 
721 	if (amdgpu_vm_had_gpu_reset(adev, id))
722 		return true;
723 
724 	return vm_flush_needed || gds_switch_needed;
725 }
726 
727 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
728 {
729 	return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
730 }
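
/* On a "large BAR" board all of VRAM is CPU visible
 * (real_vram_size == visible_vram_size), which is presumably what makes
 * CPU based updates of PD/PT BOs placed in VRAM feasible.
 */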
731 
732 /**
733  * amdgpu_vm_flush - hardware flush the vm
734  *
735  * @ring: ring to use for flush
736  * @job: related job carrying the VMID and the flush parameters
737  * @need_pipe_sync: whether a pipeline sync is needed before the flush
738  *
739  * Emit a VM flush when it is necessary.
740  */
741 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
742 {
743 	struct amdgpu_device *adev = ring->adev;
744 	unsigned vmhub = ring->funcs->vmhub;
745 	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
746 	struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id];
747 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
748 		id->gds_base != job->gds_base ||
749 		id->gds_size != job->gds_size ||
750 		id->gws_base != job->gws_base ||
751 		id->gws_size != job->gws_size ||
752 		id->oa_base != job->oa_base ||
753 		id->oa_size != job->oa_size);
754 	bool vm_flush_needed = job->vm_needs_flush;
755 	unsigned patch_offset = 0;
756 	int r;
757 
758 	if (amdgpu_vm_had_gpu_reset(adev, id)) {
759 		gds_switch_needed = true;
760 		vm_flush_needed = true;
761 	}
762 
763 	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
764 		return 0;
765 
766 	if (ring->funcs->init_cond_exec)
767 		patch_offset = amdgpu_ring_init_cond_exec(ring);
768 
769 	if (need_pipe_sync)
770 		amdgpu_ring_emit_pipeline_sync(ring);
771 
772 	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
773 		struct dma_fence *fence;
774 
775 		trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr);
776 		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
777 
778 		r = amdgpu_fence_emit(ring, &fence);
779 		if (r)
780 			return r;
781 
782 		mutex_lock(&id_mgr->lock);
783 		dma_fence_put(id->last_flush);
784 		id->last_flush = fence;
785 		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
786 		mutex_unlock(&id_mgr->lock);
787 	}
788 
789 	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
790 		id->gds_base = job->gds_base;
791 		id->gds_size = job->gds_size;
792 		id->gws_base = job->gws_base;
793 		id->gws_size = job->gws_size;
794 		id->oa_base = job->oa_base;
795 		id->oa_size = job->oa_size;
796 		amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base,
797 					    job->gds_size, job->gws_base,
798 					    job->gws_size, job->oa_base,
799 					    job->oa_size);
800 	}
801 
802 	if (ring->funcs->patch_cond_exec)
803 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
804 
805 	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
806 	if (ring->funcs->emit_switch_buffer) {
807 		amdgpu_ring_emit_switch_buffer(ring);
808 		amdgpu_ring_emit_switch_buffer(ring);
809 	}
810 	return 0;
811 }
812 
813 /**
814  * amdgpu_vm_reset_id - reset VMID to zero
815  *
816  * @adev: amdgpu device structure
817  * @vmhub: vmhub the VMID belongs to
 * @vmid: vmid number to reset
818  *
819  * Reset saved GDS, GWS and OA to force switch on next flush.
820  */
821 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
822 			unsigned vmid)
823 {
824 	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
825 	struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
826 
827 	atomic64_set(&id->owner, 0);
828 	id->gds_base = 0;
829 	id->gds_size = 0;
830 	id->gws_base = 0;
831 	id->gws_size = 0;
832 	id->oa_base = 0;
833 	id->oa_size = 0;
834 }
835 
836 /**
837  * amdgpu_vm_reset_all_ids - reset all VMIDs to zero
838  *
839  * @adev: amdgpu device structure
840  *
841  * Reset all VMIDs to force a flush on next use
842  */
843 void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
844 {
845 	unsigned i, j;
846 
847 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
848 		struct amdgpu_vm_id_manager *id_mgr =
849 			&adev->vm_manager.id_mgr[i];
850 
851 		for (j = 1; j < id_mgr->num_ids; ++j)
852 			amdgpu_vm_reset_id(adev, i, j);
853 	}
854 }
855 
856 /**
857  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
858  *
859  * @vm: requested vm
860  * @bo: requested buffer object
861  *
862  * Find @bo inside the requested vm.
863  * Search inside the @bo's vm list for the requested vm
864  * Returns the found bo_va or NULL if none is found
865  *
866  * Object has to be reserved!
867  */
868 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
869 				       struct amdgpu_bo *bo)
870 {
871 	struct amdgpu_bo_va *bo_va;
872 
873 	list_for_each_entry(bo_va, &bo->va, base.bo_list) {
874 		if (bo_va->base.vm == vm) {
875 			return bo_va;
876 		}
877 	}
878 	return NULL;
879 }
880 
881 /**
882  * amdgpu_vm_do_set_ptes - helper to call the right asic function
883  *
884  * @params: see amdgpu_pte_update_params definition
885  * @pe: addr of the page entry
886  * @addr: dst addr to write into pe
887  * @count: number of page entries to update
888  * @incr: increase next addr by incr bytes
889  * @flags: hw access flags
890  *
891  * Traces the parameters and calls the right asic functions
892  * to setup the page table using the DMA.
893  */
894 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
895 				  uint64_t pe, uint64_t addr,
896 				  unsigned count, uint32_t incr,
897 				  uint64_t flags)
898 {
899 	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
900 
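	/* The count < 3 threshold below picks between writing the one or two
	 * entries directly into the IB (amdgpu_vm_write_pte) and using the
	 * generated SET_PTE_PDE style update (amdgpu_vm_set_pte_pde).
	 */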
901 	if (count < 3) {
902 		amdgpu_vm_write_pte(params->adev, params->ib, pe,
903 				    addr | flags, count, incr);
904 
905 	} else {
906 		amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
907 				      count, incr, flags);
908 	}
909 }
910 
911 /**
912  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
913  *
914  * @params: see amdgpu_pte_update_params definition
915  * @pe: addr of the page entry
916  * @addr: dst addr to write into pe
917  * @count: number of page entries to update
918  * @incr: increase next addr by incr bytes
919  * @flags: hw access flags
920  *
921  * Traces the parameters and calls the DMA function to copy the PTEs.
922  */
923 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
924 				   uint64_t pe, uint64_t addr,
925 				   unsigned count, uint32_t incr,
926 				   uint64_t flags)
927 {
928 	uint64_t src = (params->src + (addr >> 12) * 8);
929 
931 	trace_amdgpu_vm_copy_ptes(pe, src, count);
932 
933 	amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
934 }
935 
936 /**
937  * amdgpu_vm_map_gart - Resolve gart mapping of addr
938  *
939  * @pages_addr: optional DMA address to use for lookup
940  * @addr: the unmapped addr
941  *
942  * Look up the physical address of the page that the pte resolves
943  * to and return the pointer for the page table entry.
944  */
945 static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
946 {
947 	uint64_t result;
948 
949 	/* page table offset */
950 	result = pages_addr[addr >> PAGE_SHIFT];
951 
952 	/* in case cpu page size != gpu page size */
953 	result |= addr & (~PAGE_MASK);
954 
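	/* drop the bits below the GPU page size again, in case the CPU page
	 * is larger than the 4KB GPU page
	 */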
955 	result &= 0xFFFFFFFFFFFFF000ULL;
956 
957 	return result;
958 }
959 
960 /**
961  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
962  *
963  * @params: see amdgpu_pte_update_params definition
964  * @pe: kmap addr of the page entry
965  * @addr: dst addr to write into pe
966  * @count: number of page entries to update
967  * @incr: increase next addr by incr bytes
968  * @flags: hw access flags
969  *
970  * Write count number of PT/PD entries directly.
971  */
972 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
973 				   uint64_t pe, uint64_t addr,
974 				   unsigned count, uint32_t incr,
975 				   uint64_t flags)
976 {
977 	unsigned int i;
978 	uint64_t value;
979 
980 	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
981 
982 	for (i = 0; i < count; i++) {
983 		value = params->pages_addr ?
984 			amdgpu_vm_map_gart(params->pages_addr, addr) :
985 			addr;
986 		amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
987 					i, value, flags);
988 		addr += incr;
989 	}
990 }
991 
992 static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
993 			     void *owner)
994 {
995 	struct amdgpu_sync sync;
996 	int r;
997 
998 	amdgpu_sync_create(&sync);
999 	amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner);
1000 	r = amdgpu_sync_wait(&sync, true);
1001 	amdgpu_sync_free(&sync);
1002 
1003 	return r;
1004 }
1005 
1006 /*
1007  * amdgpu_vm_update_level - update a single level in the hierarchy
1008  *
1009  * @adev: amdgpu_device pointer
1010  * @vm: requested vm
1011  * @parent: parent directory
1012  *
1013  * Makes sure all entries in @parent are up to date.
1014  * Returns 0 for success, error for failure.
1015  */
1016 static int amdgpu_vm_update_level(struct amdgpu_device *adev,
1017 				  struct amdgpu_vm *vm,
1018 				  struct amdgpu_vm_pt *parent,
1019 				  unsigned level)
1020 {
1021 	struct amdgpu_bo *shadow;
1022 	struct amdgpu_ring *ring = NULL;
1023 	uint64_t pd_addr, shadow_addr = 0;
1024 	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
1025 	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
1026 	unsigned count = 0, pt_idx, ndw = 0;
1027 	struct amdgpu_job *job;
1028 	struct amdgpu_pte_update_params params;
1029 	struct dma_fence *fence = NULL;
1030 
1031 	int r;
1032 
1033 	if (!parent->entries)
1034 		return 0;
1035 
1036 	memset(&params, 0, sizeof(params));
1037 	params.adev = adev;
1038 	shadow = parent->bo->shadow;
1039 
1040 	if (vm->use_cpu_for_update) {
1041 		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
1042 		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
1043 		if (unlikely(r))
1044 			return r;
1045 
1046 		params.func = amdgpu_vm_cpu_set_ptes;
1047 	} else {
1048 		ring = container_of(vm->entity.sched, struct amdgpu_ring,
1049 				    sched);
1050 
1051 		/* padding, etc. */
1052 		ndw = 64;
1053 
1054 		/* assume the worst case */
1055 		ndw += parent->last_entry_used * 6;
1056 
1057 		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
1058 
1059 		if (shadow) {
1060 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
1061 			ndw *= 2;
1062 		} else {
1063 			shadow_addr = 0;
1064 		}
1065 
1066 		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
1067 		if (r)
1068 			return r;
1069 
1070 		params.ib = &job->ibs[0];
1071 		params.func = amdgpu_vm_do_set_ptes;
1072 	}
1073 
1075 	/* walk over the address space and update the directory */
1076 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
1077 		struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
1078 		uint64_t pde, pt;
1079 
1080 		if (bo == NULL)
1081 			continue;
1082 
1083 		pt = amdgpu_bo_gpu_offset(bo);
1084 		pt = amdgpu_gart_get_vm_pde(adev, pt);
1085 		/* Don't update huge pages here */
1086 		if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
1087 		    parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
1088 			continue;
1089 
1090 		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
1091 
1092 		pde = pd_addr + pt_idx * 8;
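		/* Batch contiguous PDE writes: flush the accumulated run as
		 * soon as either the PDE or the PT address stops being
		 * consecutive, or the run reaches the maximum update size.
		 */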
1093 		if (((last_pde + 8 * count) != pde) ||
1094 		    ((last_pt + incr * count) != pt) ||
1095 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
1096 
1097 			if (count) {
1098 				if (shadow)
1099 					params.func(&params,
1100 						    last_shadow,
1101 						    last_pt, count,
1102 						    incr,
1103 						    AMDGPU_PTE_VALID);
1104 
1105 				params.func(&params, last_pde,
1106 					    last_pt, count, incr,
1107 					    AMDGPU_PTE_VALID);
1108 			}
1109 
1110 			count = 1;
1111 			last_pde = pde;
1112 			last_shadow = shadow_addr + pt_idx * 8;
1113 			last_pt = pt;
1114 		} else {
1115 			++count;
1116 		}
1117 	}
1118 
1119 	if (count) {
1120 		if (vm->root.bo->shadow)
1121 			params.func(&params, last_shadow, last_pt,
1122 				    count, incr, AMDGPU_PTE_VALID);
1123 
1124 		params.func(&params, last_pde, last_pt,
1125 			    count, incr, AMDGPU_PTE_VALID);
1126 	}
1127 
1128 	if (!vm->use_cpu_for_update) {
1129 		if (params.ib->length_dw == 0) {
1130 			amdgpu_job_free(job);
1131 		} else {
1132 			amdgpu_ring_pad_ib(ring, params.ib);
1133 			amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
1134 					 AMDGPU_FENCE_OWNER_VM);
1135 			if (shadow)
1136 				amdgpu_sync_resv(adev, &job->sync,
1137 						 shadow->tbo.resv,
1138 						 AMDGPU_FENCE_OWNER_VM);
1139 
1140 			WARN_ON(params.ib->length_dw > ndw);
1141 			r = amdgpu_job_submit(job, ring, &vm->entity,
1142 					AMDGPU_FENCE_OWNER_VM, &fence);
1143 			if (r)
1144 				goto error_free;
1145 
1146 			amdgpu_bo_fence(parent->bo, fence, true);
1147 			dma_fence_put(vm->last_dir_update);
1148 			vm->last_dir_update = dma_fence_get(fence);
1149 			dma_fence_put(fence);
1150 		}
1151 	}
1152 	/*
1153 	 * Recurse into the subdirectories. This recursion is harmless because
1154 	 * we only have a maximum of 5 layers.
1155 	 */
1156 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
1157 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
1158 
1159 		if (!entry->bo)
1160 			continue;
1161 
1162 		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
1163 		if (r)
1164 			return r;
1165 	}
1166 
1167 	return 0;
1168 
1169 error_free:
1170 	amdgpu_job_free(job);
1171 	return r;
1172 }
1173 
1174 /*
1175  * amdgpu_vm_invalidate_level - mark all PD levels as invalid
1176  *
1177  * @parent: parent PD
1178  *
1179  * Mark all PD levels as invalid after an error.
1180  */
1181 static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
1182 {
1183 	unsigned pt_idx;
1184 
1185 	/*
1186 	 * Recurse into the subdirectories. This recursion is harmless because
1187 	 * we only have a maximum of 5 layers.
1188 	 */
1189 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
1190 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
1191 
1192 		if (!entry->bo)
1193 			continue;
1194 
1195 		entry->addr = ~0ULL;
1196 		amdgpu_vm_invalidate_level(entry);
1197 	}
1198 }
1199 
1200 /*
1201  * amdgpu_vm_update_directories - make sure that all directories are valid
1202  *
1203  * @adev: amdgpu_device pointer
1204  * @vm: requested vm
1205  *
1206  * Makes sure all directories are up to date.
1207  * Returns 0 for success, error for failure.
1208  */
1209 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
1210 				 struct amdgpu_vm *vm)
1211 {
1212 	int r;
1213 
1214 	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
1215 	if (r)
1216 		amdgpu_vm_invalidate_level(&vm->root);
1217 
1218 	if (vm->use_cpu_for_update) {
1219 		/* Flush HDP */
1220 		mb();
1221 		amdgpu_gart_flush_gpu_tlb(adev, 0);
1222 	}
1223 
1224 	return r;
1225 }
1226 
1227 /**
1228  * amdgpu_vm_get_entry - find the entry for an address
1229  *
1230  * @p: see amdgpu_pte_update_params definition
1231  * @addr: virtual address in question
1232  * @entry: resulting entry or NULL
1233  * @parent: parent entry
1234  *
1235  * Find the vm_pt entry and its parent for the given address.
1236  */
1237 void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
1238 			 struct amdgpu_vm_pt **entry,
1239 			 struct amdgpu_vm_pt **parent)
1240 {
1241 	unsigned idx, level = p->adev->vm_manager.num_level;
1242 
1243 	*parent = NULL;
1244 	*entry = &p->vm->root;
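	/* Walk from the root towards the leaves; every level consumes
	 * block_size bits of the address, highest bits first.
	 */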
1245 	while ((*entry)->entries) {
1246 		idx = addr >> (p->adev->vm_manager.block_size * level--);
1247 		idx %= amdgpu_bo_size((*entry)->bo) / 8;
1248 		*parent = *entry;
1249 		*entry = &(*entry)->entries[idx];
1250 	}
1251 
1252 	if (level)
1253 		*entry = NULL;
1254 }
1255 
1256 /**
1257  * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
1258  *
1259  * @p: see amdgpu_pte_update_params definition
1260  * @entry: vm_pt entry to check
1261  * @parent: parent entry
1262  * @nptes: number of PTEs updated with this operation
1263  * @dst: destination address where the PTEs should point to
1264  * @flags: access flags for the PTEs
1265  *
1266  * Check if we can update the PD with a huge page.
1267  */
1268 static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
1269 					struct amdgpu_vm_pt *entry,
1270 					struct amdgpu_vm_pt *parent,
1271 					unsigned nptes, uint64_t dst,
1272 					uint64_t flags)
1273 {
1274 	bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
1275 	uint64_t pd_addr, pde;
1276 
1277 	/* In the case of a mixed PT the PDE must point to it */
1278 	if (p->adev->asic_type < CHIP_VEGA10 ||
1279 	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
1280 	    p->func == amdgpu_vm_do_copy_ptes ||
1281 	    !(flags & AMDGPU_PTE_VALID)) {
1282 
1283 		dst = amdgpu_bo_gpu_offset(entry->bo);
1284 		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
1285 		flags = AMDGPU_PTE_VALID;
1286 	} else {
1287 		/* Set the huge page flag to stop scanning at this PDE */
1288 		flags |= AMDGPU_PDE_PTE;
1289 	}
1290 
1291 	if (entry->addr == (dst | flags))
1292 		return;
1293 
1294 	entry->addr = (dst | flags);
1295 
1296 	if (use_cpu_update) {
1297 		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
1298 		pde = pd_addr + (entry - parent->entries) * 8;
1299 		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
1300 	} else {
1301 		if (parent->bo->shadow) {
1302 			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
1303 			pde = pd_addr + (entry - parent->entries) * 8;
1304 			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
1305 		}
1306 		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
1307 		pde = pd_addr + (entry - parent->entries) * 8;
1308 		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
1309 	}
1310 }
1311 
1312 /**
1313  * amdgpu_vm_update_ptes - make sure that page tables are valid
1314  *
1315  * @params: see amdgpu_pte_update_params definition
1316  * @vm: requested vm
1317  * @start: start of GPU address range
1318  * @end: end of GPU address range
1319  * @dst: destination address to map to, the next dst inside the function
1320  * @flags: mapping flags
1321  *
1322  * Update the page tables in the range @start - @end.
1323  * Returns 0 for success, -EINVAL for failure.
1324  */
1325 static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1326 				  uint64_t start, uint64_t end,
1327 				  uint64_t dst, uint64_t flags)
1328 {
1329 	struct amdgpu_device *adev = params->adev;
1330 	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
1331 
1332 	uint64_t addr, pe_start;
1333 	struct amdgpu_bo *pt;
1334 	unsigned nptes;
1335 	bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
1336 
1337 	/* walk over the address space and update the page tables */
1338 	for (addr = start; addr < end; addr += nptes,
1339 	     dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
1340 		struct amdgpu_vm_pt *entry, *parent;
1341 
1342 		amdgpu_vm_get_entry(params, addr, &entry, &parent);
1343 		if (!entry)
1344 			return -ENOENT;
1345 
1346 		if ((addr & ~mask) == (end & ~mask))
1347 			nptes = end - addr;
1348 		else
1349 			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
1350 
1351 		amdgpu_vm_handle_huge_pages(params, entry, parent,
1352 					    nptes, dst, flags);
1353 		/* We don't need to update PTEs for huge pages */
1354 		if (entry->addr & AMDGPU_PDE_PTE)
1355 			continue;
1356 
1357 		pt = entry->bo;
1358 		if (use_cpu_update) {
1359 			pe_start = (unsigned long)amdgpu_bo_kptr(pt);
1360 		} else {
1361 			if (pt->shadow) {
1362 				pe_start = amdgpu_bo_gpu_offset(pt->shadow);
1363 				pe_start += (addr & mask) * 8;
1364 				params->func(params, pe_start, dst, nptes,
1365 					     AMDGPU_GPU_PAGE_SIZE, flags);
1366 			}
1367 			pe_start = amdgpu_bo_gpu_offset(pt);
1368 		}
1369 
1370 		pe_start += (addr & mask) * 8;
1371 		params->func(params, pe_start, dst, nptes,
1372 			     AMDGPU_GPU_PAGE_SIZE, flags);
1373 	}
1374 
1375 	return 0;
1376 }
1377 
1378 /*
1379  * amdgpu_vm_frag_ptes - add fragment information to PTEs
1380  *
1381  * @params: see amdgpu_pte_update_params definition
1382  * @vm: requested vm
1383  * @start: first PTE to handle
1384  * @end: last PTE to handle
1385  * @dst: addr those PTEs should point to
1386  * @flags: hw mapping flags
1387  * Returns 0 for success, -EINVAL for failure.
1388  */
1389 static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
1390 				uint64_t start, uint64_t end,
1391 				uint64_t dst, uint64_t flags)
1392 {
1393 	int r;
1394 
1395 	/**
1396 	 * The MC L1 TLB supports variable sized pages, based on a fragment
1397 	 * field in the PTE. When this field is set to a non-zero value, page
1398 	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
1399 	 * flags are considered valid for all PTEs within the fragment range
1400 	 * and corresponding mappings are assumed to be physically contiguous.
1401 	 *
1402 	 * The L1 TLB can store a single PTE for the whole fragment,
1403 	 * significantly increasing the space available for translation
1404 	 * caching. This leads to large improvements in throughput when the
1405 	 * TLB is under pressure.
1406 	 *
1407 	 * The L2 TLB distributes small and large fragments into two
1408 	 * asymmetric partitions. The large fragment cache is significantly
1409 	 * larger. Thus, we try to use large fragments wherever possible.
1410 	 * Userspace can support this by aligning virtual base address and
1411 	 * allocation size to the fragment size.
1412 	 */
1413 	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
1414 	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
1415 	uint64_t frag_align = 1 << pages_per_frag;
1416 
1417 	uint64_t frag_start = ALIGN(start, frag_align);
1418 	uint64_t frag_end = end & ~(frag_align - 1);
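	/* Example: a fragment_size of 9 gives 512 page (2MB) fragments, so
	 * frag_start/frag_end are the requested range rounded inwards to
	 * 2MB boundaries.
	 */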
1419 
1420 	/* system pages are non-contiguous */
1421 	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
1422 	    (frag_start >= frag_end))
1423 		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
1424 
1425 	/* handle the 4K area at the beginning */
1426 	if (start != frag_start) {
1427 		r = amdgpu_vm_update_ptes(params, start, frag_start,
1428 					  dst, flags);
1429 		if (r)
1430 			return r;
1431 		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
1432 	}
1433 
1434 	/* handle the area in the middle */
1435 	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
1436 				  flags | frag_flags);
1437 	if (r)
1438 		return r;
1439 
1440 	/* handle the 4K area at the end */
1441 	if (frag_end != end) {
1442 		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
1443 		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
1444 	}
1445 	return r;
1446 }
1447 
1448 /**
1449  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
1450  *
1451  * @adev: amdgpu_device pointer
1452  * @exclusive: fence we need to sync to
1453  * @src: address where to copy page table entries from
1454  * @pages_addr: DMA addresses to use for mapping
1455  * @vm: requested vm
1456  * @start: start of mapped range
1457  * @last: last mapped entry
1458  * @flags: flags for the entries
1459  * @addr: addr to set the area to
1460  * @fence: optional resulting fence
1461  *
1462  * Fill in the page table entries between @start and @last.
1463  * Returns 0 for success, -EINVAL for failure.
1464  */
1465 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1466 				       struct dma_fence *exclusive,
1467 				       uint64_t src,
1468 				       dma_addr_t *pages_addr,
1469 				       struct amdgpu_vm *vm,
1470 				       uint64_t start, uint64_t last,
1471 				       uint64_t flags, uint64_t addr,
1472 				       struct dma_fence **fence)
1473 {
1474 	struct amdgpu_ring *ring;
1475 	void *owner = AMDGPU_FENCE_OWNER_VM;
1476 	unsigned nptes, ncmds, ndw;
1477 	struct amdgpu_job *job;
1478 	struct amdgpu_pte_update_params params;
1479 	struct dma_fence *f = NULL;
1480 	int r;
1481 
1482 	memset(&params, 0, sizeof(params));
1483 	params.adev = adev;
1484 	params.vm = vm;
1485 	params.src = src;
1486 
1487 	/* sync to everything on unmapping */
1488 	if (!(flags & AMDGPU_PTE_VALID))
1489 		owner = AMDGPU_FENCE_OWNER_UNDEFINED;
1490 
1491 	if (vm->use_cpu_for_update) {
1492 		/* params.src is used as a flag to indicate system memory */
1493 		if (pages_addr)
1494 			params.src = ~0;
1495 
1496 		/* Wait for PT BOs to be free. PTs share the same resv. object
1497 		 * as the root PD BO
1498 		 */
1499 		r = amdgpu_vm_wait_pd(adev, vm, owner);
1500 		if (unlikely(r))
1501 			return r;
1502 
1503 		params.func = amdgpu_vm_cpu_set_ptes;
1504 		params.pages_addr = pages_addr;
1505 		return amdgpu_vm_frag_ptes(&params, start, last + 1,
1506 					   addr, flags);
1507 	}
1508 
1509 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
1510 
1511 	nptes = last - start + 1;
1512 
1513 	/*
1514 	 * reserve space for one command every (1 << BLOCK_SIZE)
1515 	 *  entries or 2k dwords (whatever is smaller)
1516 	 */
1517 	ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1;
1518 
1519 	/* padding, etc. */
1520 	ndw = 64;
1521 
1522 	/* one PDE write for each huge page */
1523 	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
1524 
1525 	if (src) {
1526 		/* only copy commands needed */
1527 		ndw += ncmds * 7;
1528 
1529 		params.func = amdgpu_vm_do_copy_ptes;
1530 
1531 	} else if (pages_addr) {
1532 		/* copy commands needed */
1533 		ndw += ncmds * 7;
1534 
1535 		/* and also PTEs */
1536 		ndw += nptes * 2;
1537 
1538 		params.func = amdgpu_vm_do_copy_ptes;
1539 
1540 	} else {
1541 		/* set page commands needed */
1542 		ndw += ncmds * 10;
1543 
1544 		/* two extra commands for begin/end of fragment */
1545 		ndw += 2 * 10;
1546 
1547 		params.func = amdgpu_vm_do_set_ptes;
1548 	}
1549 
1550 	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
1551 	if (r)
1552 		return r;
1553 
1554 	params.ib = &job->ibs[0];
1555 
1556 	if (!src && pages_addr) {
1557 		uint64_t *pte;
1558 		unsigned i;
1559 
1560 		/* Put the PTEs at the end of the IB. */
1561 		i = ndw - nptes * 2;
1562 		pte = (uint64_t *)&(job->ibs->ptr[i]);
1563 		params.src = job->ibs->gpu_addr + i * 4;
1564 
1565 		for (i = 0; i < nptes; ++i) {
1566 			pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
1567 						    AMDGPU_GPU_PAGE_SIZE);
1568 			pte[i] |= flags;
1569 		}
1570 		addr = 0;
1571 	}
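	/* params.src now points at the PTEs stored at the tail of the IB and
	 * the copy path reads them from there, which is why addr was reset
	 * to zero above.
	 */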
1572 
1573 	r = amdgpu_sync_fence(adev, &job->sync, exclusive);
1574 	if (r)
1575 		goto error_free;
1576 
1577 	r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv,
1578 			     owner);
1579 	if (r)
1580 		goto error_free;
1581 
1582 	r = reservation_object_reserve_shared(vm->root.bo->tbo.resv);
1583 	if (r)
1584 		goto error_free;
1585 
1586 	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
1587 	if (r)
1588 		goto error_free;
1589 
1590 	amdgpu_ring_pad_ib(ring, params.ib);
1591 	WARN_ON(params.ib->length_dw > ndw);
1592 	r = amdgpu_job_submit(job, ring, &vm->entity,
1593 			      AMDGPU_FENCE_OWNER_VM, &f);
1594 	if (r)
1595 		goto error_free;
1596 
1597 	amdgpu_bo_fence(vm->root.bo, f, true);
1598 	dma_fence_put(*fence);
1599 	*fence = f;
1600 	return 0;
1601 
1602 error_free:
1603 	amdgpu_job_free(job);
1604 	amdgpu_vm_invalidate_level(&vm->root);
1605 	return r;
1606 }
1607 
1608 /**
1609  * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
1610  *
1611  * @adev: amdgpu_device pointer
1612  * @exclusive: fence we need to sync to
1613  * @gtt_flags: flags as they are used for GTT
1614  * @pages_addr: DMA addresses to use for mapping
1615  * @vm: requested vm
1616  * @mapping: mapped range and flags to use for the update
1617  * @flags: HW flags for the mapping
1618  * @nodes: array of drm_mm_nodes with the MC addresses
1619  * @fence: optional resulting fence
1620  *
1621  * Split the mapping into smaller chunks so that each update fits
1622  * into a SDMA IB.
1623  * Returns 0 for success, -EINVAL for failure.
1624  */
1625 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1626 				      struct dma_fence *exclusive,
1627 				      uint64_t gtt_flags,
1628 				      dma_addr_t *pages_addr,
1629 				      struct amdgpu_vm *vm,
1630 				      struct amdgpu_bo_va_mapping *mapping,
1631 				      uint64_t flags,
1632 				      struct drm_mm_node *nodes,
1633 				      struct dma_fence **fence)
1634 {
1635 	uint64_t pfn, src = 0, start = mapping->start;
1636 	int r;
1637 
1638 	/* Normally bo_va->flags only contains the READABLE and WRITEABLE bits,
1639 	 * but filter the flags here anyway just in case.
1640 	 */
1641 	if (!(mapping->flags & AMDGPU_PTE_READABLE))
1642 		flags &= ~AMDGPU_PTE_READABLE;
1643 	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
1644 		flags &= ~AMDGPU_PTE_WRITEABLE;
1645 
1646 	flags &= ~AMDGPU_PTE_EXECUTABLE;
1647 	flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1648 
1649 	flags &= ~AMDGPU_PTE_MTYPE_MASK;
1650 	flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
1651 
1652 	if ((mapping->flags & AMDGPU_PTE_PRT) &&
1653 	    (adev->asic_type >= CHIP_VEGA10)) {
1654 		flags |= AMDGPU_PTE_PRT;
1655 		flags &= ~AMDGPU_PTE_VALID;
1656 	}
1657 
1658 	trace_amdgpu_vm_bo_update(mapping);
1659 
1660 	pfn = mapping->offset >> PAGE_SHIFT;
1661 	if (nodes) {
1662 		while (pfn >= nodes->size) {
1663 			pfn -= nodes->size;
1664 			++nodes;
1665 		}
1666 	}
1667 
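	/* pfn is now the page offset into the first drm_mm node that backs
	 * the mapping; walk the nodes and map them chunk by chunk below.
	 */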
1668 	do {
1669 		uint64_t max_entries;
1670 		uint64_t addr, last;
1671 
1672 		if (nodes) {
1673 			addr = nodes->start << PAGE_SHIFT;
1674 			max_entries = (nodes->size - pfn) *
1675 				(PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
1676 		} else {
1677 			addr = 0;
1678 			max_entries = S64_MAX;
1679 		}
1680 
1681 		if (pages_addr) {
1682 			if (flags == gtt_flags)
1683 				src = adev->gart.table_addr +
1684 					(addr >> AMDGPU_GPU_PAGE_SHIFT) * 8;
1685 			else
1686 				max_entries = min(max_entries, 16ull * 1024ull);
1687 			addr = 0;
1688 		} else if (flags & AMDGPU_PTE_VALID) {
1689 			addr += adev->vm_manager.vram_base_offset;
1690 		}
1691 		addr += pfn << PAGE_SHIFT;
1692 
1693 		last = min((uint64_t)mapping->last, start + max_entries - 1);
1694 		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
1695 						src, pages_addr, vm,
1696 						start, last, flags, addr,
1697 						fence);
1698 		if (r)
1699 			return r;
1700 
1701 		pfn += last - start + 1;
1702 		if (nodes && nodes->size == pfn) {
1703 			pfn = 0;
1704 			++nodes;
1705 		}
1706 		start = last + 1;
1707 
1708 	} while (unlikely(start != mapping->last + 1));
1709 
1710 	return 0;
1711 }
1712 
1713 /**
1714  * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1715  *
1716  * @adev: amdgpu_device pointer
1717  * @bo_va: requested BO and VM object
1718  * @clear: if true clear the entries
1719  *
1720  * Fill in the page table entries for @bo_va.
1721  * Returns 0 for success, -EINVAL for failure.
1722  */
1723 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1724 			struct amdgpu_bo_va *bo_va,
1725 			bool clear)
1726 {
1727 	struct amdgpu_bo *bo = bo_va->base.bo;
1728 	struct amdgpu_vm *vm = bo_va->base.vm;
1729 	struct amdgpu_bo_va_mapping *mapping;
1730 	dma_addr_t *pages_addr = NULL;
1731 	uint64_t gtt_flags, flags;
1732 	struct ttm_mem_reg *mem;
1733 	struct drm_mm_node *nodes;
1734 	struct dma_fence *exclusive;
1735 	int r;
1736 
1737 	if (clear || !bo_va->base.bo) {
1738 		mem = NULL;
1739 		nodes = NULL;
1740 		exclusive = NULL;
1741 	} else {
1742 		struct ttm_dma_tt *ttm;
1743 
1744 		mem = &bo_va->base.bo->tbo.mem;
1745 		nodes = mem->mm_node;
1746 		if (mem->mem_type == TTM_PL_TT) {
1747 			ttm = container_of(bo_va->base.bo->tbo.ttm,
1748 					   struct ttm_dma_tt, ttm);
1749 			pages_addr = ttm->dma_address;
1750 		}
1751 		exclusive = reservation_object_get_excl(bo->tbo.resv);
1752 	}
1753 
1754 	if (bo) {
1755 		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1756 		gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) &&
1757 			adev == amdgpu_ttm_adev(bo->tbo.bdev)) ?
1758 			flags : 0;
1759 	} else {
1760 		flags = 0x0;
1761 		gtt_flags = ~0x0;
1762 	}
1763 
1764 	spin_lock(&vm->status_lock);
1765 	if (!list_empty(&bo_va->base.vm_status))
1766 		list_splice_init(&bo_va->valids, &bo_va->invalids);
1767 	spin_unlock(&vm->status_lock);
1768 
1769 	list_for_each_entry(mapping, &bo_va->invalids, list) {
1770 		r = amdgpu_vm_bo_split_mapping(adev, exclusive,
1771 					       gtt_flags, pages_addr, vm,
1772 					       mapping, flags, nodes,
1773 					       &bo_va->last_pt_update);
1774 		if (r)
1775 			return r;
1776 	}
1777 
1778 	if (trace_amdgpu_vm_bo_mapping_enabled()) {
1779 		list_for_each_entry(mapping, &bo_va->valids, list)
1780 			trace_amdgpu_vm_bo_mapping(mapping);
1781 
1782 		list_for_each_entry(mapping, &bo_va->invalids, list)
1783 			trace_amdgpu_vm_bo_mapping(mapping);
1784 	}
1785 
1786 	spin_lock(&vm->status_lock);
1787 	list_splice_init(&bo_va->invalids, &bo_va->valids);
1788 	list_del_init(&bo_va->base.vm_status);
1789 	if (clear)
1790 		list_add(&bo_va->base.vm_status, &vm->cleared);
1791 	spin_unlock(&vm->status_lock);
1792 
1793 	if (vm->use_cpu_for_update) {
1794 		/* Flush HDP */
1795 		mb();
1796 		amdgpu_gart_flush_gpu_tlb(adev, 0);
1797 	}
1798 
1799 	return 0;
1800 }
1801 
1802 /**
1803  * amdgpu_vm_update_prt_state - update the global PRT state
1804  */
1805 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1806 {
1807 	unsigned long flags;
1808 	bool enable;
1809 
1810 	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1811 	enable = !!atomic_read(&adev->vm_manager.num_prt_users);
1812 	adev->gart.gart_funcs->set_prt(adev, enable);
1813 	spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1814 }
1815 
1816 /**
1817  * amdgpu_vm_prt_get - add a PRT user
1818  */
1819 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1820 {
1821 	if (!adev->gart.gart_funcs->set_prt)
1822 		return;
1823 
1824 	if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
1825 		amdgpu_vm_update_prt_state(adev);
1826 }
1827 
1828 /**
1829  * amdgpu_vm_prt_put - drop a PRT user
1830  */
1831 static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1832 {
1833 	if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
1834 		amdgpu_vm_update_prt_state(adev);
1835 }
1836 
1837 /**
1838  * amdgpu_vm_prt_cb - callback for updating the PRT status
1839  */
1840 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
1841 {
1842 	struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
1843 
1844 	amdgpu_vm_prt_put(cb->adev);
1845 	kfree(cb);
1846 }
1847 
1848 /**
1849  * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1850  */
1851 static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1852 				 struct dma_fence *fence)
1853 {
1854 	struct amdgpu_prt_cb *cb;
1855 
1856 	if (!adev->gart.gart_funcs->set_prt)
1857 		return;
1858 
1859 	cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
1860 	if (!cb) {
1861 		/* Last resort when we are OOM */
1862 		if (fence)
1863 			dma_fence_wait(fence, false);
1864 
1865 		amdgpu_vm_prt_put(adev);
1866 	} else {
1867 		cb->adev = adev;
1868 		if (!fence || dma_fence_add_callback(fence, &cb->cb,
1869 						     amdgpu_vm_prt_cb))
1870 			amdgpu_vm_prt_cb(fence, &cb->cb);
1871 	}
1872 }
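
/*
 * Note on the PRT lifetime (editor's sketch): the global PRT state is
 * reference counted. amdgpu_vm_bo_map() takes a reference for every mapping
 * created with AMDGPU_PTE_PRT set, and amdgpu_vm_free_mapping() drops it
 * again, but only after the fence of the unmap operation has signaled:
 *
 *	amdgpu_vm_prt_get(adev);		// PRT mapping created
 *	...					// mapping in use
 *	amdgpu_vm_add_prt_cb(adev, fence);	// unmap submitted; the put is
 *						// deferred until @fence signals
 *
 * so set_prt() is only turned off once the last PRT mapping has really left
 * the page tables.
 */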
1873 
1874 /**
1875  * amdgpu_vm_free_mapping - free a mapping
1876  *
1877  * @adev: amdgpu_device pointer
1878  * @vm: requested vm
1879  * @mapping: mapping to be freed
1880  * @fence: fence of the unmap operation
1881  *
1882  * Free a mapping and make sure we decrease the PRT usage count if applicable.
1883  */
1884 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1885 				   struct amdgpu_vm *vm,
1886 				   struct amdgpu_bo_va_mapping *mapping,
1887 				   struct dma_fence *fence)
1888 {
1889 	if (mapping->flags & AMDGPU_PTE_PRT)
1890 		amdgpu_vm_add_prt_cb(adev, fence);
1891 	kfree(mapping);
1892 }
1893 
1894 /**
1895  * amdgpu_vm_prt_fini - finish all prt mappings
1896  *
1897  * @adev: amdgpu_device pointer
1898  * @vm: requested vm
1899  *
 * Register a cleanup callback to disable PRT support after the VM dies.
1901  */
1902 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1903 {
1904 	struct reservation_object *resv = vm->root.bo->tbo.resv;
1905 	struct dma_fence *excl, **shared;
1906 	unsigned i, shared_count;
1907 	int r;
1908 
1909 	r = reservation_object_get_fences_rcu(resv, &excl,
1910 					      &shared_count, &shared);
1911 	if (r) {
1912 		/* Not enough memory to grab the fence list, as last resort
1913 		 * block for all the fences to complete.
1914 		 */
1915 		reservation_object_wait_timeout_rcu(resv, true, false,
1916 						    MAX_SCHEDULE_TIMEOUT);
1917 		return;
1918 	}
1919 
1920 	/* Add a callback for each fence in the reservation object */
1921 	amdgpu_vm_prt_get(adev);
1922 	amdgpu_vm_add_prt_cb(adev, excl);
1923 
1924 	for (i = 0; i < shared_count; ++i) {
1925 		amdgpu_vm_prt_get(adev);
1926 		amdgpu_vm_add_prt_cb(adev, shared[i]);
1927 	}
1928 
1929 	kfree(shared);
1930 }
1931 
1932 /**
1933  * amdgpu_vm_clear_freed - clear freed BOs in the PT
1934  *
1935  * @adev: amdgpu_device pointer
1936  * @vm: requested vm
1937  * @fence: optional resulting fence (unchanged if no work needed to be done
1938  * or if an error occurred)
1939  *
1940  * Make sure all freed BOs are cleared in the PT.
1941  * Returns 0 for success.
1942  *
1943  * PTs have to be reserved and mutex must be locked!
1944  */
1945 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1946 			  struct amdgpu_vm *vm,
1947 			  struct dma_fence **fence)
1948 {
1949 	struct amdgpu_bo_va_mapping *mapping;
1950 	struct dma_fence *f = NULL;
1951 	int r;
1952 	uint64_t init_pte_value = 0;
1953 
1954 	while (!list_empty(&vm->freed)) {
1955 		mapping = list_first_entry(&vm->freed,
1956 			struct amdgpu_bo_va_mapping, list);
1957 		list_del(&mapping->list);
1958 
1959 		if (vm->pte_support_ats)
1960 			init_pte_value = AMDGPU_PTE_SYSTEM;
1961 
1962 		r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,
1963 						mapping->start, mapping->last,
1964 						init_pte_value, 0, &f);
1965 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
1966 		if (r) {
1967 			dma_fence_put(f);
1968 			return r;
1969 		}
1970 	}
1971 
1972 	if (fence && f) {
1973 		dma_fence_put(*fence);
1974 		*fence = f;
1975 	} else {
1976 		dma_fence_put(f);
1977 	}
1978 
	return 0;
}
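
/*
 * Example (editor's sketch): a caller that has to wait for the freed ranges
 * to actually be cleared could use the optional fence like this:
 *
 *	struct dma_fence *fence = NULL;
 *
 *	r = amdgpu_vm_clear_freed(adev, vm, &fence);
 *	if (!r && fence)
 *		dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 *
 * Passing NULL instead of &fence is also fine when completion does not
 * matter; the intermediate fences are dropped internally either way.
 */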
1982 
1983 /**
1984  * amdgpu_vm_clear_moved - clear moved BOs in the PT
1985  *
1986  * @adev: amdgpu_device pointer
1987  * @vm: requested vm
1988  *
1989  * Make sure all moved BOs are cleared in the PT.
1990  * Returns 0 for success.
1991  *
1992  * PTs have to be reserved and mutex must be locked!
1993  */
1994 int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
1995 			    struct amdgpu_sync *sync)
1996 {
1997 	struct amdgpu_bo_va *bo_va = NULL;
1998 	int r = 0;
1999 
2000 	spin_lock(&vm->status_lock);
2001 	while (!list_empty(&vm->moved)) {
2002 		bo_va = list_first_entry(&vm->moved,
2003 			struct amdgpu_bo_va, base.vm_status);
2004 		spin_unlock(&vm->status_lock);
2005 
2006 		r = amdgpu_vm_bo_update(adev, bo_va, true);
2007 		if (r)
2008 			return r;
2009 
2010 		spin_lock(&vm->status_lock);
2011 	}
2012 	spin_unlock(&vm->status_lock);
2013 
2014 	if (bo_va)
2015 		r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
2016 
2017 	return r;
2018 }
2019 
2020 /**
2021  * amdgpu_vm_bo_add - add a bo to a specific vm
2022  *
2023  * @adev: amdgpu_device pointer
2024  * @vm: requested vm
2025  * @bo: amdgpu buffer object
2026  *
 * Add @bo into the requested vm and to the list of BOs associated with it.
2029  * Returns newly added bo_va or NULL for failure
2030  *
2031  * Object has to be reserved!
2032  */
2033 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
2034 				      struct amdgpu_vm *vm,
2035 				      struct amdgpu_bo *bo)
2036 {
2037 	struct amdgpu_bo_va *bo_va;
2038 
2039 	bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
	if (bo_va == NULL)
		return NULL;
2043 	bo_va->base.vm = vm;
2044 	bo_va->base.bo = bo;
2045 	INIT_LIST_HEAD(&bo_va->base.bo_list);
2046 	INIT_LIST_HEAD(&bo_va->base.vm_status);
2047 
2048 	bo_va->ref_count = 1;
2049 	INIT_LIST_HEAD(&bo_va->valids);
2050 	INIT_LIST_HEAD(&bo_va->invalids);
2051 
2052 	if (bo)
2053 		list_add_tail(&bo_va->base.bo_list, &bo->va);
2054 
2055 	return bo_va;
2056 }
2057 
2058 /**
2059  * amdgpu_vm_bo_map - map bo inside a vm
2060  *
2061  * @adev: amdgpu_device pointer
2062  * @bo_va: bo_va to store the address
2063  * @saddr: where to map the BO
 * @offset: requested offset in the BO
 * @size: size of the mapping in bytes
 * @flags: attributes of pages (read/write/valid/etc.)
2066  *
 * Add a mapping of the BO at the specified addr into the VM.
2068  * Returns 0 for success, error for failure.
2069  *
2070  * Object has to be reserved and unreserved outside!
2071  */
2072 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
2073 		     struct amdgpu_bo_va *bo_va,
2074 		     uint64_t saddr, uint64_t offset,
2075 		     uint64_t size, uint64_t flags)
2076 {
2077 	struct amdgpu_bo_va_mapping *mapping, *tmp;
2078 	struct amdgpu_bo *bo = bo_va->base.bo;
2079 	struct amdgpu_vm *vm = bo_va->base.vm;
2080 	uint64_t eaddr;
2081 
2082 	/* validate the parameters */
2083 	if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
2084 	    size == 0 || size & AMDGPU_GPU_PAGE_MASK)
2085 		return -EINVAL;
2086 
2087 	/* make sure object fit at this offset */
2088 	eaddr = saddr + size - 1;
2089 	if (saddr >= eaddr ||
2090 	    (bo && offset + size > amdgpu_bo_size(bo)))
2091 		return -EINVAL;
2092 
2093 	saddr /= AMDGPU_GPU_PAGE_SIZE;
2094 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
2095 
2096 	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2097 	if (tmp) {
2098 		/* bo and tmp overlap, invalid addr */
2099 		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
2100 			"0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
2101 			tmp->start, tmp->last + 1);
2102 		return -EINVAL;
2103 	}
2104 
2105 	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
2106 	if (!mapping)
2107 		return -ENOMEM;
2108 
2109 	INIT_LIST_HEAD(&mapping->list);
2110 	mapping->start = saddr;
2111 	mapping->last = eaddr;
2112 	mapping->offset = offset;
2113 	mapping->flags = flags;
2114 
2115 	list_add(&mapping->list, &bo_va->invalids);
2116 	amdgpu_vm_it_insert(mapping, &vm->va);
2117 
2118 	if (flags & AMDGPU_PTE_PRT)
2119 		amdgpu_vm_prt_get(adev);
2120 
2121 	return 0;
2122 }
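
/*
 * Example (editor's sketch): mapping a whole, already reserved BO read/write
 * at a GPU-page aligned address could look roughly like:
 *
 *	bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 *	if (!bo_va)
 *		return -ENOMEM;
 *
 *	r = amdgpu_vm_bo_map(adev, bo_va, va_address, 0, amdgpu_bo_size(bo),
 *			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);
 *
 * The address, the offset and the size all have to be multiples of
 * AMDGPU_GPU_PAGE_SIZE, otherwise the parameter check above fails with
 * -EINVAL; va_address is a placeholder for whatever address userspace chose.
 */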
2123 
2124 /**
2125  * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
2126  *
2127  * @adev: amdgpu_device pointer
2128  * @bo_va: bo_va to store the address
2129  * @saddr: where to map the BO
 * @offset: requested offset in the BO
 * @size: size of the mapping in bytes
 * @flags: attributes of pages (read/write/valid/etc.)
2132  *
 * Add a mapping of the BO at the specified addr into the VM. Replace existing
2134  * mappings as we do so.
2135  * Returns 0 for success, error for failure.
2136  *
2137  * Object has to be reserved and unreserved outside!
2138  */
2139 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
2140 			     struct amdgpu_bo_va *bo_va,
2141 			     uint64_t saddr, uint64_t offset,
2142 			     uint64_t size, uint64_t flags)
2143 {
2144 	struct amdgpu_bo_va_mapping *mapping;
2145 	struct amdgpu_bo *bo = bo_va->base.bo;
2146 	struct amdgpu_vm *vm = bo_va->base.vm;
2147 	uint64_t eaddr;
2148 	int r;
2149 
2150 	/* validate the parameters */
2151 	if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
2152 	    size == 0 || size & AMDGPU_GPU_PAGE_MASK)
2153 		return -EINVAL;
2154 
2155 	/* make sure object fit at this offset */
2156 	eaddr = saddr + size - 1;
2157 	if (saddr >= eaddr ||
2158 	    (bo && offset + size > amdgpu_bo_size(bo)))
2159 		return -EINVAL;
2160 
2161 	/* Allocate all the needed memory */
2162 	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
2163 	if (!mapping)
2164 		return -ENOMEM;
2165 
2166 	r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
2167 	if (r) {
2168 		kfree(mapping);
2169 		return r;
2170 	}
2171 
2172 	saddr /= AMDGPU_GPU_PAGE_SIZE;
2173 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
2174 
2175 	mapping->start = saddr;
2176 	mapping->last = eaddr;
2177 	mapping->offset = offset;
2178 	mapping->flags = flags;
2179 
2180 	list_add(&mapping->list, &bo_va->invalids);
2181 	amdgpu_vm_it_insert(mapping, &vm->va);
2182 
2183 	if (flags & AMDGPU_PTE_PRT)
2184 		amdgpu_vm_prt_get(adev);
2185 
2186 	return 0;
2187 }
2188 
2189 /**
2190  * amdgpu_vm_bo_unmap - remove bo mapping from vm
2191  *
2192  * @adev: amdgpu_device pointer
2193  * @bo_va: bo_va to remove the address from
 * @saddr: where the BO is mapped
2195  *
 * Remove a mapping of the BO at the specified addr from the VM.
2197  * Returns 0 for success, error for failure.
2198  *
2199  * Object has to be reserved and unreserved outside!
2200  */
2201 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
2202 		       struct amdgpu_bo_va *bo_va,
2203 		       uint64_t saddr)
2204 {
2205 	struct amdgpu_bo_va_mapping *mapping;
2206 	struct amdgpu_vm *vm = bo_va->base.vm;
2207 	bool valid = true;
2208 
2209 	saddr /= AMDGPU_GPU_PAGE_SIZE;
2210 
2211 	list_for_each_entry(mapping, &bo_va->valids, list) {
2212 		if (mapping->start == saddr)
2213 			break;
2214 	}
2215 
2216 	if (&mapping->list == &bo_va->valids) {
2217 		valid = false;
2218 
2219 		list_for_each_entry(mapping, &bo_va->invalids, list) {
2220 			if (mapping->start == saddr)
2221 				break;
2222 		}
2223 
2224 		if (&mapping->list == &bo_va->invalids)
2225 			return -ENOENT;
2226 	}
2227 
2228 	list_del(&mapping->list);
2229 	amdgpu_vm_it_remove(mapping, &vm->va);
2230 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2231 
2232 	if (valid)
2233 		list_add(&mapping->list, &vm->freed);
2234 	else
2235 		amdgpu_vm_free_mapping(adev, vm, mapping,
2236 				       bo_va->last_pt_update);
2237 
2238 	return 0;
2239 }
2240 
2241 /**
2242  * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
2243  *
2244  * @adev: amdgpu_device pointer
2245  * @vm: VM structure to use
2246  * @saddr: start of the range
2247  * @size: size of the range
2248  *
 * Remove all mappings in a range, splitting them as appropriate.
2250  * Returns 0 for success, error for failure.
2251  */
2252 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2253 				struct amdgpu_vm *vm,
2254 				uint64_t saddr, uint64_t size)
2255 {
2256 	struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
2257 	LIST_HEAD(removed);
2258 	uint64_t eaddr;
2259 
2260 	eaddr = saddr + size - 1;
2261 	saddr /= AMDGPU_GPU_PAGE_SIZE;
2262 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
2263 
2264 	/* Allocate all the needed memory */
2265 	before = kzalloc(sizeof(*before), GFP_KERNEL);
2266 	if (!before)
2267 		return -ENOMEM;
2268 	INIT_LIST_HEAD(&before->list);
2269 
2270 	after = kzalloc(sizeof(*after), GFP_KERNEL);
2271 	if (!after) {
2272 		kfree(before);
2273 		return -ENOMEM;
2274 	}
2275 	INIT_LIST_HEAD(&after->list);
2276 
2277 	/* Now gather all removed mappings */
2278 	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2279 	while (tmp) {
2280 		/* Remember mapping split at the start */
2281 		if (tmp->start < saddr) {
2282 			before->start = tmp->start;
2283 			before->last = saddr - 1;
2284 			before->offset = tmp->offset;
2285 			before->flags = tmp->flags;
2286 			list_add(&before->list, &tmp->list);
2287 		}
2288 
2289 		/* Remember mapping split at the end */
2290 		if (tmp->last > eaddr) {
2291 			after->start = eaddr + 1;
2292 			after->last = tmp->last;
2293 			after->offset = tmp->offset;
			after->offset += (after->start - tmp->start) << PAGE_SHIFT;
2295 			after->flags = tmp->flags;
2296 			list_add(&after->list, &tmp->list);
2297 		}
2298 
2299 		list_del(&tmp->list);
2300 		list_add(&tmp->list, &removed);
2301 
2302 		tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
2303 	}
2304 
2305 	/* And free them up */
2306 	list_for_each_entry_safe(tmp, next, &removed, list) {
2307 		amdgpu_vm_it_remove(tmp, &vm->va);
2308 		list_del(&tmp->list);
2309 
		if (tmp->start < saddr)
			tmp->start = saddr;
		if (tmp->last > eaddr)
			tmp->last = eaddr;
2314 
2315 		list_add(&tmp->list, &vm->freed);
2316 		trace_amdgpu_vm_bo_unmap(NULL, tmp);
2317 	}
2318 
2319 	/* Insert partial mapping before the range */
2320 	if (!list_empty(&before->list)) {
2321 		amdgpu_vm_it_insert(before, &vm->va);
2322 		if (before->flags & AMDGPU_PTE_PRT)
2323 			amdgpu_vm_prt_get(adev);
2324 	} else {
2325 		kfree(before);
2326 	}
2327 
2328 	/* Insert partial mapping after the range */
2329 	if (!list_empty(&after->list)) {
2330 		amdgpu_vm_it_insert(after, &vm->va);
2331 		if (after->flags & AMDGPU_PTE_PRT)
2332 			amdgpu_vm_prt_get(adev);
2333 	} else {
2334 		kfree(after);
2335 	}
2336 
2337 	return 0;
2338 }
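
/*
 * Worked example (editor's note): with an existing mapping covering GPU pages
 * [16, 31] and a clear request for pages [20, 27], the loop above moves the
 * original mapping to the freed list trimmed to [20, 27] and creates two
 * remainders that are re-inserted into the interval tree:
 *
 *	before: start = 16, last = 19, offset unchanged
 *	after:  start = 28, last = 31, offset advanced by 12 pages
 *
 * If the request only overlaps the start or the end of the mapping, the
 * unused before/after structure is simply freed again.
 */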
2339 
2340 /**
 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
2342  *
2343  * @adev: amdgpu_device pointer
2344  * @bo_va: requested bo_va
2345  *
2346  * Remove @bo_va->bo from the requested vm.
2347  *
 * Object has to be reserved!
2349  */
2350 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2351 		      struct amdgpu_bo_va *bo_va)
2352 {
2353 	struct amdgpu_bo_va_mapping *mapping, *next;
2354 	struct amdgpu_vm *vm = bo_va->base.vm;
2355 
2356 	list_del(&bo_va->base.bo_list);
2357 
2358 	spin_lock(&vm->status_lock);
2359 	list_del(&bo_va->base.vm_status);
2360 	spin_unlock(&vm->status_lock);
2361 
2362 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2363 		list_del(&mapping->list);
2364 		amdgpu_vm_it_remove(mapping, &vm->va);
2365 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2366 		list_add(&mapping->list, &vm->freed);
2367 	}
2368 	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2369 		list_del(&mapping->list);
2370 		amdgpu_vm_it_remove(mapping, &vm->va);
2371 		amdgpu_vm_free_mapping(adev, vm, mapping,
2372 				       bo_va->last_pt_update);
2373 	}
2374 
2375 	dma_fence_put(bo_va->last_pt_update);
2376 	kfree(bo_va);
2377 }
2378 
2379 /**
2380  * amdgpu_vm_bo_invalidate - mark the bo as invalid
2381  *
2382  * @adev: amdgpu_device pointer
 * @bo: amdgpu buffer object
 *
 * Mark @bo as invalid and put its bo_va structures on the moved list of the
 * VMs they belong to.
2387  */
2388 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
2389 			     struct amdgpu_bo *bo)
2390 {
2391 	struct amdgpu_vm_bo_base *bo_base;
2392 
2393 	list_for_each_entry(bo_base, &bo->va, bo_list) {
2394 		spin_lock(&bo_base->vm->status_lock);
2395 		if (list_empty(&bo_base->vm_status))
2396 			list_add(&bo_base->vm_status,
2397 				 &bo_base->vm->moved);
2398 		spin_unlock(&bo_base->vm->status_lock);
2399 	}
2400 }
2401 
2402 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2403 {
2404 	/* Total bits covered by PD + PTs */
2405 	unsigned bits = ilog2(vm_size) + 18;
2406 
	/* Make sure the PD is 4K in size for up to 8GB of address space.
	 * Above that, split the bits equally between PD and PTs.
	 */
2409 	if (vm_size <= 8)
2410 		return (bits - 9);
2411 	else
2412 		return ((bits + 3) / 2);
2413 }
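
/*
 * Worked example (editor's note): with a vm_size of 64 (GB) the address space
 * covers ilog2(64) + 18 = 24 bits of 4K page index. Since 64 GB is above the
 * 8 GB threshold the bits are split roughly evenly, block_size =
 * (24 + 3) / 2 = 13, so every PD entry covers 2^13 pages (32 MB) and the PD
 * needs 2^(24 - 13) = 2048 entries. For vm_size <= 8 GB the PD is instead
 * kept at a single 4K page of 512 entries (block_size = bits - 9).
 */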
2414 
2415 /**
2416  * amdgpu_vm_set_fragment_size - adjust fragment size in PTE
2417  *
2418  * @adev: amdgpu_device pointer
2419  * @fragment_size_default: the default fragment size if it's set auto
2420  */
2421 void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default)
2422 {
2423 	if (amdgpu_vm_fragment_size == -1)
2424 		adev->vm_manager.fragment_size = fragment_size_default;
2425 	else
2426 		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2427 }
2428 
2429 /**
2430  * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2431  *
2432  * @adev: amdgpu_device pointer
 * @vm_size: the default vm size if it's set auto
 * @fragment_size_default: the default fragment size if it's set auto
 */
2435 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default)
2436 {
	/* adjust vm size first */
2438 	if (amdgpu_vm_size == -1)
2439 		adev->vm_manager.vm_size = vm_size;
2440 	else
2441 		adev->vm_manager.vm_size = amdgpu_vm_size;
2442 
2443 	/* block size depends on vm size */
2444 	if (amdgpu_vm_block_size == -1)
2445 		adev->vm_manager.block_size =
2446 			amdgpu_vm_get_block_size(adev->vm_manager.vm_size);
2447 	else
2448 		adev->vm_manager.block_size = amdgpu_vm_block_size;
2449 
2450 	amdgpu_vm_set_fragment_size(adev, fragment_size_default);
2451 
2452 	DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n",
2453 		adev->vm_manager.vm_size, adev->vm_manager.block_size,
2454 		adev->vm_manager.fragment_size);
2455 }
2456 
2457 /**
2458  * amdgpu_vm_init - initialize a vm instance
2459  *
2460  * @adev: amdgpu_device pointer
2461  * @vm: requested vm
 * @vm_context: Indicates whether it is a GFX or Compute context
2463  *
2464  * Init @vm fields.
2465  */
2466 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2467 		   int vm_context)
2468 {
2469 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2470 		AMDGPU_VM_PTE_COUNT(adev) * 8);
2471 	unsigned ring_instance;
2472 	struct amdgpu_ring *ring;
2473 	struct amd_sched_rq *rq;
2474 	int r, i;
2475 	u64 flags;
2476 	uint64_t init_pde_value = 0;
2477 
2478 	vm->va = RB_ROOT_CACHED;
2479 	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
2480 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2481 		vm->reserved_vmid[i] = NULL;
2482 	spin_lock_init(&vm->status_lock);
2483 	INIT_LIST_HEAD(&vm->moved);
2484 	INIT_LIST_HEAD(&vm->cleared);
2485 	INIT_LIST_HEAD(&vm->freed);
2486 
2487 	/* create scheduler entity for page table updates */
2488 
2489 	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
2490 	ring_instance %= adev->vm_manager.vm_pte_num_rings;
2491 	ring = adev->vm_manager.vm_pte_rings[ring_instance];
2492 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
2493 	r = amd_sched_entity_init(&ring->sched, &vm->entity,
2494 				  rq, amdgpu_sched_jobs);
2495 	if (r)
2496 		return r;
2497 
2498 	vm->pte_support_ats = false;
2499 
2500 	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
2501 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2502 						AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2503 
2504 		if (adev->asic_type == CHIP_RAVEN) {
2505 			vm->pte_support_ats = true;
2506 			init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE;
2507 		}
2508 	} else
2509 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2510 						AMDGPU_VM_USE_CPU_FOR_GFX);
2511 	DRM_DEBUG_DRIVER("VM update mode is %s\n",
2512 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
	WARN_ONCE((vm->use_cpu_for_update && !amdgpu_vm_is_large_bar(adev)),
2514 		  "CPU update of VM recommended only for large BAR system\n");
2515 	vm->last_dir_update = NULL;
2516 
2517 	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
2518 			AMDGPU_GEM_CREATE_VRAM_CLEARED;
2519 	if (vm->use_cpu_for_update)
2520 		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2521 	else
2522 		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
2523 				AMDGPU_GEM_CREATE_SHADOW);
2524 
2525 	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
2526 			     AMDGPU_GEM_DOMAIN_VRAM,
2527 			     flags,
2528 			     NULL, NULL, init_pde_value, &vm->root.bo);
2529 	if (r)
2530 		goto error_free_sched_entity;
2531 
2532 	r = amdgpu_bo_reserve(vm->root.bo, false);
2533 	if (r)
2534 		goto error_free_root;
2535 
2536 	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
2537 
2538 	if (vm->use_cpu_for_update) {
2539 		r = amdgpu_bo_kmap(vm->root.bo, NULL);
2540 		if (r)
2541 			goto error_free_root;
2542 	}
2543 
2544 	amdgpu_bo_unreserve(vm->root.bo);
2545 
2546 	return 0;
2547 
2548 error_free_root:
2549 	amdgpu_bo_unref(&vm->root.bo->shadow);
2550 	amdgpu_bo_unref(&vm->root.bo);
2551 	vm->root.bo = NULL;
2552 
2553 error_free_sched_entity:
2554 	amd_sched_entity_fini(&ring->sched, &vm->entity);
2555 
2556 	return r;
2557 }
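
/*
 * Example (editor's sketch): per-process VMs are typically created when a
 * DRM file is opened and torn down again on release, roughly:
 *
 *	r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX);
 *	if (r)
 *		goto error_vm;
 *	...
 *	amdgpu_vm_fini(adev, &fpriv->vm);
 *
 * The vm_context argument selects which AMDGPU_VM_USE_CPU_FOR_* bit of
 * vm_update_mode is honoured (and, on Raven, whether ATS is enabled for the
 * compute VM); error_vm is a placeholder label.
 */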
2558 
2559 /**
2560  * amdgpu_vm_free_levels - free PD/PT levels
2561  *
2562  * @level: PD/PT starting level to free
2563  *
2564  * Free the page directory or page table level and all sub levels.
2565  */
2566 static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
2567 {
2568 	unsigned i;
2569 
2570 	if (level->bo) {
2571 		amdgpu_bo_unref(&level->bo->shadow);
2572 		amdgpu_bo_unref(&level->bo);
2573 	}
2574 
2575 	if (level->entries)
2576 		for (i = 0; i <= level->last_entry_used; i++)
2577 			amdgpu_vm_free_levels(&level->entries[i]);
2578 
2579 	kvfree(level->entries);
2580 }
2581 
2582 /**
2583  * amdgpu_vm_fini - tear down a vm instance
2584  *
2585  * @adev: amdgpu_device pointer
2586  * @vm: requested vm
2587  *
2588  * Tear down @vm.
2589  * Unbind the VM and remove all bos from the vm bo list
2590  */
2591 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2592 {
2593 	struct amdgpu_bo_va_mapping *mapping, *tmp;
2594 	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
2595 	int i;
2596 
2597 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
2598 
2599 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2600 		dev_err(adev->dev, "still active bo inside vm\n");
2601 	}
2602 	rbtree_postorder_for_each_entry_safe(mapping, tmp,
2603 					     &vm->va.rb_root, rb) {
2604 		list_del(&mapping->list);
2605 		amdgpu_vm_it_remove(mapping, &vm->va);
2606 		kfree(mapping);
2607 	}
2608 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
2609 		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
2610 			amdgpu_vm_prt_fini(adev, vm);
2611 			prt_fini_needed = false;
2612 		}
2613 
2614 		list_del(&mapping->list);
2615 		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
2616 	}
2617 
2618 	amdgpu_vm_free_levels(&vm->root);
2619 	dma_fence_put(vm->last_dir_update);
2620 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2621 		amdgpu_vm_free_reserved_vmid(adev, vm, i);
2622 }
2623 
2624 /**
2625  * amdgpu_vm_manager_init - init the VM manager
2626  *
2627  * @adev: amdgpu_device pointer
2628  *
2629  * Initialize the VM manager structures
2630  */
2631 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2632 {
2633 	unsigned i, j;
2634 
2635 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
2636 		struct amdgpu_vm_id_manager *id_mgr =
2637 			&adev->vm_manager.id_mgr[i];
2638 
2639 		mutex_init(&id_mgr->lock);
2640 		INIT_LIST_HEAD(&id_mgr->ids_lru);
2641 		atomic_set(&id_mgr->reserved_vmid_num, 0);
2642 
2643 		/* skip over VMID 0, since it is the system VM */
2644 		for (j = 1; j < id_mgr->num_ids; ++j) {
2645 			amdgpu_vm_reset_id(adev, i, j);
			amdgpu_sync_create(&id_mgr->ids[j].active);
2647 			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
2648 		}
2649 	}
2650 
2651 	adev->vm_manager.fence_context =
2652 		dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2653 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
2654 		adev->vm_manager.seqno[i] = 0;
2655 
2656 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
2657 	atomic64_set(&adev->vm_manager.client_counter, 0);
2658 	spin_lock_init(&adev->vm_manager.prt_lock);
2659 	atomic_set(&adev->vm_manager.num_prt_users, 0);
2660 
	/* Unless overridden by the user, compute VM page tables are updated
	 * by the CPU only on large BAR systems.
	 */
2664 #ifdef CONFIG_X86_64
2665 	if (amdgpu_vm_update_mode == -1) {
2666 		if (amdgpu_vm_is_large_bar(adev))
2667 			adev->vm_manager.vm_update_mode =
2668 				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2669 		else
2670 			adev->vm_manager.vm_update_mode = 0;
2671 	} else
2672 		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2673 #else
2674 	adev->vm_manager.vm_update_mode = 0;
2675 #endif
2677 }
2678 
2679 /**
2680  * amdgpu_vm_manager_fini - cleanup VM manager
2681  *
2682  * @adev: amdgpu_device pointer
2683  *
2684  * Cleanup the VM manager and free resources.
2685  */
2686 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2687 {
2688 	unsigned i, j;
2689 
2690 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
2691 		struct amdgpu_vm_id_manager *id_mgr =
2692 			&adev->vm_manager.id_mgr[i];
2693 
2694 		mutex_destroy(&id_mgr->lock);
2695 		for (j = 0; j < AMDGPU_NUM_VM; ++j) {
2696 			struct amdgpu_vm_id *id = &id_mgr->ids[j];
2697 
2698 			amdgpu_sync_free(&id->active);
2699 			dma_fence_put(id->flushed_updates);
2700 			dma_fence_put(id->last_flush);
2701 		}
2702 	}
2703 }
2704 
2705 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2706 {
2707 	union drm_amdgpu_vm *args = data;
2708 	struct amdgpu_device *adev = dev->dev_private;
2709 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
2710 	int r;
2711 
2712 	switch (args->in.op) {
2713 	case AMDGPU_VM_OP_RESERVE_VMID:
		/* currently, we only need to reserve VMIDs from the GFX hub */
2715 		r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm,
2716 						  AMDGPU_GFXHUB);
2717 		if (r)
2718 			return r;
2719 		break;
2720 	case AMDGPU_VM_OP_UNRESERVE_VMID:
2721 		amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB);
2722 		break;
2723 	default:
2724 		return -EINVAL;
2725 	}
2726 
2727 	return 0;
2728 }
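
/*
 * Example (editor's sketch of the userspace side, assuming the usual libdrm
 * helpers): reserving a VMID for the calling process goes through the
 * DRM_AMDGPU_VM ioctl, roughly:
 *
 *	union drm_amdgpu_vm args = {};
 *
 *	args.in.op = AMDGPU_VM_OP_RESERVE_VMID;
 *	r = drmCommandWriteRead(fd, DRM_AMDGPU_VM, &args, sizeof(args));
 *
 * and is undone again with AMDGPU_VM_OP_UNRESERVE_VMID.
 */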
2729