1 /*
2  * Copyright 2009 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 /*
27  * Authors:
28  *    Jerome Glisse <glisse@freedesktop.org>
29  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
30  *    Dave Airlie
31  */
32 #include <drm/ttm/ttm_bo_api.h>
33 #include <drm/ttm/ttm_bo_driver.h>
34 #include <drm/ttm/ttm_placement.h>
35 #include <drm/ttm/ttm_module.h>
36 #include <drm/ttm/ttm_page_alloc.h>
37 #include <drm/drmP.h>
38 #include <drm/amdgpu_drm.h>
39 #include <linux/seq_file.h>
40 #include <linux/slab.h>
41 #include <linux/swiotlb.h>
42 #include <linux/swap.h>
43 #include <linux/pagemap.h>
44 #include <linux/debugfs.h>
45 #include <linux/iommu.h>
46 #include "amdgpu.h"
47 #include "amdgpu_object.h"
48 #include "amdgpu_trace.h"
49 #include "bif/bif_4_1_d.h"
50 
51 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
52 
53 static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
54 			     struct ttm_mem_reg *mem, unsigned num_pages,
55 			     uint64_t offset, unsigned window,
56 			     struct amdgpu_ring *ring,
57 			     uint64_t *addr);
58 
59 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
60 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
61 
62 /*
63  * Global memory.
64  */
65 static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
66 {
67 	return ttm_mem_global_init(ref->object);
68 }
69 
70 static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
71 {
72 	ttm_mem_global_release(ref->object);
73 }
74 
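/* Set up the global TTM memory accounting and BO structures and the
 * scheduler entity used for buffer moves on the copy ring.
 */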
75 static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
76 {
77 	struct drm_global_reference *global_ref;
78 	struct amdgpu_ring *ring;
79 	struct amd_sched_rq *rq;
80 	int r;
81 
82 	adev->mman.mem_global_referenced = false;
83 	global_ref = &adev->mman.mem_global_ref;
84 	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
85 	global_ref->size = sizeof(struct ttm_mem_global);
86 	global_ref->init = &amdgpu_ttm_mem_global_init;
87 	global_ref->release = &amdgpu_ttm_mem_global_release;
88 	r = drm_global_item_ref(global_ref);
89 	if (r) {
		DRM_ERROR("Failed setting up TTM memory accounting subsystem.\n");
92 		goto error_mem;
93 	}
94 
95 	adev->mman.bo_global_ref.mem_glob =
96 		adev->mman.mem_global_ref.object;
97 	global_ref = &adev->mman.bo_global_ref.ref;
98 	global_ref->global_type = DRM_GLOBAL_TTM_BO;
99 	global_ref->size = sizeof(struct ttm_bo_global);
100 	global_ref->init = &ttm_bo_global_init;
101 	global_ref->release = &ttm_bo_global_release;
102 	r = drm_global_item_ref(global_ref);
103 	if (r) {
104 		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
105 		goto error_bo;
106 	}
107 
108 	mutex_init(&adev->mman.gtt_window_lock);
109 
110 	ring = adev->mman.buffer_funcs_ring;
111 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
112 	r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
113 				  rq, amdgpu_sched_jobs);
114 	if (r) {
115 		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
116 		goto error_entity;
117 	}
118 
119 	adev->mman.mem_global_referenced = true;
120 
121 	return 0;
122 
123 error_entity:
124 	drm_global_item_unref(&adev->mman.bo_global_ref.ref);
125 error_bo:
126 	drm_global_item_unref(&adev->mman.mem_global_ref);
127 error_mem:
128 	return r;
129 }
130 
131 static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
132 {
133 	if (adev->mman.mem_global_referenced) {
134 		amd_sched_entity_fini(adev->mman.entity.sched,
135 				      &adev->mman.entity);
136 		mutex_destroy(&adev->mman.gtt_window_lock);
137 		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
138 		drm_global_item_unref(&adev->mman.mem_global_ref);
139 		adev->mman.mem_global_referenced = false;
140 	}
141 }
142 
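/* TTM invalidate_caches() callback; nothing to do for amdgpu. */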
143 static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
144 {
145 	return 0;
146 }
147 
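/* Initialize the memory type manager for one of the supported placements:
 * system, GTT, VRAM or the on-chip GDS/GWS/OA domains.
 */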
148 static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
149 				struct ttm_mem_type_manager *man)
150 {
151 	struct amdgpu_device *adev;
152 
153 	adev = amdgpu_ttm_adev(bdev);
154 
155 	switch (type) {
156 	case TTM_PL_SYSTEM:
157 		/* System memory */
158 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
159 		man->available_caching = TTM_PL_MASK_CACHING;
160 		man->default_caching = TTM_PL_FLAG_CACHED;
161 		break;
162 	case TTM_PL_TT:
163 		man->func = &amdgpu_gtt_mgr_func;
164 		man->gpu_offset = adev->mc.gart_start;
165 		man->available_caching = TTM_PL_MASK_CACHING;
166 		man->default_caching = TTM_PL_FLAG_CACHED;
167 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
168 		break;
169 	case TTM_PL_VRAM:
170 		/* "On-card" video ram */
171 		man->func = &amdgpu_vram_mgr_func;
172 		man->gpu_offset = adev->mc.vram_start;
173 		man->flags = TTM_MEMTYPE_FLAG_FIXED |
174 			     TTM_MEMTYPE_FLAG_MAPPABLE;
175 		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
176 		man->default_caching = TTM_PL_FLAG_WC;
177 		break;
178 	case AMDGPU_PL_GDS:
179 	case AMDGPU_PL_GWS:
180 	case AMDGPU_PL_OA:
		/* On-chip GDS, GWS and OA memory */
182 		man->func = &ttm_bo_manager_func;
183 		man->gpu_offset = 0;
184 		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
185 		man->available_caching = TTM_PL_FLAG_UNCACHED;
186 		man->default_caching = TTM_PL_FLAG_UNCACHED;
187 		break;
188 	default:
189 		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
190 		return -EINVAL;
191 	}
192 	return 0;
193 }
194 
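/* Fill in the placement TTM should use when evicting @bo: VRAM BOs are
 * preferably moved to CPU-invisible VRAM or GTT, everything else to the
 * CPU (system) domain.
 */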
195 static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
196 				struct ttm_placement *placement)
197 {
198 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
199 	struct amdgpu_bo *abo;
200 	static const struct ttm_place placements = {
201 		.fpfn = 0,
202 		.lpfn = 0,
203 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
204 	};
205 
206 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
207 		placement->placement = &placements;
208 		placement->busy_placement = &placements;
209 		placement->num_placement = 1;
210 		placement->num_busy_placement = 1;
211 		return;
212 	}
213 	abo = ttm_to_amdgpu_bo(bo);
214 	switch (bo->mem.mem_type) {
215 	case TTM_PL_VRAM:
		if (adev->mman.buffer_funcs &&
		    adev->mman.buffer_funcs_ring &&
		    !adev->mman.buffer_funcs_ring->ready) {
219 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
220 		} else if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
221 			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
222 			unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
223 			struct drm_mm_node *node = bo->mem.mm_node;
224 			unsigned long pages_left;
225 
226 			for (pages_left = bo->mem.num_pages;
227 			     pages_left;
228 			     pages_left -= node->size, node++) {
229 				if (node->start < fpfn)
230 					break;
231 			}
232 
233 			if (!pages_left)
234 				goto gtt;
235 
236 			/* Try evicting to the CPU inaccessible part of VRAM
237 			 * first, but only set GTT as busy placement, so this
238 			 * BO will be evicted to GTT rather than causing other
239 			 * BOs to be evicted from VRAM
240 			 */
241 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
242 							 AMDGPU_GEM_DOMAIN_GTT);
243 			abo->placements[0].fpfn = fpfn;
244 			abo->placements[0].lpfn = 0;
245 			abo->placement.busy_placement = &abo->placements[1];
246 			abo->placement.num_busy_placement = 1;
247 		} else {
248 gtt:
249 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
250 		}
251 		break;
252 	case TTM_PL_TT:
253 	default:
254 		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
255 	}
256 	*placement = abo->placement;
257 }
258 
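/* Check whether the caller is allowed to mmap @bo; userptr BOs must never
 * be mapped through the DRM file.
 */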
259 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
260 {
261 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
262 
263 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
264 		return -EPERM;
265 	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
266 					  filp->private_data);
267 }
268 
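/* Update the BO's placement without copying any data (the memory did not
 * actually move).
 */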
269 static void amdgpu_move_null(struct ttm_buffer_object *bo,
270 			     struct ttm_mem_reg *new_mem)
271 {
272 	struct ttm_mem_reg *old_mem = &bo->mem;
273 
274 	BUG_ON(old_mem->mm_node != NULL);
275 	*old_mem = *new_mem;
276 	new_mem->mm_node = NULL;
277 }
278 
279 static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
280 				    struct drm_mm_node *mm_node,
281 				    struct ttm_mem_reg *mem)
282 {
283 	uint64_t addr = 0;
284 
285 	if (mem->mem_type != TTM_PL_TT ||
286 	    amdgpu_gtt_mgr_is_allocated(mem)) {
287 		addr = mm_node->start << PAGE_SHIFT;
288 		addr += bo->bdev->man[mem->mem_type].gpu_offset;
289 	}
290 	return addr;
291 }
292 
/**
 * amdgpu_find_mm_node - Helper that finds the drm_mm_node corresponding
 * to @offset and adjusts @offset to be relative to the start of the
 * returned node.
 */
298 static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
299 					       unsigned long *offset)
300 {
301 	struct drm_mm_node *mm_node = mem->mm_node;
302 
303 	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
304 		*offset -= (mm_node->size << PAGE_SHIFT);
305 		++mm_node;
306 	}
307 	return mm_node;
308 }
309 
/**
 * amdgpu_ttm_copy_mem_to_mem - Helper for copying between memory locations
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo may be the same BO for a
 * move, or different BOs for a BO-to-BO copy.
 *
 * @f: Returns the last fence if multiple jobs are submitted.
 */
319 int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
320 			       struct amdgpu_copy_mem *src,
321 			       struct amdgpu_copy_mem *dst,
322 			       uint64_t size,
323 			       struct reservation_object *resv,
324 			       struct dma_fence **f)
325 {
326 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
327 	struct drm_mm_node *src_mm, *dst_mm;
328 	uint64_t src_node_start, dst_node_start, src_node_size,
329 		 dst_node_size, src_page_offset, dst_page_offset;
330 	struct dma_fence *fence = NULL;
331 	int r = 0;
332 	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
333 					AMDGPU_GPU_PAGE_SIZE);
334 
335 	if (!ring->ready) {
336 		DRM_ERROR("Trying to move memory with ring turned off.\n");
337 		return -EINVAL;
338 	}
339 
340 	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
341 	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
342 					     src->offset;
343 	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
344 	src_page_offset = src_node_start & (PAGE_SIZE - 1);
345 
346 	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
347 	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
348 					     dst->offset;
349 	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
350 	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
351 
352 	mutex_lock(&adev->mman.gtt_window_lock);
353 
354 	while (size) {
355 		unsigned long cur_size;
356 		uint64_t from = src_node_start, to = dst_node_start;
357 		struct dma_fence *next;
358 
359 		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
360 		 * begins at an offset, then adjust the size accordingly
361 		 */
362 		cur_size = min3(min(src_node_size, dst_node_size), size,
363 				GTT_MAX_BYTES);
364 		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
365 		    cur_size + dst_page_offset > GTT_MAX_BYTES)
366 			cur_size -= max(src_page_offset, dst_page_offset);
367 
368 		/* Map only what needs to be accessed. Map src to window 0 and
369 		 * dst to window 1
370 		 */
371 		if (src->mem->mem_type == TTM_PL_TT &&
372 		    !amdgpu_gtt_mgr_is_allocated(src->mem)) {
373 			r = amdgpu_map_buffer(src->bo, src->mem,
374 					PFN_UP(cur_size + src_page_offset),
375 					src_node_start, 0, ring,
376 					&from);
377 			if (r)
378 				goto error;
379 			/* Adjust the offset because amdgpu_map_buffer returns
380 			 * start of mapped page
381 			 */
382 			from += src_page_offset;
383 		}
384 
385 		if (dst->mem->mem_type == TTM_PL_TT &&
386 		    !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
387 			r = amdgpu_map_buffer(dst->bo, dst->mem,
388 					PFN_UP(cur_size + dst_page_offset),
389 					dst_node_start, 1, ring,
390 					&to);
391 			if (r)
392 				goto error;
393 			to += dst_page_offset;
394 		}
395 
396 		r = amdgpu_copy_buffer(ring, from, to, cur_size,
397 				       resv, &next, false, true);
398 		if (r)
399 			goto error;
400 
401 		dma_fence_put(fence);
402 		fence = next;
403 
404 		size -= cur_size;
405 		if (!size)
406 			break;
407 
408 		src_node_size -= cur_size;
409 		if (!src_node_size) {
410 			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
411 							     src->mem);
412 			src_node_size = (src_mm->size << PAGE_SHIFT);
413 		} else {
414 			src_node_start += cur_size;
415 			src_page_offset = src_node_start & (PAGE_SIZE - 1);
416 		}
417 		dst_node_size -= cur_size;
418 		if (!dst_node_size) {
419 			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
420 							     dst->mem);
421 			dst_node_size = (dst_mm->size << PAGE_SHIFT);
422 		} else {
423 			dst_node_start += cur_size;
424 			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
425 		}
426 	}
427 error:
428 	mutex_unlock(&adev->mman.gtt_window_lock);
429 	if (f)
430 		*f = dma_fence_get(fence);
431 	dma_fence_put(fence);
432 	return r;
433 }
434 
435 
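/* Move a BO by copying its content with the copy engine and handing the
 * resulting fence to TTM for a pipelined move.
 */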
436 static int amdgpu_move_blit(struct ttm_buffer_object *bo,
437 			    bool evict, bool no_wait_gpu,
438 			    struct ttm_mem_reg *new_mem,
439 			    struct ttm_mem_reg *old_mem)
440 {
441 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
442 	struct amdgpu_copy_mem src, dst;
443 	struct dma_fence *fence = NULL;
444 	int r;
445 
446 	src.bo = bo;
447 	dst.bo = bo;
448 	src.mem = old_mem;
449 	dst.mem = new_mem;
450 	src.offset = 0;
451 	dst.offset = 0;
452 
453 	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
454 				       new_mem->num_pages << PAGE_SHIFT,
455 				       bo->resv, &fence);
456 	if (r)
457 		goto error;
458 
459 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
460 	dma_fence_put(fence);
461 	return r;
462 
463 error:
464 	if (fence)
465 		dma_fence_wait(fence, false);
466 	dma_fence_put(fence);
467 	return r;
468 }
469 
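/* Evict a BO from VRAM: blit it into a temporary GTT placement first and
 * then let TTM move it to system memory.
 */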
470 static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
471 				bool evict, bool interruptible,
472 				bool no_wait_gpu,
473 				struct ttm_mem_reg *new_mem)
474 {
475 	struct amdgpu_device *adev;
476 	struct ttm_mem_reg *old_mem = &bo->mem;
477 	struct ttm_mem_reg tmp_mem;
478 	struct ttm_place placements;
479 	struct ttm_placement placement;
480 	int r;
481 
482 	adev = amdgpu_ttm_adev(bo->bdev);
483 	tmp_mem = *new_mem;
484 	tmp_mem.mm_node = NULL;
485 	placement.num_placement = 1;
486 	placement.placement = &placements;
487 	placement.num_busy_placement = 1;
488 	placement.busy_placement = &placements;
489 	placements.fpfn = 0;
490 	placements.lpfn = 0;
491 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
492 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
493 			     interruptible, no_wait_gpu);
	if (unlikely(r))
		return r;

	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
	if (unlikely(r))
		goto out_cleanup;

	r = ttm_tt_bind(bo->ttm, &tmp_mem);
	if (unlikely(r))
		goto out_cleanup;
	r = amdgpu_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem);
	if (unlikely(r))
		goto out_cleanup;
511 	r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
512 out_cleanup:
513 	ttm_bo_mem_put(bo, &tmp_mem);
514 	return r;
515 }
516 
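/* Move a BO from system memory to VRAM: bind it through a temporary GTT
 * placement first and then blit it into VRAM.
 */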
517 static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
518 				bool evict, bool interruptible,
519 				bool no_wait_gpu,
520 				struct ttm_mem_reg *new_mem)
521 {
522 	struct amdgpu_device *adev;
523 	struct ttm_mem_reg *old_mem = &bo->mem;
524 	struct ttm_mem_reg tmp_mem;
525 	struct ttm_placement placement;
526 	struct ttm_place placements;
527 	int r;
528 
529 	adev = amdgpu_ttm_adev(bo->bdev);
530 	tmp_mem = *new_mem;
531 	tmp_mem.mm_node = NULL;
532 	placement.num_placement = 1;
533 	placement.placement = &placements;
534 	placement.num_busy_placement = 1;
535 	placement.busy_placement = &placements;
536 	placements.fpfn = 0;
537 	placements.lpfn = 0;
538 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
539 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
540 			     interruptible, no_wait_gpu);
	if (unlikely(r))
		return r;
	r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
	if (unlikely(r))
		goto out_cleanup;
	r = amdgpu_move_blit(bo, true, no_wait_gpu, new_mem, old_mem);
	if (unlikely(r))
		goto out_cleanup;
552 out_cleanup:
553 	ttm_bo_mem_put(bo, &tmp_mem);
554 	return r;
555 }
556 
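/* TTM move callback: pick between a null move, a copy-engine blit and the
 * CPU memcpy fallback depending on the old and new placement.
 */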
557 static int amdgpu_bo_move(struct ttm_buffer_object *bo,
558 			bool evict, bool interruptible,
559 			bool no_wait_gpu,
560 			struct ttm_mem_reg *new_mem)
561 {
562 	struct amdgpu_device *adev;
563 	struct amdgpu_bo *abo;
564 	struct ttm_mem_reg *old_mem = &bo->mem;
565 	int r;
566 
567 	/* Can't move a pinned BO */
568 	abo = ttm_to_amdgpu_bo(bo);
569 	if (WARN_ON_ONCE(abo->pin_count > 0))
570 		return -EINVAL;
571 
572 	adev = amdgpu_ttm_adev(bo->bdev);
573 
574 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
575 		amdgpu_move_null(bo, new_mem);
576 		return 0;
577 	}
578 	if ((old_mem->mem_type == TTM_PL_TT &&
579 	     new_mem->mem_type == TTM_PL_SYSTEM) ||
580 	    (old_mem->mem_type == TTM_PL_SYSTEM &&
581 	     new_mem->mem_type == TTM_PL_TT)) {
582 		/* bind is enough */
583 		amdgpu_move_null(bo, new_mem);
584 		return 0;
585 	}
586 	if (adev->mman.buffer_funcs == NULL ||
587 	    adev->mman.buffer_funcs_ring == NULL ||
588 	    !adev->mman.buffer_funcs_ring->ready) {
589 		/* use memcpy */
590 		goto memcpy;
591 	}
592 
593 	if (old_mem->mem_type == TTM_PL_VRAM &&
594 	    new_mem->mem_type == TTM_PL_SYSTEM) {
595 		r = amdgpu_move_vram_ram(bo, evict, interruptible,
596 					no_wait_gpu, new_mem);
597 	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
598 		   new_mem->mem_type == TTM_PL_VRAM) {
599 		r = amdgpu_move_ram_vram(bo, evict, interruptible,
600 					    no_wait_gpu, new_mem);
601 	} else {
602 		r = amdgpu_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
603 	}
604 
605 	if (r) {
606 memcpy:
607 		r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
		if (r)
			return r;
611 	}
612 
613 	if (bo->type == ttm_bo_type_device &&
614 	    new_mem->mem_type == TTM_PL_VRAM &&
615 	    old_mem->mem_type != TTM_PL_VRAM) {
616 		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
617 		 * accesses the BO after it's moved.
618 		 */
619 		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
620 	}
621 
622 	/* update statistics */
623 	atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved);
624 	return 0;
625 }
626 
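/* Reserve the I/O (bus) address space for @mem; only CPU-visible VRAM is
 * exposed as I/O memory.
 */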
627 static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
628 {
629 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
630 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
631 
632 	mem->bus.addr = NULL;
633 	mem->bus.offset = 0;
634 	mem->bus.size = mem->num_pages << PAGE_SHIFT;
635 	mem->bus.base = 0;
636 	mem->bus.is_iomem = false;
637 	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
638 		return -EINVAL;
639 	switch (mem->mem_type) {
640 	case TTM_PL_SYSTEM:
641 		/* system memory */
642 		return 0;
643 	case TTM_PL_TT:
644 		break;
645 	case TTM_PL_VRAM:
646 		mem->bus.offset = mem->start << PAGE_SHIFT;
647 		/* check if it's visible */
648 		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
649 			return -EINVAL;
650 		mem->bus.base = adev->mc.aper_base;
651 		mem->bus.is_iomem = true;
652 		break;
653 	default:
654 		return -EINVAL;
655 	}
656 	return 0;
657 }
658 
659 static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
660 {
661 }
662 
663 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
664 					   unsigned long page_offset)
665 {
666 	struct drm_mm_node *mm;
667 	unsigned long offset = (page_offset << PAGE_SHIFT);
668 
669 	mm = amdgpu_find_mm_node(&bo->mem, &offset);
670 	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
671 		(offset >> PAGE_SHIFT);
672 }
673 
674 /*
675  * TTM backend functions.
676  */
677 struct amdgpu_ttm_gup_task_list {
678 	struct list_head	list;
679 	struct task_struct	*task;
680 };
681 
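/* Driver-private ttm_tt state: GART offset, userptr tracking and the list
 * entry used to rebind the GART after a reset.
 */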
682 struct amdgpu_ttm_tt {
683 	struct ttm_dma_tt	ttm;
684 	struct amdgpu_device	*adev;
685 	u64			offset;
686 	uint64_t		userptr;
687 	struct mm_struct	*usermm;
688 	uint32_t		userflags;
689 	spinlock_t              guptasklock;
690 	struct list_head        guptasks;
691 	atomic_t		mmu_invalidations;
692 	uint32_t		last_set_pages;
693 	struct list_head        list;
694 };
695 
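/* Pin the user pages backing a userptr BO with get_user_pages(), recording
 * the calling task so concurrent MMU invalidations can be detected.
 */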
696 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
697 {
698 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
699 	unsigned int flags = 0;
700 	unsigned pinned = 0;
701 	int r;
702 
703 	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
704 		flags |= FOLL_WRITE;
705 
706 	down_read(&current->mm->mmap_sem);
707 
708 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
		/* check that we only use anonymous memory
		 * to prevent problems with writeback
		 */
711 		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
712 		struct vm_area_struct *vma;
713 
714 		vma = find_vma(gtt->usermm, gtt->userptr);
715 		if (!vma || vma->vm_file || vma->vm_end < end) {
716 			up_read(&current->mm->mmap_sem);
717 			return -EPERM;
718 		}
719 	}
720 
721 	do {
722 		unsigned num_pages = ttm->num_pages - pinned;
723 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
724 		struct page **p = pages + pinned;
725 		struct amdgpu_ttm_gup_task_list guptask;
726 
727 		guptask.task = current;
728 		spin_lock(&gtt->guptasklock);
729 		list_add(&guptask.list, &gtt->guptasks);
730 		spin_unlock(&gtt->guptasklock);
731 
732 		r = get_user_pages(userptr, num_pages, flags, p, NULL);
733 
734 		spin_lock(&gtt->guptasklock);
735 		list_del(&guptask.list);
736 		spin_unlock(&gtt->guptasklock);
737 
738 		if (r < 0)
739 			goto release_pages;
740 
741 		pinned += r;
742 
743 	} while (pinned < ttm->num_pages);
744 
745 	up_read(&current->mm->mmap_sem);
746 	return 0;
747 
748 release_pages:
749 	release_pages(pages, pinned);
750 	up_read(&current->mm->mmap_sem);
751 	return r;
752 }
753 
754 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
755 {
756 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
757 	unsigned i;
758 
759 	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
760 	for (i = 0; i < ttm->num_pages; ++i) {
761 		if (ttm->pages[i])
762 			put_page(ttm->pages[i]);
763 
764 		ttm->pages[i] = pages ? pages[i] : NULL;
765 	}
766 }
767 
768 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
769 {
770 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
771 	unsigned i;
772 
773 	for (i = 0; i < ttm->num_pages; ++i) {
774 		struct page *page = ttm->pages[i];
775 
776 		if (!page)
777 			continue;
778 
779 		if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
780 			set_page_dirty(page);
781 
782 		mark_page_accessed(page);
783 	}
784 }
785 
786 /* prepare the sg table with the user pages */
787 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
788 {
789 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
790 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
791 	unsigned nents;
792 	int r;
793 
794 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
795 	enum dma_data_direction direction = write ?
796 		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
797 
798 	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
799 				      ttm->num_pages << PAGE_SHIFT,
800 				      GFP_KERNEL);
801 	if (r)
802 		goto release_sg;
803 
804 	r = -ENOMEM;
805 	nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
806 	if (nents != ttm->sg->nents)
807 		goto release_sg;
808 
809 	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
810 					 gtt->ttm.dma_address, ttm->num_pages);
811 
812 	return 0;
813 
814 release_sg:
815 	kfree(ttm->sg);
816 	return r;
817 }
818 
819 static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
820 {
821 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
822 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
823 
824 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
825 	enum dma_data_direction direction = write ?
826 		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
827 
828 	/* double check that we don't free the table twice */
829 	if (!ttm->sg->sgl)
830 		return;
831 
832 	/* free the sg table and pages again */
833 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
834 
835 	amdgpu_ttm_tt_mark_user_pages(ttm);
836 
837 	sg_free_table(ttm->sg);
838 }
839 
840 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
841 				   struct ttm_mem_reg *bo_mem)
842 {
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
844 	uint64_t flags;
845 	int r = 0;
846 
847 	if (gtt->userptr) {
848 		r = amdgpu_ttm_tt_pin_userptr(ttm);
849 		if (r) {
850 			DRM_ERROR("failed to pin userptr\n");
851 			return r;
852 		}
853 	}
854 	if (!ttm->num_pages) {
855 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
856 		     ttm->num_pages, bo_mem, ttm);
857 	}
858 
859 	if (bo_mem->mem_type == AMDGPU_PL_GDS ||
860 	    bo_mem->mem_type == AMDGPU_PL_GWS ||
861 	    bo_mem->mem_type == AMDGPU_PL_OA)
862 		return -EINVAL;
863 
864 	if (!amdgpu_gtt_mgr_is_allocated(bo_mem))
865 		return 0;
866 
867 	spin_lock(&gtt->adev->gtt_list_lock);
868 	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
869 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
870 	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
871 		ttm->pages, gtt->ttm.dma_address, flags);
872 
873 	if (r) {
874 		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
875 			  ttm->num_pages, gtt->offset);
876 		goto error_gart_bind;
877 	}
878 
879 	list_add_tail(&gtt->list, &gtt->adev->gtt_list);
880 error_gart_bind:
881 	spin_unlock(&gtt->adev->gtt_list_lock);
882 	return r;
883 }
884 
885 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
886 {
887 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
888 
889 	return gtt && !list_empty(&gtt->list);
890 }
891 
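/* Allocate a GART offset for a BO that was created without one and bind it
 * there, updating bo->offset with the resulting GPU address.
 */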
892 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
893 {
894 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
895 	struct ttm_tt *ttm = bo->ttm;
896 	struct ttm_mem_reg tmp;
897 	struct ttm_placement placement;
898 	struct ttm_place placements;
899 	int r;
900 
901 	if (!ttm || amdgpu_ttm_is_bound(ttm))
902 		return 0;
903 
904 	tmp = bo->mem;
905 	tmp.mm_node = NULL;
906 	placement.num_placement = 1;
907 	placement.placement = &placements;
908 	placement.num_busy_placement = 1;
909 	placement.busy_placement = &placements;
910 	placements.fpfn = 0;
911 	placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
912 	placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
913 		TTM_PL_FLAG_TT;
914 
915 	r = ttm_bo_mem_space(bo, &placement, &tmp, true, false);
916 	if (unlikely(r))
917 		return r;
918 
919 	r = ttm_bo_move_ttm(bo, true, false, &tmp);
920 	if (unlikely(r))
921 		ttm_bo_mem_put(bo, &tmp);
922 	else
923 		bo->offset = (bo->mem.start << PAGE_SHIFT) +
924 			bo->bdev->man[bo->mem.mem_type].gpu_offset;
925 
926 	return r;
927 }
928 
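/* Rebind every ttm_tt on the GTT list into the GART, typically to restore
 * the GART table after a GPU reset.
 */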
929 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
930 {
931 	struct amdgpu_ttm_tt *gtt, *tmp;
932 	struct ttm_mem_reg bo_mem;
933 	uint64_t flags;
934 	int r;
935 
936 	bo_mem.mem_type = TTM_PL_TT;
937 	spin_lock(&adev->gtt_list_lock);
938 	list_for_each_entry_safe(gtt, tmp, &adev->gtt_list, list) {
939 		flags = amdgpu_ttm_tt_pte_flags(gtt->adev, &gtt->ttm.ttm, &bo_mem);
940 		r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
941 				     gtt->ttm.ttm.pages, gtt->ttm.dma_address,
942 				     flags);
943 		if (r) {
944 			spin_unlock(&adev->gtt_list_lock);
945 			DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
946 				  gtt->ttm.ttm.num_pages, gtt->offset);
947 			return r;
948 		}
949 	}
950 	spin_unlock(&adev->gtt_list_lock);
951 	return 0;
952 }
953 
954 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
955 {
956 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
957 	int r;
958 
959 	if (gtt->userptr)
960 		amdgpu_ttm_tt_unpin_userptr(ttm);
961 
962 	if (!amdgpu_ttm_is_bound(ttm))
963 		return 0;
964 
965 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
966 	spin_lock(&gtt->adev->gtt_list_lock);
967 	r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
968 	if (r) {
969 		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
970 			  gtt->ttm.ttm.num_pages, gtt->offset);
971 		goto error_unbind;
972 	}
973 	list_del_init(&gtt->list);
974 error_unbind:
975 	spin_unlock(&gtt->adev->gtt_list_lock);
976 	return r;
977 }
978 
979 static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
980 {
981 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
982 
983 	ttm_dma_tt_fini(&gtt->ttm);
984 	kfree(gtt);
985 }
986 
987 static struct ttm_backend_func amdgpu_backend_func = {
988 	.bind = &amdgpu_ttm_backend_bind,
989 	.unbind = &amdgpu_ttm_backend_unbind,
990 	.destroy = &amdgpu_ttm_backend_destroy,
991 };
992 
993 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
994 				    unsigned long size, uint32_t page_flags,
995 				    struct page *dummy_read_page)
996 {
997 	struct amdgpu_device *adev;
998 	struct amdgpu_ttm_tt *gtt;
999 
1000 	adev = amdgpu_ttm_adev(bdev);
1001 
1002 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
	if (gtt == NULL)
		return NULL;
1006 	gtt->ttm.ttm.func = &amdgpu_backend_func;
1007 	gtt->adev = adev;
1008 	if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
1009 		kfree(gtt);
1010 		return NULL;
1011 	}
1012 	INIT_LIST_HEAD(&gtt->list);
1013 	return &gtt->ttm.ttm;
1014 }
1015 
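/* Allocate and DMA-map the backing pages of a ttm_tt; userptr and imported
 * (SG) objects get their pages from elsewhere and are only wired up here.
 */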
1016 static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
1017 {
1018 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
1019 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1020 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
1021 
1022 	if (ttm->state != tt_unpopulated)
1023 		return 0;
1024 
1025 	if (gtt && gtt->userptr) {
1026 		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1027 		if (!ttm->sg)
1028 			return -ENOMEM;
1029 
1030 		ttm->page_flags |= TTM_PAGE_FLAG_SG;
1031 		ttm->state = tt_unbound;
1032 		return 0;
1033 	}
1034 
1035 	if (slave && ttm->sg) {
1036 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
1037 						 gtt->ttm.dma_address, ttm->num_pages);
1038 		ttm->state = tt_unbound;
1039 		return 0;
1040 	}
1041 
1042 #ifdef CONFIG_SWIOTLB
	if (swiotlb_nr_tbl())
		return ttm_dma_populate(&gtt->ttm, adev->dev);
1046 #endif
1047 
1048 	return ttm_populate_and_map_pages(adev->dev, &gtt->ttm);
1049 }
1050 
1051 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
1052 {
1053 	struct amdgpu_device *adev;
1054 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1055 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
1056 
1057 	if (gtt && gtt->userptr) {
1058 		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
1059 		kfree(ttm->sg);
1060 		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
1061 		return;
1062 	}
1063 
1064 	if (slave)
1065 		return;
1066 
1067 	adev = amdgpu_ttm_adev(ttm->bdev);
1068 
1069 #ifdef CONFIG_SWIOTLB
1070 	if (swiotlb_nr_tbl()) {
1071 		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
1072 		return;
1073 	}
1074 #endif
1075 
1076 	ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
1077 }
1078 
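/* Record the user address, owning mm and flags of a userptr BO so its
 * pages can be pinned when the ttm_tt is bound.
 */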
1079 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1080 			      uint32_t flags)
1081 {
1082 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1083 
1084 	if (gtt == NULL)
1085 		return -EINVAL;
1086 
1087 	gtt->userptr = addr;
1088 	gtt->usermm = current->mm;
1089 	gtt->userflags = flags;
1090 	spin_lock_init(&gtt->guptasklock);
1091 	INIT_LIST_HEAD(&gtt->guptasks);
1092 	atomic_set(&gtt->mmu_invalidations, 0);
1093 	gtt->last_set_pages = 0;
1094 
1095 	return 0;
1096 }
1097 
1098 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1099 {
1100 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1101 
1102 	if (gtt == NULL)
1103 		return NULL;
1104 
1105 	return gtt->usermm;
1106 }
1107 
1108 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1109 				  unsigned long end)
1110 {
1111 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1112 	struct amdgpu_ttm_gup_task_list *entry;
1113 	unsigned long size;
1114 
1115 	if (gtt == NULL || !gtt->userptr)
1116 		return false;
1117 
1118 	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
1119 	if (gtt->userptr > end || gtt->userptr + size <= start)
1120 		return false;
1121 
1122 	spin_lock(&gtt->guptasklock);
1123 	list_for_each_entry(entry, &gtt->guptasks, list) {
1124 		if (entry->task == current) {
1125 			spin_unlock(&gtt->guptasklock);
1126 			return false;
1127 		}
1128 	}
1129 	spin_unlock(&gtt->guptasklock);
1130 
1131 	atomic_inc(&gtt->mmu_invalidations);
1132 
1133 	return true;
1134 }
1135 
1136 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
1137 				       int *last_invalidated)
1138 {
1139 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1140 	int prev_invalidated = *last_invalidated;
1141 
1142 	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
1143 	return prev_invalidated != *last_invalidated;
1144 }
1145 
1146 bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
1147 {
1148 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1149 
1150 	if (gtt == NULL || !gtt->userptr)
1151 		return false;
1152 
1153 	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
1154 }
1155 
1156 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1157 {
1158 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1159 
1160 	if (gtt == NULL)
1161 		return false;
1162 
1163 	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1164 }
1165 
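/* Compute the GART/VM PTE flags (valid, system, snooped, readable,
 * writeable) matching a ttm_tt and its placement.
 */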
1166 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1167 				 struct ttm_mem_reg *mem)
1168 {
1169 	uint64_t flags = 0;
1170 
1171 	if (mem && mem->mem_type != TTM_PL_SYSTEM)
1172 		flags |= AMDGPU_PTE_VALID;
1173 
1174 	if (mem && mem->mem_type == TTM_PL_TT) {
1175 		flags |= AMDGPU_PTE_SYSTEM;
1176 
1177 		if (ttm->caching_state == tt_cached)
1178 			flags |= AMDGPU_PTE_SNOOPED;
1179 	}
1180 
1181 	flags |= adev->gart.gart_pte_flags;
1182 	flags |= AMDGPU_PTE_READABLE;
1183 
1184 	if (!amdgpu_ttm_tt_is_readonly(ttm))
1185 		flags |= AMDGPU_PTE_WRITEABLE;
1186 
1187 	return flags;
1188 }
1189 
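/* Tell TTM whether evicting @bo could actually free space usable by the
 * requested placement.
 */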
1190 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1191 					    const struct ttm_place *place)
1192 {
1193 	unsigned long num_pages = bo->mem.num_pages;
1194 	struct drm_mm_node *node = bo->mem.mm_node;
1195 
1196 	switch (bo->mem.mem_type) {
1197 	case TTM_PL_TT:
1198 		return true;
1199 
1200 	case TTM_PL_VRAM:
1201 		/* Check each drm MM node individually */
1202 		while (num_pages) {
1203 			if (place->fpfn < (node->start + node->size) &&
1204 			    !(place->lpfn && place->lpfn <= node->start))
1205 				return true;
1206 
1207 			num_pages -= node->size;
1208 			++node;
1209 		}
1210 		return false;
1211 
1212 	default:
1213 		break;
1214 	}
1215 
1216 	return ttm_bo_eviction_valuable(bo, place);
1217 }
1218 
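/* CPU access to the VRAM backing a BO, word by word through the
 * MM_INDEX/MM_DATA register pair.
 */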
1219 static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1220 				    unsigned long offset,
1221 				    void *buf, int len, int write)
1222 {
1223 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1224 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1225 	struct drm_mm_node *nodes;
1226 	uint32_t value = 0;
1227 	int ret = 0;
1228 	uint64_t pos;
1229 	unsigned long flags;
1230 
1231 	if (bo->mem.mem_type != TTM_PL_VRAM)
1232 		return -EIO;
1233 
1234 	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
1235 	pos = (nodes->start << PAGE_SHIFT) + offset;
1236 
1237 	while (len && pos < adev->mc.mc_vram_size) {
1238 		uint64_t aligned_pos = pos & ~(uint64_t)3;
1239 		uint32_t bytes = 4 - (pos & 3);
1240 		uint32_t shift = (pos & 3) * 8;
1241 		uint32_t mask = 0xffffffff << shift;
1242 
1243 		if (len < bytes) {
1244 			mask &= 0xffffffff >> (bytes - len) * 8;
1245 			bytes = len;
1246 		}
1247 
1248 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1249 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
1250 		WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
1251 		if (!write || mask != 0xffffffff)
1252 			value = RREG32_NO_KIQ(mmMM_DATA);
1253 		if (write) {
1254 			value &= ~mask;
1255 			value |= (*(uint32_t *)buf << shift) & mask;
1256 			WREG32_NO_KIQ(mmMM_DATA, value);
1257 		}
1258 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1259 		if (!write) {
1260 			value = (value & mask) >> shift;
1261 			memcpy(buf, &value, bytes);
1262 		}
1263 
1264 		ret += bytes;
1265 		buf = (uint8_t *)buf + bytes;
1266 		pos += bytes;
1267 		len -= bytes;
1268 		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
1269 			++nodes;
1270 			pos = (nodes->start << PAGE_SHIFT);
1271 		}
1272 	}
1273 
1274 	return ret;
1275 }
1276 
1277 static struct ttm_bo_driver amdgpu_bo_driver = {
1278 	.ttm_tt_create = &amdgpu_ttm_tt_create,
1279 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
1280 	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1281 	.invalidate_caches = &amdgpu_invalidate_caches,
1282 	.init_mem_type = &amdgpu_init_mem_type,
1283 	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
1284 	.evict_flags = &amdgpu_evict_flags,
1285 	.move = &amdgpu_bo_move,
1286 	.verify_access = &amdgpu_verify_access,
1287 	.move_notify = &amdgpu_bo_move_notify,
1288 	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
1289 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1290 	.io_mem_free = &amdgpu_ttm_io_mem_free,
1291 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1292 	.access_memory = &amdgpu_ttm_access_memory
1293 };
1294 
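/* Initialize the TTM device and the VRAM, GTT, GDS, GWS and OA memory
 * managers, reserve firmware and stolen VGA memory and create the debugfs
 * entries.
 */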
1295 int amdgpu_ttm_init(struct amdgpu_device *adev)
1296 {
1297 	uint64_t gtt_size;
1298 	int r;
1299 	u64 vis_vram_limit;
1300 
1301 	r = amdgpu_ttm_global_init(adev);
	if (r)
		return r;
	/* No other users of the address space, so set it to 0 */
1306 	r = ttm_bo_device_init(&adev->mman.bdev,
1307 			       adev->mman.bo_global_ref.ref.object,
1308 			       &amdgpu_bo_driver,
1309 			       adev->ddev->anon_inode->i_mapping,
1310 			       DRM_FILE_PAGE_OFFSET,
1311 			       adev->need_dma32);
1312 	if (r) {
1313 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
1314 		return r;
1315 	}
1316 	adev->mman.initialized = true;
1317 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
1318 				adev->mc.real_vram_size >> PAGE_SHIFT);
1319 	if (r) {
1320 		DRM_ERROR("Failed initializing VRAM heap.\n");
1321 		return r;
1322 	}
1323 
1324 	/* Reduce size of CPU-visible VRAM if requested */
1325 	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
1326 	if (amdgpu_vis_vram_limit > 0 &&
1327 	    vis_vram_limit <= adev->mc.visible_vram_size)
1328 		adev->mc.visible_vram_size = vis_vram_limit;
1329 
1330 	/* Change the size here instead of the init above so only lpfn is affected */
1331 	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
1332 
	/*
	 * The VRAM reserved for firmware must be pinned at the specified
	 * place in VRAM, so reserve it early.
	 */
1337 	r = amdgpu_fw_reserve_vram_init(adev);
	if (r)
		return r;
1341 
1342 	r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
1343 				    AMDGPU_GEM_DOMAIN_VRAM,
1344 				    &adev->stolen_vga_memory,
1345 				    NULL, NULL);
1346 	if (r)
1347 		return r;
1348 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1349 		 (unsigned) (adev->mc.real_vram_size / (1024 * 1024)));
1350 
1351 	if (amdgpu_gtt_size == -1)
1352 		gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
1353 			       adev->mc.mc_vram_size);
1354 	else
1355 		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
1356 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
1357 	if (r) {
1358 		DRM_ERROR("Failed initializing GTT heap.\n");
1359 		return r;
1360 	}
1361 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
1362 		 (unsigned)(gtt_size / (1024 * 1024)));
1363 
	adev->gds.mem.total_size <<= AMDGPU_GDS_SHIFT;
	adev->gds.mem.gfx_partition_size <<= AMDGPU_GDS_SHIFT;
	adev->gds.mem.cs_partition_size <<= AMDGPU_GDS_SHIFT;
	adev->gds.gws.total_size <<= AMDGPU_GWS_SHIFT;
	adev->gds.gws.gfx_partition_size <<= AMDGPU_GWS_SHIFT;
	adev->gds.gws.cs_partition_size <<= AMDGPU_GWS_SHIFT;
	adev->gds.oa.total_size <<= AMDGPU_OA_SHIFT;
	adev->gds.oa.gfx_partition_size <<= AMDGPU_OA_SHIFT;
	adev->gds.oa.cs_partition_size <<= AMDGPU_OA_SHIFT;
1373 	/* GDS Memory */
1374 	if (adev->gds.mem.total_size) {
1375 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
1376 				   adev->gds.mem.total_size >> PAGE_SHIFT);
1377 		if (r) {
1378 			DRM_ERROR("Failed initializing GDS heap.\n");
1379 			return r;
1380 		}
1381 	}
1382 
1383 	/* GWS */
1384 	if (adev->gds.gws.total_size) {
1385 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
1386 				   adev->gds.gws.total_size >> PAGE_SHIFT);
1387 		if (r) {
1388 			DRM_ERROR("Failed initializing gws heap.\n");
1389 			return r;
1390 		}
1391 	}
1392 
1393 	/* OA */
1394 	if (adev->gds.oa.total_size) {
1395 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
1396 				   adev->gds.oa.total_size >> PAGE_SHIFT);
1397 		if (r) {
1398 			DRM_ERROR("Failed initializing oa heap.\n");
1399 			return r;
1400 		}
1401 	}
1402 
1403 	r = amdgpu_ttm_debugfs_init(adev);
1404 	if (r) {
1405 		DRM_ERROR("Failed to init debugfs\n");
1406 		return r;
1407 	}
1408 	return 0;
1409 }
1410 
1411 void amdgpu_ttm_fini(struct amdgpu_device *adev)
1412 {
1413 	int r;
1414 
1415 	if (!adev->mman.initialized)
1416 		return;
1417 	amdgpu_ttm_debugfs_fini(adev);
1418 	if (adev->stolen_vga_memory) {
1419 		r = amdgpu_bo_reserve(adev->stolen_vga_memory, true);
1420 		if (r == 0) {
1421 			amdgpu_bo_unpin(adev->stolen_vga_memory);
1422 			amdgpu_bo_unreserve(adev->stolen_vga_memory);
1423 		}
1424 		amdgpu_bo_unref(&adev->stolen_vga_memory);
1425 	}
1426 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
1427 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
1428 	if (adev->gds.mem.total_size)
1429 		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
1430 	if (adev->gds.gws.total_size)
1431 		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
1432 	if (adev->gds.oa.total_size)
1433 		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
1434 	ttm_bo_device_release(&adev->mman.bdev);
1435 	amdgpu_gart_fini(adev);
1436 	amdgpu_ttm_global_fini(adev);
1437 	adev->mman.initialized = false;
1438 	DRM_INFO("amdgpu: ttm finalized\n");
1439 }
1440 
/* This should only be called at bootup or when userspace
 * isn't running.
 */
1443 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
1444 {
1445 	struct ttm_mem_type_manager *man;
1446 
1447 	if (!adev->mman.initialized)
1448 		return;
1449 
1450 	man = &adev->mman.bdev.man[TTM_PL_VRAM];
	/* this just adjusts TTM's idea of the VRAM size, which in turn sets
	 * lpfn to the correct value
	 */
1452 	man->size = size >> PAGE_SHIFT;
1453 }
1454 
1455 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
1456 {
1457 	struct drm_file *file_priv;
1458 	struct amdgpu_device *adev;
1459 
1460 	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
1461 		return -EINVAL;
1462 
1463 	file_priv = filp->private_data;
1464 	adev = file_priv->minor->dev->dev_private;
1465 	if (adev == NULL)
1466 		return -EINVAL;
1467 
1468 	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
1469 }
1470 
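/* Map @num_pages of a not yet bound GTT BO into the GART transfer window
 * selected by @window so the copy engine can access it; the GPU address of
 * the window is returned in @addr.
 */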
1471 static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
1472 			     struct ttm_mem_reg *mem, unsigned num_pages,
1473 			     uint64_t offset, unsigned window,
1474 			     struct amdgpu_ring *ring,
1475 			     uint64_t *addr)
1476 {
1477 	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
1478 	struct amdgpu_device *adev = ring->adev;
1479 	struct ttm_tt *ttm = bo->ttm;
1480 	struct amdgpu_job *job;
1481 	unsigned num_dw, num_bytes;
1482 	dma_addr_t *dma_address;
1483 	struct dma_fence *fence;
1484 	uint64_t src_addr, dst_addr;
1485 	uint64_t flags;
1486 	int r;
1487 
1488 	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
1489 	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
1490 
1491 	*addr = adev->mc.gart_start;
1492 	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
1493 		AMDGPU_GPU_PAGE_SIZE;
1494 
1495 	num_dw = adev->mman.buffer_funcs->copy_num_dw;
1496 	while (num_dw & 0x7)
1497 		num_dw++;
1498 
1499 	num_bytes = num_pages * 8;
1500 
1501 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
1502 	if (r)
1503 		return r;
1504 
1505 	src_addr = num_dw * 4;
1506 	src_addr += job->ibs[0].gpu_addr;
1507 
1508 	dst_addr = adev->gart.table_addr;
1509 	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
1510 	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
1511 				dst_addr, num_bytes);
1512 
1513 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1514 	WARN_ON(job->ibs[0].length_dw > num_dw);
1515 
1516 	dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
1517 	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
1518 	r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
1519 			    &job->ibs[0].ptr[num_dw]);
1520 	if (r)
1521 		goto error_free;
1522 
1523 	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
1524 			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
1525 	if (r)
1526 		goto error_free;
1527 
1528 	dma_fence_put(fence);
1529 
1530 	return r;
1531 
1532 error_free:
1533 	amdgpu_job_free(job);
1534 	return r;
1535 }
1536 
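/* Schedule a copy of @byte_count bytes between two GPU addresses with the
 * copy engine, optionally syncing to @resv, and return the fence in @fence.
 */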
1537 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
1538 		       uint64_t dst_offset, uint32_t byte_count,
1539 		       struct reservation_object *resv,
1540 		       struct dma_fence **fence, bool direct_submit,
1541 		       bool vm_needs_flush)
1542 {
1543 	struct amdgpu_device *adev = ring->adev;
1544 	struct amdgpu_job *job;
1545 
1546 	uint32_t max_bytes;
1547 	unsigned num_loops, num_dw;
1548 	unsigned i;
1549 	int r;
1550 
1551 	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
1552 	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
1553 	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
1554 
1555 	/* for IB padding */
1556 	while (num_dw & 0x7)
1557 		num_dw++;
1558 
1559 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
1560 	if (r)
1561 		return r;
1562 
1563 	job->vm_needs_flush = vm_needs_flush;
1564 	if (resv) {
1565 		r = amdgpu_sync_resv(adev, &job->sync, resv,
1566 				     AMDGPU_FENCE_OWNER_UNDEFINED,
1567 				     false);
1568 		if (r) {
1569 			DRM_ERROR("sync failed (%d).\n", r);
1570 			goto error_free;
1571 		}
1572 	}
1573 
1574 	for (i = 0; i < num_loops; i++) {
1575 		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
1576 
1577 		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
1578 					dst_offset, cur_size_in_bytes);
1579 
1580 		src_offset += cur_size_in_bytes;
1581 		dst_offset += cur_size_in_bytes;
1582 		byte_count -= cur_size_in_bytes;
1583 	}
1584 
1585 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1586 	WARN_ON(job->ibs[0].length_dw > num_dw);
1587 	if (direct_submit) {
1588 		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
1589 				       NULL, fence);
1590 		job->fence = dma_fence_get(*fence);
1591 		if (r)
1592 			DRM_ERROR("Error scheduling IBs (%d)\n", r);
1593 		amdgpu_job_free(job);
1594 	} else {
1595 		r = amdgpu_job_submit(job, ring, &adev->mman.entity,
1596 				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
1597 		if (r)
1598 			goto error_free;
1599 	}
1600 
1601 	return r;
1602 
1603 error_free:
1604 	amdgpu_job_free(job);
1605 	return r;
1606 }
1607 
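/* Fill the memory backing @bo with @src_data using set_pte_pde packets,
 * which write 8 bytes per entry, one burst per drm_mm node.
 */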
1608 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
1609 		       uint64_t src_data,
1610 		       struct reservation_object *resv,
1611 		       struct dma_fence **fence)
1612 {
1613 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
1614 	uint32_t max_bytes = 8 *
1615 			adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
1616 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
1617 
1618 	struct drm_mm_node *mm_node;
1619 	unsigned long num_pages;
1620 	unsigned int num_loops, num_dw;
1621 
1622 	struct amdgpu_job *job;
1623 	int r;
1624 
1625 	if (!ring->ready) {
1626 		DRM_ERROR("Trying to clear memory with ring turned off.\n");
1627 		return -EINVAL;
1628 	}
1629 
1630 	if (bo->tbo.mem.mem_type == TTM_PL_TT) {
1631 		r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
1632 		if (r)
1633 			return r;
1634 	}
1635 
1636 	num_pages = bo->tbo.num_pages;
1637 	mm_node = bo->tbo.mem.mm_node;
1638 	num_loops = 0;
1639 	while (num_pages) {
1640 		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
1641 
1642 		num_loops += DIV_ROUND_UP(byte_count, max_bytes);
1643 		num_pages -= mm_node->size;
1644 		++mm_node;
1645 	}
1646 
1647 	/* num of dwords for each SDMA_OP_PTEPDE cmd */
1648 	num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
1649 
1650 	/* for IB padding */
1651 	num_dw += 64;
1652 
1653 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
1654 	if (r)
1655 		return r;
1656 
1657 	if (resv) {
1658 		r = amdgpu_sync_resv(adev, &job->sync, resv,
1659 				     AMDGPU_FENCE_OWNER_UNDEFINED, false);
1660 		if (r) {
1661 			DRM_ERROR("sync failed (%d).\n", r);
1662 			goto error_free;
1663 		}
1664 	}
1665 
1666 	num_pages = bo->tbo.num_pages;
1667 	mm_node = bo->tbo.mem.mm_node;
1668 
1669 	while (num_pages) {
1670 		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
1671 		uint64_t dst_addr;
1672 
1673 		WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8");
1674 
1675 		dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
1676 		while (byte_count) {
1677 			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
1678 
1679 			amdgpu_vm_set_pte_pde(adev, &job->ibs[0],
1680 					dst_addr, 0,
1681 					cur_size_in_bytes >> 3, 0,
1682 					src_data);
1683 
1684 			dst_addr += cur_size_in_bytes;
1685 			byte_count -= cur_size_in_bytes;
1686 		}
1687 
1688 		num_pages -= mm_node->size;
1689 		++mm_node;
1690 	}
1691 
1692 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1693 	WARN_ON(job->ibs[0].length_dw > num_dw);
1694 	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
1695 			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
1696 	if (r)
1697 		goto error_free;
1698 
1699 	return 0;
1700 
1701 error_free:
1702 	amdgpu_job_free(job);
1703 	return r;
1704 }
1705 
1706 #if defined(CONFIG_DEBUG_FS)
1707 
1708 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
1709 {
1710 	struct drm_info_node *node = (struct drm_info_node *)m->private;
1711 	unsigned ttm_pl = *(int *)node->info_ent->data;
1712 	struct drm_device *dev = node->minor->dev;
1713 	struct amdgpu_device *adev = dev->dev_private;
1714 	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
1715 	struct drm_printer p = drm_seq_file_printer(m);
1716 
1717 	man->func->debug(man, &p);
1718 	return 0;
1719 }
1720 
1721 static int ttm_pl_vram = TTM_PL_VRAM;
1722 static int ttm_pl_tt = TTM_PL_TT;
1723 
1724 static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
1725 	{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
1726 	{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
1727 	{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
1728 #ifdef CONFIG_SWIOTLB
1729 	{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
1730 #endif
1731 };
1732 
1733 static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1734 				    size_t size, loff_t *pos)
1735 {
1736 	struct amdgpu_device *adev = file_inode(f)->i_private;
1737 	ssize_t result = 0;
1738 	int r;
1739 
1740 	if (size & 0x3 || *pos & 0x3)
1741 		return -EINVAL;
1742 
1743 	if (*pos >= adev->mc.mc_vram_size)
1744 		return -ENXIO;
1745 
1746 	while (size) {
1747 		unsigned long flags;
1748 		uint32_t value;
1749 
1750 		if (*pos >= adev->mc.mc_vram_size)
1751 			return result;
1752 
1753 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1754 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
1755 		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
1756 		value = RREG32_NO_KIQ(mmMM_DATA);
1757 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1758 
1759 		r = put_user(value, (uint32_t *)buf);
1760 		if (r)
1761 			return r;
1762 
1763 		result += 4;
1764 		buf += 4;
1765 		*pos += 4;
1766 		size -= 4;
1767 	}
1768 
1769 	return result;
1770 }
1771 
1772 static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
1773 				    size_t size, loff_t *pos)
1774 {
1775 	struct amdgpu_device *adev = file_inode(f)->i_private;
1776 	ssize_t result = 0;
1777 	int r;
1778 
1779 	if (size & 0x3 || *pos & 0x3)
1780 		return -EINVAL;
1781 
1782 	if (*pos >= adev->mc.mc_vram_size)
1783 		return -ENXIO;
1784 
1785 	while (size) {
1786 		unsigned long flags;
1787 		uint32_t value;
1788 
1789 		if (*pos >= adev->mc.mc_vram_size)
1790 			return result;
1791 
1792 		r = get_user(value, (uint32_t *)buf);
1793 		if (r)
1794 			return r;
1795 
1796 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1797 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
1798 		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
1799 		WREG32_NO_KIQ(mmMM_DATA, value);
1800 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1801 
1802 		result += 4;
1803 		buf += 4;
1804 		*pos += 4;
1805 		size -= 4;
1806 	}
1807 
1808 	return result;
1809 }
1810 
1811 static const struct file_operations amdgpu_ttm_vram_fops = {
1812 	.owner = THIS_MODULE,
1813 	.read = amdgpu_ttm_vram_read,
1814 	.write = amdgpu_ttm_vram_write,
1815 	.llseek = default_llseek,
1816 };
1817 
1818 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1819 
1820 static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
1821 				   size_t size, loff_t *pos)
1822 {
1823 	struct amdgpu_device *adev = file_inode(f)->i_private;
1824 	ssize_t result = 0;
1825 	int r;
1826 
1827 	while (size) {
1828 		loff_t p = *pos / PAGE_SIZE;
1829 		unsigned off = *pos & ~PAGE_MASK;
1830 		size_t cur_size = min_t(size_t, size, PAGE_SIZE - off);
1831 		struct page *page;
1832 		void *ptr;
1833 
1834 		if (p >= adev->gart.num_cpu_pages)
1835 			return result;
1836 
1837 		page = adev->gart.pages[p];
1838 		if (page) {
1839 			ptr = kmap(page);
1840 			ptr += off;
1841 
1842 			r = copy_to_user(buf, ptr, cur_size);
1843 			kunmap(adev->gart.pages[p]);
		} else {
			r = clear_user(buf, cur_size);
		}
1846 
1847 		if (r)
1848 			return -EFAULT;
1849 
1850 		result += cur_size;
1851 		buf += cur_size;
1852 		*pos += cur_size;
1853 		size -= cur_size;
1854 	}
1855 
1856 	return result;
1857 }
1858 
1859 static const struct file_operations amdgpu_ttm_gtt_fops = {
1860 	.owner = THIS_MODULE,
1861 	.read = amdgpu_ttm_gtt_read,
1862 	.llseek = default_llseek
1863 };
1864 
1865 #endif
1866 
1867 static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
1868 				   size_t size, loff_t *pos)
1869 {
1870 	struct amdgpu_device *adev = file_inode(f)->i_private;
1871 	int r;
1872 	uint64_t phys;
1873 	struct iommu_domain *dom;
1874 
	/* always return 8 bytes */
1876 	if (size != 8)
1877 		return -EINVAL;
1878 
	/* only accept page addresses */
1880 	if (*pos & 0xFFF)
1881 		return -EINVAL;
1882 
1883 	dom = iommu_get_domain_for_dev(adev->dev);
1884 	if (dom)
1885 		phys = iommu_iova_to_phys(dom, *pos);
1886 	else
1887 		phys = *pos;
1888 
1889 	r = copy_to_user(buf, &phys, 8);
1890 	if (r)
1891 		return -EFAULT;
1892 
1893 	return 8;
1894 }
1895 
1896 static const struct file_operations amdgpu_ttm_iova_fops = {
1897 	.owner = THIS_MODULE,
1898 	.read = amdgpu_iova_to_phys_read,
1899 	.llseek = default_llseek
1900 };
1901 
1902 static const struct {
1903 	char *name;
1904 	const struct file_operations *fops;
1905 	int domain;
1906 } ttm_debugfs_entries[] = {
1907 	{ "amdgpu_vram", &amdgpu_ttm_vram_fops, TTM_PL_VRAM },
1908 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1909 	{ "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
1910 #endif
1911 	{ "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM },
1912 };
1913 
1914 #endif
1915 
1916 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
1917 {
1918 #if defined(CONFIG_DEBUG_FS)
1919 	unsigned count;
1920 
1921 	struct drm_minor *minor = adev->ddev->primary;
1922 	struct dentry *ent, *root = minor->debugfs_root;
1923 
1924 	for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) {
1925 		ent = debugfs_create_file(
1926 				ttm_debugfs_entries[count].name,
1927 				S_IFREG | S_IRUGO, root,
1928 				adev,
1929 				ttm_debugfs_entries[count].fops);
1930 		if (IS_ERR(ent))
1931 			return PTR_ERR(ent);
1932 		if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
1933 			i_size_write(ent->d_inode, adev->mc.mc_vram_size);
1934 		else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
1935 			i_size_write(ent->d_inode, adev->mc.gart_size);
1936 		adev->mman.debugfs_entries[count] = ent;
1937 	}
1938 
1939 	count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);
1940 
1941 #ifdef CONFIG_SWIOTLB
1942 	if (!swiotlb_nr_tbl())
1943 		--count;
1944 #endif
1945 
1946 	return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
1947 #else
1948 	return 0;
1949 #endif
1950 }
1951 
1952 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
1953 {
1954 #if defined(CONFIG_DEBUG_FS)
1955 	unsigned i;
1956 
1957 	for (i = 0; i < ARRAY_SIZE(ttm_debugfs_entries); i++)
1958 		debugfs_remove(adev->mman.debugfs_entries[i]);
1959 #endif
1960 }
1961