1 /*
2  * Copyright 2009 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 /*
27  * Authors:
28  *    Jerome Glisse <glisse@freedesktop.org>
29  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
30  *    Dave Airlie
31  */
32 #include <drm/ttm/ttm_bo_api.h>
33 #include <drm/ttm/ttm_bo_driver.h>
34 #include <drm/ttm/ttm_placement.h>
35 #include <drm/ttm/ttm_module.h>
36 #include <drm/ttm/ttm_page_alloc.h>
37 #include <drm/drmP.h>
38 #include <drm/amdgpu_drm.h>
39 #include <linux/seq_file.h>
40 #include <linux/slab.h>
41 #include <linux/swiotlb.h>
42 #include <linux/swap.h>
43 #include <linux/pagemap.h>
44 #include <linux/debugfs.h>
45 #include <linux/iommu.h>
46 #include "amdgpu.h"
47 #include "amdgpu_object.h"
48 #include "amdgpu_trace.h"
49 #include "bif/bif_4_1_d.h"
50 
51 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
52 
53 static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
54 			     struct ttm_mem_reg *mem, unsigned num_pages,
55 			     uint64_t offset, unsigned window,
56 			     struct amdgpu_ring *ring,
57 			     uint64_t *addr);
58 
59 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
60 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
61 
62 /*
63  * Global memory.
64  */
65 static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
66 {
67 	return ttm_mem_global_init(ref->object);
68 }
69 
70 static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
71 {
72 	ttm_mem_global_release(ref->object);
73 }
74 
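/**
 * amdgpu_ttm_global_init - Initialize the global TTM memory and BO objects
 *
 * @adev: amdgpu device
 *
 * Takes a reference on the global TTM memory accounting and BO objects and
 * sets up the scheduler entity used for buffer moves.
 */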
75 static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
76 {
77 	struct drm_global_reference *global_ref;
78 	struct amdgpu_ring *ring;
79 	struct drm_sched_rq *rq;
80 	int r;
81 
82 	adev->mman.mem_global_referenced = false;
83 	global_ref = &adev->mman.mem_global_ref;
84 	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
85 	global_ref->size = sizeof(struct ttm_mem_global);
86 	global_ref->init = &amdgpu_ttm_mem_global_init;
87 	global_ref->release = &amdgpu_ttm_mem_global_release;
88 	r = drm_global_item_ref(global_ref);
89 	if (r) {
		DRM_ERROR("Failed setting up TTM memory accounting subsystem.\n");
92 		goto error_mem;
93 	}
94 
95 	adev->mman.bo_global_ref.mem_glob =
96 		adev->mman.mem_global_ref.object;
97 	global_ref = &adev->mman.bo_global_ref.ref;
98 	global_ref->global_type = DRM_GLOBAL_TTM_BO;
99 	global_ref->size = sizeof(struct ttm_bo_global);
100 	global_ref->init = &ttm_bo_global_init;
101 	global_ref->release = &ttm_bo_global_release;
102 	r = drm_global_item_ref(global_ref);
103 	if (r) {
104 		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
105 		goto error_bo;
106 	}
107 
108 	mutex_init(&adev->mman.gtt_window_lock);
109 
110 	ring = adev->mman.buffer_funcs_ring;
111 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
112 	r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
113 				  rq, amdgpu_sched_jobs, NULL);
114 	if (r) {
115 		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
116 		goto error_entity;
117 	}
118 
119 	adev->mman.mem_global_referenced = true;
120 
121 	return 0;
122 
123 error_entity:
124 	drm_global_item_unref(&adev->mman.bo_global_ref.ref);
125 error_bo:
126 	drm_global_item_unref(&adev->mman.mem_global_ref);
127 error_mem:
128 	return r;
129 }
130 
131 static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
132 {
133 	if (adev->mman.mem_global_referenced) {
134 		drm_sched_entity_fini(adev->mman.entity.sched,
135 				      &adev->mman.entity);
136 		mutex_destroy(&adev->mman.gtt_window_lock);
137 		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
138 		drm_global_item_unref(&adev->mman.mem_global_ref);
139 		adev->mman.mem_global_referenced = false;
140 	}
141 }
142 
143 static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
144 {
145 	return 0;
146 }
147 
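/**
 * amdgpu_init_mem_type - Initialize a memory manager for a TTM memory type
 *
 * @bdev: TTM BO device
 * @type: memory type (system, TT/GTT, VRAM, GDS, GWS or OA)
 * @man: memory type manager to initialize
 */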
148 static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
149 				struct ttm_mem_type_manager *man)
150 {
151 	struct amdgpu_device *adev;
152 
153 	adev = amdgpu_ttm_adev(bdev);
154 
155 	switch (type) {
156 	case TTM_PL_SYSTEM:
157 		/* System memory */
158 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
159 		man->available_caching = TTM_PL_MASK_CACHING;
160 		man->default_caching = TTM_PL_FLAG_CACHED;
161 		break;
162 	case TTM_PL_TT:
163 		man->func = &amdgpu_gtt_mgr_func;
164 		man->gpu_offset = adev->gmc.gart_start;
165 		man->available_caching = TTM_PL_MASK_CACHING;
166 		man->default_caching = TTM_PL_FLAG_CACHED;
167 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
168 		break;
169 	case TTM_PL_VRAM:
170 		/* "On-card" video ram */
171 		man->func = &amdgpu_vram_mgr_func;
172 		man->gpu_offset = adev->gmc.vram_start;
173 		man->flags = TTM_MEMTYPE_FLAG_FIXED |
174 			     TTM_MEMTYPE_FLAG_MAPPABLE;
175 		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
176 		man->default_caching = TTM_PL_FLAG_WC;
177 		break;
178 	case AMDGPU_PL_GDS:
179 	case AMDGPU_PL_GWS:
180 	case AMDGPU_PL_OA:
		/* On-chip GDS memory, GWS and OA */
182 		man->func = &ttm_bo_manager_func;
183 		man->gpu_offset = 0;
184 		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
185 		man->available_caching = TTM_PL_FLAG_UNCACHED;
186 		man->default_caching = TTM_PL_FLAG_UNCACHED;
187 		break;
188 	default:
189 		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
190 		return -EINVAL;
191 	}
192 	return 0;
193 }
194 
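/**
 * amdgpu_evict_flags - Compute the placement to use when evicting @bo
 *
 * VRAM BOs are preferably moved to the CPU inaccessible part of VRAM
 * first, falling back to GTT; everything else is evicted to system memory.
 */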
195 static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
196 				struct ttm_placement *placement)
197 {
198 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
199 	struct amdgpu_bo *abo;
200 	static const struct ttm_place placements = {
201 		.fpfn = 0,
202 		.lpfn = 0,
203 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
204 	};
205 
206 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
207 		placement->placement = &placements;
208 		placement->busy_placement = &placements;
209 		placement->num_placement = 1;
210 		placement->num_busy_placement = 1;
211 		return;
212 	}
213 	abo = ttm_to_amdgpu_bo(bo);
214 	switch (bo->mem.mem_type) {
215 	case TTM_PL_VRAM:
216 		if (adev->mman.buffer_funcs &&
217 		    adev->mman.buffer_funcs_ring &&
218 		    adev->mman.buffer_funcs_ring->ready == false) {
219 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
220 		} else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
221 			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
222 			unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
223 			struct drm_mm_node *node = bo->mem.mm_node;
224 			unsigned long pages_left;
225 
226 			for (pages_left = bo->mem.num_pages;
227 			     pages_left;
228 			     pages_left -= node->size, node++) {
229 				if (node->start < fpfn)
230 					break;
231 			}
232 
233 			if (!pages_left)
234 				goto gtt;
235 
236 			/* Try evicting to the CPU inaccessible part of VRAM
237 			 * first, but only set GTT as busy placement, so this
238 			 * BO will be evicted to GTT rather than causing other
239 			 * BOs to be evicted from VRAM
240 			 */
241 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
242 							 AMDGPU_GEM_DOMAIN_GTT);
243 			abo->placements[0].fpfn = fpfn;
244 			abo->placements[0].lpfn = 0;
245 			abo->placement.busy_placement = &abo->placements[1];
246 			abo->placement.num_busy_placement = 1;
247 		} else {
248 gtt:
249 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
250 		}
251 		break;
252 	case TTM_PL_TT:
253 	default:
254 		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
255 	}
256 	*placement = abo->placement;
257 }
258 
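/**
 * amdgpu_verify_access - Verify that a file may map a buffer object
 *
 * Userptr BOs may never be mapped; everything else is checked against the
 * DRM VMA node of the underlying GEM object.
 */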
259 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
260 {
261 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
262 
263 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
264 		return -EPERM;
265 	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
266 					  filp->private_data);
267 }
268 
269 static void amdgpu_move_null(struct ttm_buffer_object *bo,
270 			     struct ttm_mem_reg *new_mem)
271 {
272 	struct ttm_mem_reg *old_mem = &bo->mem;
273 
274 	BUG_ON(old_mem->mm_node != NULL);
275 	*old_mem = *new_mem;
276 	new_mem->mm_node = NULL;
277 }
278 
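/**
 * amdgpu_mm_node_addr - Compute the GPU address of a drm_mm_node
 *
 * Returns 0 for GTT memory that does not have a GART address assigned yet.
 */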
279 static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
280 				    struct drm_mm_node *mm_node,
281 				    struct ttm_mem_reg *mem)
282 {
283 	uint64_t addr = 0;
284 
285 	if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) {
286 		addr = mm_node->start << PAGE_SHIFT;
287 		addr += bo->bdev->man[mem->mem_type].gpu_offset;
288 	}
289 	return addr;
290 }
291 
/**
 * amdgpu_find_mm_node - Helper function to find the drm_mm_node
 *  corresponding to @offset. It also adjusts @offset to be relative to
 *  the start of the returned drm_mm_node.
 */
297 static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
298 					       unsigned long *offset)
299 {
300 	struct drm_mm_node *mm_node = mem->mm_node;
301 
302 	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
303 		*offset -= (mm_node->size << PAGE_SHIFT);
304 		++mm_node;
305 	}
306 	return mm_node;
307 }
308 
/**
 * amdgpu_ttm_copy_mem_to_mem - Helper function for copying between BOs
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo can be the same BO for a
 * move, or different BOs for a BO to BO copy.
 *
 * @f: Returns the last fence if multiple jobs are submitted.
 */
318 int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
319 			       struct amdgpu_copy_mem *src,
320 			       struct amdgpu_copy_mem *dst,
321 			       uint64_t size,
322 			       struct reservation_object *resv,
323 			       struct dma_fence **f)
324 {
325 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
326 	struct drm_mm_node *src_mm, *dst_mm;
327 	uint64_t src_node_start, dst_node_start, src_node_size,
328 		 dst_node_size, src_page_offset, dst_page_offset;
329 	struct dma_fence *fence = NULL;
330 	int r = 0;
331 	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
332 					AMDGPU_GPU_PAGE_SIZE);
333 
334 	if (!ring->ready) {
335 		DRM_ERROR("Trying to move memory with ring turned off.\n");
336 		return -EINVAL;
337 	}
338 
339 	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
340 	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
341 					     src->offset;
342 	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
343 	src_page_offset = src_node_start & (PAGE_SIZE - 1);
344 
345 	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
346 	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
347 					     dst->offset;
348 	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
349 	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
350 
351 	mutex_lock(&adev->mman.gtt_window_lock);
352 
353 	while (size) {
354 		unsigned long cur_size;
355 		uint64_t from = src_node_start, to = dst_node_start;
356 		struct dma_fence *next;
357 
358 		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
359 		 * begins at an offset, then adjust the size accordingly
360 		 */
361 		cur_size = min3(min(src_node_size, dst_node_size), size,
362 				GTT_MAX_BYTES);
363 		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
364 		    cur_size + dst_page_offset > GTT_MAX_BYTES)
365 			cur_size -= max(src_page_offset, dst_page_offset);
366 
367 		/* Map only what needs to be accessed. Map src to window 0 and
368 		 * dst to window 1
369 		 */
370 		if (src->mem->mem_type == TTM_PL_TT &&
371 		    !amdgpu_gtt_mgr_has_gart_addr(src->mem)) {
372 			r = amdgpu_map_buffer(src->bo, src->mem,
373 					PFN_UP(cur_size + src_page_offset),
374 					src_node_start, 0, ring,
375 					&from);
376 			if (r)
377 				goto error;
			/* Adjust the offset because amdgpu_map_buffer returns
			 * the start of the mapped page
			 */
381 			from += src_page_offset;
382 		}
383 
384 		if (dst->mem->mem_type == TTM_PL_TT &&
385 		    !amdgpu_gtt_mgr_has_gart_addr(dst->mem)) {
386 			r = amdgpu_map_buffer(dst->bo, dst->mem,
387 					PFN_UP(cur_size + dst_page_offset),
388 					dst_node_start, 1, ring,
389 					&to);
390 			if (r)
391 				goto error;
392 			to += dst_page_offset;
393 		}
394 
395 		r = amdgpu_copy_buffer(ring, from, to, cur_size,
396 				       resv, &next, false, true);
397 		if (r)
398 			goto error;
399 
400 		dma_fence_put(fence);
401 		fence = next;
402 
403 		size -= cur_size;
404 		if (!size)
405 			break;
406 
407 		src_node_size -= cur_size;
408 		if (!src_node_size) {
409 			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
410 							     src->mem);
411 			src_node_size = (src_mm->size << PAGE_SHIFT);
412 		} else {
413 			src_node_start += cur_size;
414 			src_page_offset = src_node_start & (PAGE_SIZE - 1);
415 		}
416 		dst_node_size -= cur_size;
417 		if (!dst_node_size) {
418 			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
419 							     dst->mem);
420 			dst_node_size = (dst_mm->size << PAGE_SHIFT);
421 		} else {
422 			dst_node_start += cur_size;
423 			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
424 		}
425 	}
426 error:
427 	mutex_unlock(&adev->mman.gtt_window_lock);
428 	if (f)
429 		*f = dma_fence_get(fence);
430 	dma_fence_put(fence);
431 	return r;
432 }
433 
434 
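/**
 * amdgpu_move_blit - Move a BO with the buffer move (copy) ring
 *
 * Copies the contents of @old_mem to @new_mem with
 * amdgpu_ttm_copy_mem_to_mem() and then pipelines the move.
 */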
435 static int amdgpu_move_blit(struct ttm_buffer_object *bo,
436 			    bool evict, bool no_wait_gpu,
437 			    struct ttm_mem_reg *new_mem,
438 			    struct ttm_mem_reg *old_mem)
439 {
440 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
441 	struct amdgpu_copy_mem src, dst;
442 	struct dma_fence *fence = NULL;
443 	int r;
444 
445 	src.bo = bo;
446 	dst.bo = bo;
447 	src.mem = old_mem;
448 	dst.mem = new_mem;
449 	src.offset = 0;
450 	dst.offset = 0;
451 
452 	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
453 				       new_mem->num_pages << PAGE_SHIFT,
454 				       bo->resv, &fence);
455 	if (r)
456 		goto error;
457 
458 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
459 	dma_fence_put(fence);
460 	return r;
461 
462 error:
463 	if (fence)
464 		dma_fence_wait(fence, false);
465 	dma_fence_put(fence);
466 	return r;
467 }
468 
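/**
 * amdgpu_move_vram_ram - Move a BO from VRAM to system memory
 *
 * Blits the BO into a temporary GTT placement first and lets TTM finish
 * the move to system memory.
 */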
469 static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
470 				struct ttm_operation_ctx *ctx,
471 				struct ttm_mem_reg *new_mem)
472 {
473 	struct amdgpu_device *adev;
474 	struct ttm_mem_reg *old_mem = &bo->mem;
475 	struct ttm_mem_reg tmp_mem;
476 	struct ttm_place placements;
477 	struct ttm_placement placement;
478 	int r;
479 
480 	adev = amdgpu_ttm_adev(bo->bdev);
481 	tmp_mem = *new_mem;
482 	tmp_mem.mm_node = NULL;
483 	placement.num_placement = 1;
484 	placement.placement = &placements;
485 	placement.num_busy_placement = 1;
486 	placement.busy_placement = &placements;
487 	placements.fpfn = 0;
488 	placements.lpfn = 0;
489 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
490 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
491 	if (unlikely(r)) {
492 		return r;
493 	}
494 
495 	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
496 	if (unlikely(r)) {
497 		goto out_cleanup;
498 	}
499 
500 	r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
501 	if (unlikely(r)) {
502 		goto out_cleanup;
503 	}
504 	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
505 	if (unlikely(r)) {
506 		goto out_cleanup;
507 	}
508 	r = ttm_bo_move_ttm(bo, ctx, new_mem);
509 out_cleanup:
510 	ttm_bo_mem_put(bo, &tmp_mem);
511 	return r;
512 }
513 
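/**
 * amdgpu_move_ram_vram - Move a BO from system memory to VRAM
 *
 * Moves the BO into a temporary GTT placement first and then blits it
 * into VRAM.
 */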
514 static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
515 				struct ttm_operation_ctx *ctx,
516 				struct ttm_mem_reg *new_mem)
517 {
518 	struct amdgpu_device *adev;
519 	struct ttm_mem_reg *old_mem = &bo->mem;
520 	struct ttm_mem_reg tmp_mem;
521 	struct ttm_placement placement;
522 	struct ttm_place placements;
523 	int r;
524 
525 	adev = amdgpu_ttm_adev(bo->bdev);
526 	tmp_mem = *new_mem;
527 	tmp_mem.mm_node = NULL;
528 	placement.num_placement = 1;
529 	placement.placement = &placements;
530 	placement.num_busy_placement = 1;
531 	placement.busy_placement = &placements;
532 	placements.fpfn = 0;
533 	placements.lpfn = 0;
534 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
535 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
536 	if (unlikely(r)) {
537 		return r;
538 	}
539 	r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
540 	if (unlikely(r)) {
541 		goto out_cleanup;
542 	}
543 	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
544 	if (unlikely(r)) {
545 		goto out_cleanup;
546 	}
547 out_cleanup:
548 	ttm_bo_mem_put(bo, &tmp_mem);
549 	return r;
550 }
551 
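/**
 * amdgpu_bo_move - TTM move callback
 *
 * Uses a NULL move or a GPU blit where possible and falls back to a CPU
 * memcpy when the buffer move ring is not available.
 */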
552 static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
553 			  struct ttm_operation_ctx *ctx,
554 			  struct ttm_mem_reg *new_mem)
555 {
556 	struct amdgpu_device *adev;
557 	struct amdgpu_bo *abo;
558 	struct ttm_mem_reg *old_mem = &bo->mem;
559 	int r;
560 
561 	/* Can't move a pinned BO */
562 	abo = ttm_to_amdgpu_bo(bo);
563 	if (WARN_ON_ONCE(abo->pin_count > 0))
564 		return -EINVAL;
565 
566 	adev = amdgpu_ttm_adev(bo->bdev);
567 
568 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
569 		amdgpu_move_null(bo, new_mem);
570 		return 0;
571 	}
572 	if ((old_mem->mem_type == TTM_PL_TT &&
573 	     new_mem->mem_type == TTM_PL_SYSTEM) ||
574 	    (old_mem->mem_type == TTM_PL_SYSTEM &&
575 	     new_mem->mem_type == TTM_PL_TT)) {
576 		/* bind is enough */
577 		amdgpu_move_null(bo, new_mem);
578 		return 0;
579 	}
580 	if (adev->mman.buffer_funcs == NULL ||
581 	    adev->mman.buffer_funcs_ring == NULL ||
582 	    !adev->mman.buffer_funcs_ring->ready) {
583 		/* use memcpy */
584 		goto memcpy;
585 	}
586 
587 	if (old_mem->mem_type == TTM_PL_VRAM &&
588 	    new_mem->mem_type == TTM_PL_SYSTEM) {
589 		r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem);
590 	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
591 		   new_mem->mem_type == TTM_PL_VRAM) {
592 		r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem);
593 	} else {
594 		r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu,
595 				     new_mem, old_mem);
596 	}
597 
598 	if (r) {
599 memcpy:
600 		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
601 		if (r) {
602 			return r;
603 		}
604 	}
605 
606 	if (bo->type == ttm_bo_type_device &&
607 	    new_mem->mem_type == TTM_PL_VRAM &&
608 	    old_mem->mem_type != TTM_PL_VRAM) {
609 		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
610 		 * accesses the BO after it's moved.
611 		 */
612 		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
613 	}
614 
615 	/* update statistics */
616 	atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved);
617 	return 0;
618 }
619 
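/**
 * amdgpu_ttm_io_mem_reserve - Reserve the IO space needed to CPU map @mem
 *
 * Fills in mem->bus for the given placement; VRAM beyond the CPU visible
 * window is rejected with -EINVAL.
 */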
620 static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
621 {
622 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
623 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
624 
625 	mem->bus.addr = NULL;
626 	mem->bus.offset = 0;
627 	mem->bus.size = mem->num_pages << PAGE_SHIFT;
628 	mem->bus.base = 0;
629 	mem->bus.is_iomem = false;
630 	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
631 		return -EINVAL;
632 	switch (mem->mem_type) {
633 	case TTM_PL_SYSTEM:
634 		/* system memory */
635 		return 0;
636 	case TTM_PL_TT:
637 		break;
638 	case TTM_PL_VRAM:
639 		mem->bus.offset = mem->start << PAGE_SHIFT;
640 		/* check if it's visible */
641 		if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size)
642 			return -EINVAL;
643 		mem->bus.base = adev->gmc.aper_base;
644 		mem->bus.is_iomem = true;
645 		break;
646 	default:
647 		return -EINVAL;
648 	}
649 	return 0;
650 }
651 
652 static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
653 {
654 }
655 
656 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
657 					   unsigned long page_offset)
658 {
659 	struct drm_mm_node *mm;
660 	unsigned long offset = (page_offset << PAGE_SHIFT);
661 
662 	mm = amdgpu_find_mm_node(&bo->mem, &offset);
663 	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
664 		(offset >> PAGE_SHIFT);
665 }
666 
667 /*
668  * TTM backend functions.
669  */
670 struct amdgpu_ttm_gup_task_list {
671 	struct list_head	list;
672 	struct task_struct	*task;
673 };
674 
675 struct amdgpu_ttm_tt {
676 	struct ttm_dma_tt	ttm;
677 	struct amdgpu_device	*adev;
678 	u64			offset;
679 	uint64_t		userptr;
680 	struct mm_struct	*usermm;
681 	uint32_t		userflags;
682 	spinlock_t              guptasklock;
683 	struct list_head        guptasks;
684 	atomic_t		mmu_invalidations;
685 	uint32_t		last_set_pages;
686 };
687 
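/**
 * amdgpu_ttm_tt_get_user_pages - Pin the pages backing a userptr BO
 *
 * Calls get_user_pages() in chunks until all pages of the user mapping
 * are pinned, recording the calling task so that invalidations triggered
 * by this get_user_pages() call itself can be ignored.
 */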
688 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
689 {
690 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
691 	unsigned int flags = 0;
692 	unsigned pinned = 0;
693 	int r;
694 
695 	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
696 		flags |= FOLL_WRITE;
697 
698 	down_read(&current->mm->mmap_sem);
699 
700 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
		/* check that we only use anonymous memory
		 * to prevent problems with writeback
		 */
703 		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
704 		struct vm_area_struct *vma;
705 
706 		vma = find_vma(gtt->usermm, gtt->userptr);
707 		if (!vma || vma->vm_file || vma->vm_end < end) {
708 			up_read(&current->mm->mmap_sem);
709 			return -EPERM;
710 		}
711 	}
712 
713 	do {
714 		unsigned num_pages = ttm->num_pages - pinned;
715 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
716 		struct page **p = pages + pinned;
717 		struct amdgpu_ttm_gup_task_list guptask;
718 
719 		guptask.task = current;
720 		spin_lock(&gtt->guptasklock);
721 		list_add(&guptask.list, &gtt->guptasks);
722 		spin_unlock(&gtt->guptasklock);
723 
724 		r = get_user_pages(userptr, num_pages, flags, p, NULL);
725 
726 		spin_lock(&gtt->guptasklock);
727 		list_del(&guptask.list);
728 		spin_unlock(&gtt->guptasklock);
729 
730 		if (r < 0)
731 			goto release_pages;
732 
733 		pinned += r;
734 
735 	} while (pinned < ttm->num_pages);
736 
737 	up_read(&current->mm->mmap_sem);
738 	return 0;
739 
740 release_pages:
741 	release_pages(pages, pinned);
742 	up_read(&current->mm->mmap_sem);
743 	return r;
744 }
745 
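/**
 * amdgpu_ttm_tt_set_user_pages - Replace the page array of a userptr TTM
 *
 * Drops the references on the old pages, installs @pages (or NULL) and
 * remembers the current invalidation counter.
 */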
746 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
747 {
748 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
749 	unsigned i;
750 
751 	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
752 	for (i = 0; i < ttm->num_pages; ++i) {
753 		if (ttm->pages[i])
754 			put_page(ttm->pages[i]);
755 
756 		ttm->pages[i] = pages ? pages[i] : NULL;
757 	}
758 }
759 
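/**
 * amdgpu_ttm_tt_mark_user_pages - Mark user pages as dirty and accessed
 *
 * Called before the pages are released again so the core MM sees the
 * GPU writes.
 */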
760 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
761 {
762 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
763 	unsigned i;
764 
765 	for (i = 0; i < ttm->num_pages; ++i) {
766 		struct page *page = ttm->pages[i];
767 
768 		if (!page)
769 			continue;
770 
771 		if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
772 			set_page_dirty(page);
773 
774 		mark_page_accessed(page);
775 	}
776 }
777 
778 /* prepare the sg table with the user pages */
779 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
780 {
781 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
782 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
783 	unsigned nents;
784 	int r;
785 
786 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
787 	enum dma_data_direction direction = write ?
788 		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
789 
790 	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
791 				      ttm->num_pages << PAGE_SHIFT,
792 				      GFP_KERNEL);
793 	if (r)
794 		goto release_sg;
795 
796 	r = -ENOMEM;
797 	nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
798 	if (nents != ttm->sg->nents)
799 		goto release_sg;
800 
801 	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
802 					 gtt->ttm.dma_address, ttm->num_pages);
803 
804 	return 0;
805 
806 release_sg:
807 	kfree(ttm->sg);
808 	return r;
809 }
810 
811 static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
812 {
813 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
814 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
815 
816 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
817 	enum dma_data_direction direction = write ?
818 		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
819 
820 	/* double check that we don't free the table twice */
821 	if (!ttm->sg->sgl)
822 		return;
823 
824 	/* free the sg table and pages again */
825 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
826 
827 	amdgpu_ttm_tt_mark_user_pages(ttm);
828 
829 	sg_free_table(ttm->sg);
830 }
831 
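/**
 * amdgpu_ttm_backend_bind - Bind a TTM to its GART address
 *
 * Pins userptr pages if needed and, when @bo_mem already has a valid GART
 * address, writes the corresponding GART entries.
 */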
832 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
833 				   struct ttm_mem_reg *bo_mem)
834 {
835 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
836 	uint64_t flags;
837 	int r = 0;
838 
839 	if (gtt->userptr) {
840 		r = amdgpu_ttm_tt_pin_userptr(ttm);
841 		if (r) {
842 			DRM_ERROR("failed to pin userptr\n");
843 			return r;
844 		}
845 	}
846 	if (!ttm->num_pages) {
847 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
848 		     ttm->num_pages, bo_mem, ttm);
849 	}
850 
851 	if (bo_mem->mem_type == AMDGPU_PL_GDS ||
852 	    bo_mem->mem_type == AMDGPU_PL_GWS ||
853 	    bo_mem->mem_type == AMDGPU_PL_OA)
854 		return -EINVAL;
855 
856 	if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
857 		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
858 		return 0;
859 	}
860 
861 	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
862 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
863 	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
864 		ttm->pages, gtt->ttm.dma_address, flags);
865 
866 	if (r)
867 		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
868 			  ttm->num_pages, gtt->offset);
869 	return r;
870 }
871 
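/**
 * amdgpu_ttm_alloc_gart - Allocate a GART address for a BO and bind it
 *
 * Used for TT placements that were created without a GART address; finds
 * space in the GART and binds the BO's pages there.
 */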
872 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
873 {
874 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
875 	struct ttm_operation_ctx ctx = { false, false };
876 	struct amdgpu_ttm_tt *gtt = (void*)bo->ttm;
877 	struct ttm_mem_reg tmp;
878 	struct ttm_placement placement;
879 	struct ttm_place placements;
880 	uint64_t flags;
881 	int r;
882 
883 	if (bo->mem.mem_type != TTM_PL_TT ||
884 	    amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
885 		return 0;
886 
887 	tmp = bo->mem;
888 	tmp.mm_node = NULL;
889 	placement.num_placement = 1;
890 	placement.placement = &placements;
891 	placement.num_busy_placement = 1;
892 	placement.busy_placement = &placements;
893 	placements.fpfn = 0;
894 	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
895 	placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
896 		TTM_PL_FLAG_TT;
897 
898 	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
899 	if (unlikely(r))
900 		return r;
901 
902 	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
903 	gtt->offset = (u64)tmp.start << PAGE_SHIFT;
904 	r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages,
905 			     bo->ttm->pages, gtt->ttm.dma_address, flags);
906 	if (unlikely(r)) {
907 		ttm_bo_mem_put(bo, &tmp);
908 		return r;
909 	}
910 
911 	ttm_bo_mem_put(bo, &bo->mem);
912 	bo->mem = tmp;
913 	bo->offset = (bo->mem.start << PAGE_SHIFT) +
914 		bo->bdev->man[bo->mem.mem_type].gpu_offset;
915 
916 	return 0;
917 }
918 
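/**
 * amdgpu_ttm_recover_gart - Rebind a bound BO's pages into the GART
 *
 * Used to restore the GART table entries for @tbo, e.g. after the table
 * contents were lost.
 */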
919 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
920 {
921 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
922 	struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
923 	uint64_t flags;
924 	int r;
925 
926 	if (!gtt)
927 		return 0;
928 
929 	flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem);
930 	r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
931 			     gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags);
932 	if (r)
933 		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
934 			  gtt->ttm.ttm.num_pages, gtt->offset);
935 	return r;
936 }
937 
938 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
939 {
940 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
941 	int r;
942 
943 	if (gtt->userptr)
944 		amdgpu_ttm_tt_unpin_userptr(ttm);
945 
946 	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
947 		return 0;
948 
949 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
950 	r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
951 	if (r)
952 		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
953 			  gtt->ttm.ttm.num_pages, gtt->offset);
954 	return r;
955 }
956 
957 static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
958 {
959 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
960 
961 	ttm_dma_tt_fini(&gtt->ttm);
962 	kfree(gtt);
963 }
964 
965 static struct ttm_backend_func amdgpu_backend_func = {
966 	.bind = &amdgpu_ttm_backend_bind,
967 	.unbind = &amdgpu_ttm_backend_unbind,
968 	.destroy = &amdgpu_ttm_backend_destroy,
969 };
970 
971 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
972 				    unsigned long size, uint32_t page_flags,
973 				    struct page *dummy_read_page)
974 {
975 	struct amdgpu_device *adev;
976 	struct amdgpu_ttm_tt *gtt;
977 
978 	adev = amdgpu_ttm_adev(bdev);
979 
980 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
981 	if (gtt == NULL) {
982 		return NULL;
983 	}
984 	gtt->ttm.ttm.func = &amdgpu_backend_func;
985 	gtt->adev = adev;
986 	if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
987 		kfree(gtt);
988 		return NULL;
989 	}
990 	return &gtt->ttm.ttm;
991 }
992 
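/**
 * amdgpu_ttm_tt_populate - Allocate the backing pages of a TTM
 *
 * Userptr TTMs only get an empty sg_table here, dma-buf imports reuse
 * their sg_table and everything else is allocated from the TTM page
 * pools (using the DMA pool when swiotlb is needed).
 */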
993 static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
994 			struct ttm_operation_ctx *ctx)
995 {
996 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
997 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
998 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
999 
1000 	if (ttm->state != tt_unpopulated)
1001 		return 0;
1002 
1003 	if (gtt && gtt->userptr) {
1004 		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1005 		if (!ttm->sg)
1006 			return -ENOMEM;
1007 
1008 		ttm->page_flags |= TTM_PAGE_FLAG_SG;
1009 		ttm->state = tt_unbound;
1010 		return 0;
1011 	}
1012 
1013 	if (slave && ttm->sg) {
1014 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
1015 						 gtt->ttm.dma_address, ttm->num_pages);
1016 		ttm->state = tt_unbound;
1017 		return 0;
1018 	}
1019 
1020 #ifdef CONFIG_SWIOTLB
1021 	if (adev->need_swiotlb && swiotlb_nr_tbl()) {
1022 		return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
1023 	}
1024 #endif
1025 
1026 	return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
1027 }
1028 
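/**
 * amdgpu_ttm_tt_unpopulate - Free the backing pages of a TTM
 */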
1029 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
1030 {
1031 	struct amdgpu_device *adev;
1032 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1033 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
1034 
1035 	if (gtt && gtt->userptr) {
1036 		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
1037 		kfree(ttm->sg);
1038 		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
1039 		return;
1040 	}
1041 
1042 	if (slave)
1043 		return;
1044 
1045 	adev = amdgpu_ttm_adev(ttm->bdev);
1046 
1047 #ifdef CONFIG_SWIOTLB
1048 	if (adev->need_swiotlb && swiotlb_nr_tbl()) {
1049 		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
1050 		return;
1051 	}
1052 #endif
1053 
1054 	ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
1055 }
1056 
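/**
 * amdgpu_ttm_tt_set_userptr - Turn a TTM into a userptr TTM
 *
 * Records the user address, owning mm and flags and resets the MMU
 * invalidation tracking.
 */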
1057 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1058 			      uint32_t flags)
1059 {
1060 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1061 
1062 	if (gtt == NULL)
1063 		return -EINVAL;
1064 
1065 	gtt->userptr = addr;
1066 	gtt->usermm = current->mm;
1067 	gtt->userflags = flags;
1068 	spin_lock_init(&gtt->guptasklock);
1069 	INIT_LIST_HEAD(&gtt->guptasks);
1070 	atomic_set(&gtt->mmu_invalidations, 0);
1071 	gtt->last_set_pages = 0;
1072 
1073 	return 0;
1074 }
1075 
1076 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1077 {
1078 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1079 
1080 	if (gtt == NULL)
1081 		return NULL;
1082 
1083 	return gtt->usermm;
1084 }
1085 
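/**
 * amdgpu_ttm_tt_affect_userptr - Check if a range overlaps a userptr TTM
 *
 * Returns true and bumps the invalidation counter when [@start, @end)
 * intersects the user mapping and the invalidation was not triggered by
 * a get_user_pages() call of the current task.
 */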
1086 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1087 				  unsigned long end)
1088 {
1089 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1090 	struct amdgpu_ttm_gup_task_list *entry;
1091 	unsigned long size;
1092 
1093 	if (gtt == NULL || !gtt->userptr)
1094 		return false;
1095 
1096 	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
1097 	if (gtt->userptr > end || gtt->userptr + size <= start)
1098 		return false;
1099 
1100 	spin_lock(&gtt->guptasklock);
1101 	list_for_each_entry(entry, &gtt->guptasks, list) {
1102 		if (entry->task == current) {
1103 			spin_unlock(&gtt->guptasklock);
1104 			return false;
1105 		}
1106 	}
1107 	spin_unlock(&gtt->guptasklock);
1108 
1109 	atomic_inc(&gtt->mmu_invalidations);
1110 
1111 	return true;
1112 }
1113 
1114 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
1115 				       int *last_invalidated)
1116 {
1117 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1118 	int prev_invalidated = *last_invalidated;
1119 
1120 	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
1121 	return prev_invalidated != *last_invalidated;
1122 }
1123 
1124 bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
1125 {
1126 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1127 
1128 	if (gtt == NULL || !gtt->userptr)
1129 		return false;
1130 
1131 	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
1132 }
1133 
1134 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1135 {
1136 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1137 
1138 	if (gtt == NULL)
1139 		return false;
1140 
1141 	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1142 }
1143 
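/**
 * amdgpu_ttm_tt_pte_flags - Compute the GART/PTE flags to use for @mem
 *
 * Derives the valid, system, snooped and read/write bits from the
 * placement and caching state.
 */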
1144 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1145 				 struct ttm_mem_reg *mem)
1146 {
1147 	uint64_t flags = 0;
1148 
1149 	if (mem && mem->mem_type != TTM_PL_SYSTEM)
1150 		flags |= AMDGPU_PTE_VALID;
1151 
1152 	if (mem && mem->mem_type == TTM_PL_TT) {
1153 		flags |= AMDGPU_PTE_SYSTEM;
1154 
1155 		if (ttm->caching_state == tt_cached)
1156 			flags |= AMDGPU_PTE_SNOOPED;
1157 	}
1158 
1159 	flags |= adev->gart.gart_pte_flags;
1160 	flags |= AMDGPU_PTE_READABLE;
1161 
1162 	if (!amdgpu_ttm_tt_is_readonly(ttm))
1163 		flags |= AMDGPU_PTE_WRITEABLE;
1164 
1165 	return flags;
1166 }
1167 
1168 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1169 					    const struct ttm_place *place)
1170 {
1171 	unsigned long num_pages = bo->mem.num_pages;
1172 	struct drm_mm_node *node = bo->mem.mm_node;
1173 
1174 	switch (bo->mem.mem_type) {
1175 	case TTM_PL_TT:
1176 		return true;
1177 
1178 	case TTM_PL_VRAM:
1179 		/* Check each drm MM node individually */
1180 		while (num_pages) {
1181 			if (place->fpfn < (node->start + node->size) &&
1182 			    !(place->lpfn && place->lpfn <= node->start))
1183 				return true;
1184 
1185 			num_pages -= node->size;
1186 			++node;
1187 		}
1188 		return false;
1189 
1190 	default:
1191 		break;
1192 	}
1193 
1194 	return ttm_bo_eviction_valuable(bo, place);
1195 }
1196 
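/**
 * amdgpu_ttm_access_memory - Read or write VRAM behind a BO
 *
 * Implements the TTM access_memory callback by going through the
 * MM_INDEX/MM_DATA register window one aligned dword at a time.
 */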
1197 static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1198 				    unsigned long offset,
1199 				    void *buf, int len, int write)
1200 {
1201 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1202 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1203 	struct drm_mm_node *nodes;
1204 	uint32_t value = 0;
1205 	int ret = 0;
1206 	uint64_t pos;
1207 	unsigned long flags;
1208 
1209 	if (bo->mem.mem_type != TTM_PL_VRAM)
1210 		return -EIO;
1211 
1212 	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
1213 	pos = (nodes->start << PAGE_SHIFT) + offset;
1214 
1215 	while (len && pos < adev->gmc.mc_vram_size) {
1216 		uint64_t aligned_pos = pos & ~(uint64_t)3;
1217 		uint32_t bytes = 4 - (pos & 3);
1218 		uint32_t shift = (pos & 3) * 8;
1219 		uint32_t mask = 0xffffffff << shift;
1220 
1221 		if (len < bytes) {
1222 			mask &= 0xffffffff >> (bytes - len) * 8;
1223 			bytes = len;
1224 		}
1225 
1226 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1227 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
1228 		WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
1229 		if (!write || mask != 0xffffffff)
1230 			value = RREG32_NO_KIQ(mmMM_DATA);
1231 		if (write) {
1232 			value &= ~mask;
1233 			value |= (*(uint32_t *)buf << shift) & mask;
1234 			WREG32_NO_KIQ(mmMM_DATA, value);
1235 		}
1236 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1237 		if (!write) {
1238 			value = (value & mask) >> shift;
1239 			memcpy(buf, &value, bytes);
1240 		}
1241 
1242 		ret += bytes;
1243 		buf = (uint8_t *)buf + bytes;
1244 		pos += bytes;
1245 		len -= bytes;
1246 		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
1247 			++nodes;
1248 			pos = (nodes->start << PAGE_SHIFT);
1249 		}
1250 	}
1251 
1252 	return ret;
1253 }
1254 
1255 static struct ttm_bo_driver amdgpu_bo_driver = {
1256 	.ttm_tt_create = &amdgpu_ttm_tt_create,
1257 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
1258 	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1259 	.invalidate_caches = &amdgpu_invalidate_caches,
1260 	.init_mem_type = &amdgpu_init_mem_type,
1261 	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
1262 	.evict_flags = &amdgpu_evict_flags,
1263 	.move = &amdgpu_bo_move,
1264 	.verify_access = &amdgpu_verify_access,
1265 	.move_notify = &amdgpu_bo_move_notify,
1266 	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
1267 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1268 	.io_mem_free = &amdgpu_ttm_io_mem_free,
1269 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1270 	.access_memory = &amdgpu_ttm_access_memory
1271 };
1272 
1273 /*
1274  * Firmware Reservation functions
1275  */
1276 /**
1277  * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
1278  *
1279  * @adev: amdgpu_device pointer
1280  *
 * Free the VRAM reserved for firmware, if any has been reserved.
1282  */
1283 static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
1284 {
1285 	amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
1286 		NULL, &adev->fw_vram_usage.va);
1287 }
1288 
1289 /**
1290  * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
1291  *
1292  * @adev: amdgpu_device pointer
1293  *
 * Create the VRAM reservation (BO) requested by the firmware.
1295  */
1296 static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1297 {
1298 	struct ttm_operation_ctx ctx = { false, false };
1299 	int r = 0;
1300 	int i;
1301 	u64 vram_size = adev->gmc.visible_vram_size;
1302 	u64 offset = adev->fw_vram_usage.start_offset;
1303 	u64 size = adev->fw_vram_usage.size;
1304 	struct amdgpu_bo *bo;
1305 
1306 	adev->fw_vram_usage.va = NULL;
1307 	adev->fw_vram_usage.reserved_bo = NULL;
1308 
1309 	if (adev->fw_vram_usage.size > 0 &&
1310 		adev->fw_vram_usage.size <= vram_size) {
1311 
1312 		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
1313 			PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
1314 			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1315 			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
1316 			&adev->fw_vram_usage.reserved_bo);
1317 		if (r)
1318 			goto error_create;
1319 
1320 		r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
1321 		if (r)
1322 			goto error_reserve;
1323 
		/* remove the original mem node and create a new one at the
		 * requested position
		 */
1327 		bo = adev->fw_vram_usage.reserved_bo;
1328 		offset = ALIGN(offset, PAGE_SIZE);
1329 		for (i = 0; i < bo->placement.num_placement; ++i) {
1330 			bo->placements[i].fpfn = offset >> PAGE_SHIFT;
1331 			bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
1332 		}
1333 
1334 		ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
1335 		r = ttm_bo_mem_space(&bo->tbo, &bo->placement,
1336 				     &bo->tbo.mem, &ctx);
1337 		if (r)
1338 			goto error_pin;
1339 
1340 		r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
1341 			AMDGPU_GEM_DOMAIN_VRAM,
1342 			adev->fw_vram_usage.start_offset,
1343 			(adev->fw_vram_usage.start_offset +
1344 			adev->fw_vram_usage.size), NULL);
1345 		if (r)
1346 			goto error_pin;
1347 		r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
1348 			&adev->fw_vram_usage.va);
1349 		if (r)
1350 			goto error_kmap;
1351 
1352 		amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
1353 	}
1354 	return r;
1355 
1356 error_kmap:
1357 	amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
1358 error_pin:
1359 	amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
1360 error_reserve:
1361 	amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
1362 error_create:
1363 	adev->fw_vram_usage.va = NULL;
1364 	adev->fw_vram_usage.reserved_bo = NULL;
1365 	return r;
1366 }
1367 
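/**
 * amdgpu_ttm_init - Initialize the TTM memory managers for the device
 *
 * Sets up the VRAM, GTT, GDS, GWS and OA heaps, reserves the firmware and
 * stolen VGA regions and registers the debugfs files.
 */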
1368 int amdgpu_ttm_init(struct amdgpu_device *adev)
1369 {
1370 	uint64_t gtt_size;
1371 	int r;
1372 	u64 vis_vram_limit;
1373 
1374 	r = amdgpu_ttm_global_init(adev);
1375 	if (r) {
1376 		return r;
1377 	}
	/* No other user of the address space, so set it to 0 */
1379 	r = ttm_bo_device_init(&adev->mman.bdev,
1380 			       adev->mman.bo_global_ref.ref.object,
1381 			       &amdgpu_bo_driver,
1382 			       adev->ddev->anon_inode->i_mapping,
1383 			       DRM_FILE_PAGE_OFFSET,
1384 			       adev->need_dma32);
1385 	if (r) {
1386 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
1387 		return r;
1388 	}
1389 	adev->mman.initialized = true;
1390 
	/* We opt to avoid OOM on system page allocations */
1392 	adev->mman.bdev.no_retry = true;
1393 
1394 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
1395 				adev->gmc.real_vram_size >> PAGE_SHIFT);
1396 	if (r) {
1397 		DRM_ERROR("Failed initializing VRAM heap.\n");
1398 		return r;
1399 	}
1400 
1401 	/* Reduce size of CPU-visible VRAM if requested */
1402 	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
1403 	if (amdgpu_vis_vram_limit > 0 &&
1404 	    vis_vram_limit <= adev->gmc.visible_vram_size)
1405 		adev->gmc.visible_vram_size = vis_vram_limit;
1406 
1407 	/* Change the size here instead of the init above so only lpfn is affected */
1408 	amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size);
1409 
	/*
	 * The VRAM reserved for firmware must be pinned to the specified
	 * place in VRAM, so reserve it early.
	 */
1414 	r = amdgpu_ttm_fw_reserve_vram_init(adev);
1415 	if (r) {
1416 		return r;
1417 	}
1418 
1419 	r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
1420 				    AMDGPU_GEM_DOMAIN_VRAM,
1421 				    &adev->stolen_vga_memory,
1422 				    NULL, NULL);
1423 	if (r)
1424 		return r;
1425 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1426 		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
1427 
1428 	if (amdgpu_gtt_size == -1) {
1429 		struct sysinfo si;
1430 
1431 		si_meminfo(&si);
1432 		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
1433 			       adev->gmc.mc_vram_size),
1434 			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
	} else {
		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
	}
1438 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
1439 	if (r) {
1440 		DRM_ERROR("Failed initializing GTT heap.\n");
1441 		return r;
1442 	}
1443 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
1444 		 (unsigned)(gtt_size / (1024 * 1024)));
1445 
1446 	adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
1447 	adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
1448 	adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
1449 	adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
1450 	adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
1451 	adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
1452 	adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
1453 	adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
1454 	adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
1455 	/* GDS Memory */
1456 	if (adev->gds.mem.total_size) {
1457 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
1458 				   adev->gds.mem.total_size >> PAGE_SHIFT);
1459 		if (r) {
1460 			DRM_ERROR("Failed initializing GDS heap.\n");
1461 			return r;
1462 		}
1463 	}
1464 
1465 	/* GWS */
1466 	if (adev->gds.gws.total_size) {
1467 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
1468 				   adev->gds.gws.total_size >> PAGE_SHIFT);
1469 		if (r) {
1470 			DRM_ERROR("Failed initializing gws heap.\n");
1471 			return r;
1472 		}
1473 	}
1474 
1475 	/* OA */
1476 	if (adev->gds.oa.total_size) {
1477 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
1478 				   adev->gds.oa.total_size >> PAGE_SHIFT);
1479 		if (r) {
1480 			DRM_ERROR("Failed initializing oa heap.\n");
1481 			return r;
1482 		}
1483 	}
1484 
1485 	r = amdgpu_ttm_debugfs_init(adev);
1486 	if (r) {
1487 		DRM_ERROR("Failed to init debugfs\n");
1488 		return r;
1489 	}
1490 	return 0;
1491 }
1492 
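/**
 * amdgpu_ttm_fini - Tear down the TTM memory managers again
 */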
1493 void amdgpu_ttm_fini(struct amdgpu_device *adev)
1494 {
1495 	if (!adev->mman.initialized)
1496 		return;
1497 
1498 	amdgpu_ttm_debugfs_fini(adev);
1499 	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
1500 	amdgpu_ttm_fw_reserve_vram_fini(adev);
1501 
1502 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
1503 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
1504 	if (adev->gds.mem.total_size)
1505 		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
1506 	if (adev->gds.gws.total_size)
1507 		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
1508 	if (adev->gds.oa.total_size)
1509 		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
1510 	ttm_bo_device_release(&adev->mman.bdev);
1511 	amdgpu_ttm_global_fini(adev);
1512 	adev->mman.initialized = false;
1513 	DRM_INFO("amdgpu: ttm finalized\n");
1514 }
1515 
1516 /* this should only be called at bootup or when userspace
1517  * isn't running */
1518 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
1519 {
1520 	struct ttm_mem_type_manager *man;
1521 
1522 	if (!adev->mman.initialized)
1523 		return;
1524 
1525 	man = &adev->mman.bdev.man[TTM_PL_VRAM];
	/* this just adjusts TTM's idea of the VRAM size,
	 * which sets lpfn to the correct value
	 */
1527 	man->size = size >> PAGE_SHIFT;
1528 }
1529 
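/**
 * amdgpu_mmap - mmap file operation of the amdgpu DRM device
 *
 * Validates the fake offset range and hands the mapping off to TTM.
 */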
1530 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
1531 {
1532 	struct drm_file *file_priv;
1533 	struct amdgpu_device *adev;
1534 
1535 	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
1536 		return -EINVAL;
1537 
1538 	file_priv = filp->private_data;
1539 	adev = file_priv->minor->dev->dev_private;
1540 	if (adev == NULL)
1541 		return -EINVAL;
1542 
1543 	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
1544 }
1545 
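/**
 * amdgpu_map_buffer - Map part of a BO into one of the GTT copy windows
 *
 * Builds a job that writes the GART entries for @num_pages of @bo,
 * starting at @offset, into copy window @window and returns the GPU
 * address of that window in @addr.
 */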
1546 static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
1547 			     struct ttm_mem_reg *mem, unsigned num_pages,
1548 			     uint64_t offset, unsigned window,
1549 			     struct amdgpu_ring *ring,
1550 			     uint64_t *addr)
1551 {
1552 	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
1553 	struct amdgpu_device *adev = ring->adev;
1554 	struct ttm_tt *ttm = bo->ttm;
1555 	struct amdgpu_job *job;
1556 	unsigned num_dw, num_bytes;
1557 	dma_addr_t *dma_address;
1558 	struct dma_fence *fence;
1559 	uint64_t src_addr, dst_addr;
1560 	uint64_t flags;
1561 	int r;
1562 
1563 	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
1564 	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
1565 
1566 	*addr = adev->gmc.gart_start;
1567 	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
1568 		AMDGPU_GPU_PAGE_SIZE;
1569 
1570 	num_dw = adev->mman.buffer_funcs->copy_num_dw;
1571 	while (num_dw & 0x7)
1572 		num_dw++;
1573 
1574 	num_bytes = num_pages * 8;
1575 
1576 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
1577 	if (r)
1578 		return r;
1579 
1580 	src_addr = num_dw * 4;
1581 	src_addr += job->ibs[0].gpu_addr;
1582 
1583 	dst_addr = adev->gart.table_addr;
1584 	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
1585 	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
1586 				dst_addr, num_bytes);
1587 
1588 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1589 	WARN_ON(job->ibs[0].length_dw > num_dw);
1590 
1591 	dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
1592 	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
1593 	r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
1594 			    &job->ibs[0].ptr[num_dw]);
1595 	if (r)
1596 		goto error_free;
1597 
1598 	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
1599 			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
1600 	if (r)
1601 		goto error_free;
1602 
1603 	dma_fence_put(fence);
1604 
1605 	return r;
1606 
1607 error_free:
1608 	amdgpu_job_free(job);
1609 	return r;
1610 }
1611 
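/**
 * amdgpu_copy_buffer - Schedule a GPU copy between two GPU addresses
 *
 * Splits @byte_count into chunks the copy engine can handle and either
 * submits the job through the scheduler or schedules the IBs directly.
 */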
1612 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
1613 		       uint64_t dst_offset, uint32_t byte_count,
1614 		       struct reservation_object *resv,
1615 		       struct dma_fence **fence, bool direct_submit,
1616 		       bool vm_needs_flush)
1617 {
1618 	struct amdgpu_device *adev = ring->adev;
1619 	struct amdgpu_job *job;
1620 
1621 	uint32_t max_bytes;
1622 	unsigned num_loops, num_dw;
1623 	unsigned i;
1624 	int r;
1625 
1626 	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
1627 	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
1628 	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
1629 
1630 	/* for IB padding */
1631 	while (num_dw & 0x7)
1632 		num_dw++;
1633 
1634 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
1635 	if (r)
1636 		return r;
1637 
1638 	job->vm_needs_flush = vm_needs_flush;
1639 	if (resv) {
1640 		r = amdgpu_sync_resv(adev, &job->sync, resv,
1641 				     AMDGPU_FENCE_OWNER_UNDEFINED,
1642 				     false);
1643 		if (r) {
1644 			DRM_ERROR("sync failed (%d).\n", r);
1645 			goto error_free;
1646 		}
1647 	}
1648 
1649 	for (i = 0; i < num_loops; i++) {
1650 		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
1651 
1652 		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
1653 					dst_offset, cur_size_in_bytes);
1654 
1655 		src_offset += cur_size_in_bytes;
1656 		dst_offset += cur_size_in_bytes;
1657 		byte_count -= cur_size_in_bytes;
1658 	}
1659 
1660 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1661 	WARN_ON(job->ibs[0].length_dw > num_dw);
1662 	if (direct_submit) {
1663 		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
1664 				       NULL, fence);
1665 		job->fence = dma_fence_get(*fence);
1666 		if (r)
1667 			DRM_ERROR("Error scheduling IBs (%d)\n", r);
1668 		amdgpu_job_free(job);
1669 	} else {
1670 		r = amdgpu_job_submit(job, ring, &adev->mman.entity,
1671 				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
1672 		if (r)
1673 			goto error_free;
1674 	}
1675 
1676 	return r;
1677 
1678 error_free:
1679 	amdgpu_job_free(job);
1680 	return r;
1681 }
1682 
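/**
 * amdgpu_fill_buffer - Fill a BO with a 64 bit value using the GPU
 *
 * Walks the drm_mm nodes backing the BO and emits a PTE/PDE style fill
 * for each of them.
 */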
1683 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
1684 		       uint64_t src_data,
1685 		       struct reservation_object *resv,
1686 		       struct dma_fence **fence)
1687 {
1688 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
1689 	uint32_t max_bytes = 8 *
1690 			adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
1691 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
1692 
1693 	struct drm_mm_node *mm_node;
1694 	unsigned long num_pages;
1695 	unsigned int num_loops, num_dw;
1696 
1697 	struct amdgpu_job *job;
1698 	int r;
1699 
1700 	if (!ring->ready) {
1701 		DRM_ERROR("Trying to clear memory with ring turned off.\n");
1702 		return -EINVAL;
1703 	}
1704 
1705 	if (bo->tbo.mem.mem_type == TTM_PL_TT) {
1706 		r = amdgpu_ttm_alloc_gart(&bo->tbo);
1707 		if (r)
1708 			return r;
1709 	}
1710 
1711 	num_pages = bo->tbo.num_pages;
1712 	mm_node = bo->tbo.mem.mm_node;
1713 	num_loops = 0;
1714 	while (num_pages) {
1715 		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
1716 
1717 		num_loops += DIV_ROUND_UP(byte_count, max_bytes);
1718 		num_pages -= mm_node->size;
1719 		++mm_node;
1720 	}
1721 
1722 	/* num of dwords for each SDMA_OP_PTEPDE cmd */
1723 	num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
1724 
1725 	/* for IB padding */
1726 	num_dw += 64;
1727 
1728 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
1729 	if (r)
1730 		return r;
1731 
1732 	if (resv) {
1733 		r = amdgpu_sync_resv(adev, &job->sync, resv,
1734 				     AMDGPU_FENCE_OWNER_UNDEFINED, false);
1735 		if (r) {
1736 			DRM_ERROR("sync failed (%d).\n", r);
1737 			goto error_free;
1738 		}
1739 	}
1740 
1741 	num_pages = bo->tbo.num_pages;
1742 	mm_node = bo->tbo.mem.mm_node;
1743 
1744 	while (num_pages) {
1745 		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
1746 		uint64_t dst_addr;
1747 
1748 		WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8");
1749 
1750 		dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
1751 		while (byte_count) {
1752 			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
1753 
1754 			amdgpu_vm_set_pte_pde(adev, &job->ibs[0],
1755 					dst_addr, 0,
1756 					cur_size_in_bytes >> 3, 0,
1757 					src_data);
1758 
1759 			dst_addr += cur_size_in_bytes;
1760 			byte_count -= cur_size_in_bytes;
1761 		}
1762 
1763 		num_pages -= mm_node->size;
1764 		++mm_node;
1765 	}
1766 
1767 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1768 	WARN_ON(job->ibs[0].length_dw > num_dw);
1769 	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
1770 			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
1771 	if (r)
1772 		goto error_free;
1773 
1774 	return 0;
1775 
1776 error_free:
1777 	amdgpu_job_free(job);
1778 	return r;
1779 }
1780 
1781 #if defined(CONFIG_DEBUG_FS)
1782 
1783 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
1784 {
1785 	struct drm_info_node *node = (struct drm_info_node *)m->private;
1786 	unsigned ttm_pl = *(int *)node->info_ent->data;
1787 	struct drm_device *dev = node->minor->dev;
1788 	struct amdgpu_device *adev = dev->dev_private;
1789 	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
1790 	struct drm_printer p = drm_seq_file_printer(m);
1791 
1792 	man->func->debug(man, &p);
1793 	return 0;
1794 }
1795 
1796 static int ttm_pl_vram = TTM_PL_VRAM;
1797 static int ttm_pl_tt = TTM_PL_TT;
1798 
1799 static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
1800 	{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
1801 	{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
1802 	{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
1803 #ifdef CONFIG_SWIOTLB
1804 	{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
1805 #endif
1806 };
1807 
1808 static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1809 				    size_t size, loff_t *pos)
1810 {
1811 	struct amdgpu_device *adev = file_inode(f)->i_private;
1812 	ssize_t result = 0;
1813 	int r;
1814 
1815 	if (size & 0x3 || *pos & 0x3)
1816 		return -EINVAL;
1817 
1818 	if (*pos >= adev->gmc.mc_vram_size)
1819 		return -ENXIO;
1820 
1821 	while (size) {
1822 		unsigned long flags;
1823 		uint32_t value;
1824 
1825 		if (*pos >= adev->gmc.mc_vram_size)
1826 			return result;
1827 
1828 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1829 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
1830 		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
1831 		value = RREG32_NO_KIQ(mmMM_DATA);
1832 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1833 
1834 		r = put_user(value, (uint32_t *)buf);
1835 		if (r)
1836 			return r;
1837 
1838 		result += 4;
1839 		buf += 4;
1840 		*pos += 4;
1841 		size -= 4;
1842 	}
1843 
1844 	return result;
1845 }
1846 
1847 static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
1848 				    size_t size, loff_t *pos)
1849 {
1850 	struct amdgpu_device *adev = file_inode(f)->i_private;
1851 	ssize_t result = 0;
1852 	int r;
1853 
1854 	if (size & 0x3 || *pos & 0x3)
1855 		return -EINVAL;
1856 
1857 	if (*pos >= adev->gmc.mc_vram_size)
1858 		return -ENXIO;
1859 
1860 	while (size) {
1861 		unsigned long flags;
1862 		uint32_t value;
1863 
1864 		if (*pos >= adev->gmc.mc_vram_size)
1865 			return result;
1866 
1867 		r = get_user(value, (uint32_t *)buf);
1868 		if (r)
1869 			return r;
1870 
1871 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1872 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
1873 		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
1874 		WREG32_NO_KIQ(mmMM_DATA, value);
1875 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1876 
1877 		result += 4;
1878 		buf += 4;
1879 		*pos += 4;
1880 		size -= 4;
1881 	}
1882 
1883 	return result;
1884 }
1885 
1886 static const struct file_operations amdgpu_ttm_vram_fops = {
1887 	.owner = THIS_MODULE,
1888 	.read = amdgpu_ttm_vram_read,
1889 	.write = amdgpu_ttm_vram_write,
1890 	.llseek = default_llseek,
1891 };
1892 
1893 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1894 
1895 static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
1896 				   size_t size, loff_t *pos)
1897 {
1898 	struct amdgpu_device *adev = file_inode(f)->i_private;
1899 	ssize_t result = 0;
1900 	int r;
1901 
1902 	while (size) {
1903 		loff_t p = *pos / PAGE_SIZE;
1904 		unsigned off = *pos & ~PAGE_MASK;
1905 		size_t cur_size = min_t(size_t, size, PAGE_SIZE - off);
1906 		struct page *page;
1907 		void *ptr;
1908 
1909 		if (p >= adev->gart.num_cpu_pages)
1910 			return result;
1911 
1912 		page = adev->gart.pages[p];
1913 		if (page) {
1914 			ptr = kmap(page);
1915 			ptr += off;
1916 
1917 			r = copy_to_user(buf, ptr, cur_size);
1918 			kunmap(adev->gart.pages[p]);
1919 		} else
1920 			r = clear_user(buf, cur_size);
1921 
1922 		if (r)
1923 			return -EFAULT;
1924 
1925 		result += cur_size;
1926 		buf += cur_size;
1927 		*pos += cur_size;
1928 		size -= cur_size;
1929 	}
1930 
1931 	return result;
1932 }
1933 
1934 static const struct file_operations amdgpu_ttm_gtt_fops = {
1935 	.owner = THIS_MODULE,
1936 	.read = amdgpu_ttm_gtt_read,
1937 	.llseek = default_llseek
1938 };
1939 
1940 #endif
1941 
1942 static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
1943 				   size_t size, loff_t *pos)
1944 {
1945 	struct amdgpu_device *adev = file_inode(f)->i_private;
1946 	int r;
1947 	uint64_t phys;
1948 	struct iommu_domain *dom;
1949 
	/* always return 8 bytes */
1951 	if (size != 8)
1952 		return -EINVAL;
1953 
	/* only accept page addresses */
1955 	if (*pos & 0xFFF)
1956 		return -EINVAL;
1957 
1958 	dom = iommu_get_domain_for_dev(adev->dev);
1959 	if (dom)
1960 		phys = iommu_iova_to_phys(dom, *pos);
1961 	else
1962 		phys = *pos;
1963 
1964 	r = copy_to_user(buf, &phys, 8);
1965 	if (r)
1966 		return -EFAULT;
1967 
1968 	return 8;
1969 }
1970 
1971 static const struct file_operations amdgpu_ttm_iova_fops = {
1972 	.owner = THIS_MODULE,
1973 	.read = amdgpu_iova_to_phys_read,
1974 	.llseek = default_llseek
1975 };
1976 
1977 static const struct {
1978 	char *name;
1979 	const struct file_operations *fops;
1980 	int domain;
1981 } ttm_debugfs_entries[] = {
1982 	{ "amdgpu_vram", &amdgpu_ttm_vram_fops, TTM_PL_VRAM },
1983 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1984 	{ "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
1985 #endif
1986 	{ "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM },
1987 };
1988 
1989 #endif
1990 
1991 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
1992 {
1993 #if defined(CONFIG_DEBUG_FS)
1994 	unsigned count;
1995 
1996 	struct drm_minor *minor = adev->ddev->primary;
1997 	struct dentry *ent, *root = minor->debugfs_root;
1998 
1999 	for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) {
2000 		ent = debugfs_create_file(
2001 				ttm_debugfs_entries[count].name,
2002 				S_IFREG | S_IRUGO, root,
2003 				adev,
2004 				ttm_debugfs_entries[count].fops);
2005 		if (IS_ERR(ent))
2006 			return PTR_ERR(ent);
2007 		if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
2008 			i_size_write(ent->d_inode, adev->gmc.mc_vram_size);
2009 		else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
2010 			i_size_write(ent->d_inode, adev->gmc.gart_size);
2011 		adev->mman.debugfs_entries[count] = ent;
2012 	}
2013 
2014 	count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);
2015 
2016 #ifdef CONFIG_SWIOTLB
2017 	if (!(adev->need_swiotlb && swiotlb_nr_tbl()))
2018 		--count;
2019 #endif
2020 
2021 	return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
2022 #else
2023 	return 0;
2024 #endif
2025 }
2026 
2027 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
2028 {
2029 #if defined(CONFIG_DEBUG_FS)
2030 	unsigned i;
2031 
2032 	for (i = 0; i < ARRAY_SIZE(ttm_debugfs_entries); i++)
2033 		debugfs_remove(adev->mman.debugfs_entries[i]);
2034 #endif
2035 }
2036