1 /*
2  * Copyright 2009 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 /*
27  * Authors:
28  *    Jerome Glisse <glisse@freedesktop.org>
29  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
30  *    Dave Airlie
31  */
32 
33 #include <linux/dma-mapping.h>
34 #include <linux/iommu.h>
35 #include <linux/hmm.h>
36 #include <linux/pagemap.h>
37 #include <linux/sched/task.h>
38 #include <linux/sched/mm.h>
39 #include <linux/seq_file.h>
40 #include <linux/slab.h>
41 #include <linux/swap.h>
42 #include <linux/swiotlb.h>
43 #include <linux/dma-buf.h>
44 #include <linux/sizes.h>
45 
46 #include <drm/ttm/ttm_bo_api.h>
47 #include <drm/ttm/ttm_bo_driver.h>
48 #include <drm/ttm/ttm_placement.h>
49 #include <drm/ttm/ttm_module.h>
50 #include <drm/ttm/ttm_page_alloc.h>
51 
52 #include <drm/drm_debugfs.h>
53 #include <drm/amdgpu_drm.h>
54 
55 #include "amdgpu.h"
56 #include "amdgpu_object.h"
57 #include "amdgpu_trace.h"
58 #include "amdgpu_amdkfd.h"
59 #include "amdgpu_sdma.h"
60 #include "amdgpu_ras.h"
61 #include "amdgpu_atomfirmware.h"
62 #include "bif/bif_4_1_d.h"
63 
64 #define AMDGPU_TTM_VRAM_MAX_DW_READ	(size_t)128
65 
66 
67 /**
68  * amdgpu_init_mem_type - Initialize a memory manager for a specific type of
69  * memory request.
70  *
71  * @bdev: The TTM BO device object (contains a reference to amdgpu_device)
72  * @type: The type of memory requested
73  * @man: The memory type manager for each domain
74  *
75  * This is called by ttm_bo_init_mm() when a buffer object is being
76  * initialized.
77  */
78 static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
79 				struct ttm_mem_type_manager *man)
80 {
81 	struct amdgpu_device *adev;
82 
83 	adev = amdgpu_ttm_adev(bdev);
84 
85 	switch (type) {
86 	case TTM_PL_SYSTEM:
87 		/* System memory */
88 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
89 		man->available_caching = TTM_PL_MASK_CACHING;
90 		man->default_caching = TTM_PL_FLAG_CACHED;
91 		break;
92 	case TTM_PL_TT:
93 		/* GTT memory  */
94 		man->func = &amdgpu_gtt_mgr_func;
95 		man->available_caching = TTM_PL_MASK_CACHING;
96 		man->default_caching = TTM_PL_FLAG_CACHED;
97 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
98 		break;
99 	case TTM_PL_VRAM:
100 		/* "On-card" video ram */
101 		man->func = &amdgpu_vram_mgr_func;
102 		man->flags = TTM_MEMTYPE_FLAG_FIXED |
103 			     TTM_MEMTYPE_FLAG_MAPPABLE;
104 		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
105 		man->default_caching = TTM_PL_FLAG_WC;
106 		break;
107 	case AMDGPU_PL_GDS:
108 	case AMDGPU_PL_GWS:
109 	case AMDGPU_PL_OA:
		/* On-chip GDS, GWS and OA memory */
111 		man->func = &ttm_bo_manager_func;
112 		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
113 		man->available_caching = TTM_PL_FLAG_UNCACHED;
114 		man->default_caching = TTM_PL_FLAG_UNCACHED;
115 		break;
116 	default:
117 		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
118 		return -EINVAL;
119 	}
120 	return 0;
121 }
122 
123 /**
124  * amdgpu_evict_flags - Compute placement flags
125  *
126  * @bo: The buffer object to evict
127  * @placement: Possible destination(s) for evicted BO
128  *
129  * Fill in placement data when ttm_bo_evict() is called
130  */
131 static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
132 				struct ttm_placement *placement)
133 {
134 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
135 	struct amdgpu_bo *abo;
136 	static const struct ttm_place placements = {
137 		.fpfn = 0,
138 		.lpfn = 0,
139 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
140 	};
141 
142 	/* Don't handle scatter gather BOs */
143 	if (bo->type == ttm_bo_type_sg) {
144 		placement->num_placement = 0;
145 		placement->num_busy_placement = 0;
146 		return;
147 	}
148 
149 	/* Object isn't an AMDGPU object so ignore */
150 	if (!amdgpu_bo_is_amdgpu_bo(bo)) {
151 		placement->placement = &placements;
152 		placement->busy_placement = &placements;
153 		placement->num_placement = 1;
154 		placement->num_busy_placement = 1;
155 		return;
156 	}
157 
158 	abo = ttm_to_amdgpu_bo(bo);
159 	switch (bo->mem.mem_type) {
160 	case AMDGPU_PL_GDS:
161 	case AMDGPU_PL_GWS:
162 	case AMDGPU_PL_OA:
163 		placement->num_placement = 0;
164 		placement->num_busy_placement = 0;
165 		return;
166 
167 	case TTM_PL_VRAM:
168 		if (!adev->mman.buffer_funcs_enabled) {
169 			/* Move to system memory */
170 			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
171 		} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
172 			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
173 			   amdgpu_bo_in_cpu_visible_vram(abo)) {
174 
175 			/* Try evicting to the CPU inaccessible part of VRAM
176 			 * first, but only set GTT as busy placement, so this
177 			 * BO will be evicted to GTT rather than causing other
178 			 * BOs to be evicted from VRAM
179 			 */
180 			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
181 							 AMDGPU_GEM_DOMAIN_GTT);
182 			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
183 			abo->placements[0].lpfn = 0;
184 			abo->placement.busy_placement = &abo->placements[1];
185 			abo->placement.num_busy_placement = 1;
186 		} else {
187 			/* Move to GTT memory */
188 			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
189 		}
190 		break;
191 	case TTM_PL_TT:
192 	default:
193 		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
194 		break;
195 	}
196 	*placement = abo->placement;
197 }
198 
199 /**
200  * amdgpu_verify_access - Verify access for a mmap call
201  *
202  * @bo:	The buffer object to map
203  * @filp: The file pointer from the process performing the mmap
204  *
 * This is called by ttm_bo_mmap() to verify whether a process
 * has the right to mmap a BO into its process space.
207  */
208 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
209 {
210 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
211 
212 	/*
213 	 * Don't verify access for KFD BOs. They don't have a GEM
214 	 * object associated with them.
215 	 */
216 	if (abo->kfd_bo)
217 		return 0;
218 
219 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
220 		return -EPERM;
221 	return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
222 					  filp->private_data);
223 }
224 
225 /**
226  * amdgpu_move_null - Register memory for a buffer object
227  *
228  * @bo: The bo to assign the memory to
229  * @new_mem: The memory to be assigned.
230  *
231  * Assign the memory from new_mem to the memory of the buffer object bo.
232  */
233 static void amdgpu_move_null(struct ttm_buffer_object *bo,
234 			     struct ttm_mem_reg *new_mem)
235 {
236 	struct ttm_mem_reg *old_mem = &bo->mem;
237 
238 	BUG_ON(old_mem->mm_node != NULL);
239 	*old_mem = *new_mem;
240 	new_mem->mm_node = NULL;
241 }
242 
243 /**
244  * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT buffer.
245  *
246  * @bo: The bo to assign the memory to.
247  * @mm_node: Memory manager node for drm allocator.
248  * @mem: The region where the bo resides.
249  *
250  */
251 static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
252 				    struct drm_mm_node *mm_node,
253 				    struct ttm_mem_reg *mem)
254 {
255 	uint64_t addr = 0;
256 
257 	if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) {
258 		addr = mm_node->start << PAGE_SHIFT;
259 		addr += amdgpu_ttm_domain_start(amdgpu_ttm_adev(bo->bdev),
260 						mem->mem_type);
261 	}
262 	return addr;
263 }
264 
265 /**
 * amdgpu_find_mm_node - Helper function that finds the drm_mm_node
 * corresponding to @offset and adjusts @offset to be relative to the
 * returned node
 *
 * @mem: The region where the bo resides.
 * @offset: The offset to look up; updated to be within the returned drm_mm_node.
271  *
272  */
273 static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
274 					       uint64_t *offset)
275 {
276 	struct drm_mm_node *mm_node = mem->mm_node;
277 
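	/* Walk the node array until the remaining offset falls inside a node */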
278 	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
279 		*offset -= (mm_node->size << PAGE_SHIFT);
280 		++mm_node;
281 	}
282 	return mm_node;
283 }
284 
285 /**
286  * amdgpu_ttm_map_buffer - Map memory into the GART windows
287  * @bo: buffer object to map
288  * @mem: memory object to map
289  * @mm_node: drm_mm node object to map
290  * @num_pages: number of pages to map
291  * @offset: offset into @mm_node where to start
292  * @window: which GART window to use
293  * @ring: DMA ring to use for the copy
294  * @tmz: if we should setup a TMZ enabled mapping
295  * @addr: resulting address inside the MC address space
296  *
297  * Setup one of the GART windows to access a specific piece of memory or return
298  * the physical address for local memory.
299  */
300 static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
301 				 struct ttm_mem_reg *mem,
302 				 struct drm_mm_node *mm_node,
303 				 unsigned num_pages, uint64_t offset,
304 				 unsigned window, struct amdgpu_ring *ring,
305 				 bool tmz, uint64_t *addr)
306 {
307 	struct amdgpu_device *adev = ring->adev;
308 	struct amdgpu_job *job;
309 	unsigned num_dw, num_bytes;
310 	struct dma_fence *fence;
311 	uint64_t src_addr, dst_addr;
312 	void *cpu_addr;
313 	uint64_t flags;
314 	unsigned int i;
315 	int r;
316 
317 	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
318 	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
319 
320 	/* Map only what can't be accessed directly */
321 	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
322 		*addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset;
323 		return 0;
324 	}
325 
326 	*addr = adev->gmc.gart_start;
327 	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
328 		AMDGPU_GPU_PAGE_SIZE;
329 	*addr += offset & ~PAGE_MASK;
330 
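	/* One 8 byte GART PTE is written for every page that gets mapped */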
331 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
332 	num_bytes = num_pages * 8;
333 
334 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
335 				     AMDGPU_IB_POOL_DELAYED, &job);
336 	if (r)
337 		return r;
338 
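	/* The new GART PTEs are placed right behind the copy commands in the
	 * IB and the SDMA copies them into the page table of the selected
	 * window.
	 */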
339 	src_addr = num_dw * 4;
340 	src_addr += job->ibs[0].gpu_addr;
341 
342 	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
343 	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
344 	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
345 				dst_addr, num_bytes, false);
346 
347 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
348 	WARN_ON(job->ibs[0].length_dw > num_dw);
349 
350 	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
351 	if (tmz)
352 		flags |= AMDGPU_PTE_TMZ;
353 
354 	cpu_addr = &job->ibs[0].ptr[num_dw];
355 
356 	if (mem->mem_type == TTM_PL_TT) {
357 		struct ttm_dma_tt *dma;
358 		dma_addr_t *dma_address;
359 
360 		dma = container_of(bo->ttm, struct ttm_dma_tt, ttm);
361 		dma_address = &dma->dma_address[offset >> PAGE_SHIFT];
362 		r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
363 				    cpu_addr);
364 		if (r)
365 			goto error_free;
366 	} else {
367 		dma_addr_t dma_address;
368 
369 		dma_address = (mm_node->start << PAGE_SHIFT) + offset;
370 		dma_address += adev->vm_manager.vram_base_offset;
371 
372 		for (i = 0; i < num_pages; ++i) {
373 			r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
374 					    &dma_address, flags, cpu_addr);
375 			if (r)
376 				goto error_free;
377 
378 			dma_address += PAGE_SIZE;
379 		}
380 	}
381 
382 	r = amdgpu_job_submit(job, &adev->mman.entity,
383 			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
384 	if (r)
385 		goto error_free;
386 
387 	dma_fence_put(fence);
388 
389 	return r;
390 
391 error_free:
392 	amdgpu_job_free(job);
393 	return r;
394 }
395 
396 /**
397  * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
398  * @adev: amdgpu device
399  * @src: buffer/address where to read from
400  * @dst: buffer/address where to write to
401  * @size: number of bytes to copy
402  * @tmz: if a secure copy should be used
403  * @resv: resv object to sync to
404  * @f: Returns the last fence if multiple jobs are submitted.
405  *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo could be the same BO for a
 * move and different for a BO to BO copy.
409  *
410  */
411 int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
412 			       const struct amdgpu_copy_mem *src,
413 			       const struct amdgpu_copy_mem *dst,
414 			       uint64_t size, bool tmz,
415 			       struct dma_resv *resv,
416 			       struct dma_fence **f)
417 {
418 	const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
419 					AMDGPU_GPU_PAGE_SIZE);
420 
421 	uint64_t src_node_size, dst_node_size, src_offset, dst_offset;
422 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
423 	struct drm_mm_node *src_mm, *dst_mm;
424 	struct dma_fence *fence = NULL;
425 	int r = 0;
426 
427 	if (!adev->mman.buffer_funcs_enabled) {
428 		DRM_ERROR("Trying to move memory with ring turned off.\n");
429 		return -EINVAL;
430 	}
431 
432 	src_offset = src->offset;
433 	src_mm = amdgpu_find_mm_node(src->mem, &src_offset);
434 	src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset;
435 
436 	dst_offset = dst->offset;
437 	dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset);
438 	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset;
439 
440 	mutex_lock(&adev->mman.gtt_window_lock);
441 
442 	while (size) {
443 		uint32_t src_page_offset = src_offset & ~PAGE_MASK;
444 		uint32_t dst_page_offset = dst_offset & ~PAGE_MASK;
445 		struct dma_fence *next;
446 		uint32_t cur_size;
447 		uint64_t from, to;
448 
449 		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
450 		 * begins at an offset, then adjust the size accordingly
451 		 */
452 		cur_size = max(src_page_offset, dst_page_offset);
453 		cur_size = min(min3(src_node_size, dst_node_size, size),
454 			       (uint64_t)(GTT_MAX_BYTES - cur_size));
455 
456 		/* Map src to window 0 and dst to window 1. */
457 		r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm,
458 					  PFN_UP(cur_size + src_page_offset),
459 					  src_offset, 0, ring, tmz, &from);
460 		if (r)
461 			goto error;
462 
463 		r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, dst_mm,
464 					  PFN_UP(cur_size + dst_page_offset),
465 					  dst_offset, 1, ring, tmz, &to);
466 		if (r)
467 			goto error;
468 
469 		r = amdgpu_copy_buffer(ring, from, to, cur_size,
470 				       resv, &next, false, true, tmz);
471 		if (r)
472 			goto error;
473 
474 		dma_fence_put(fence);
475 		fence = next;
476 
477 		size -= cur_size;
478 		if (!size)
479 			break;
480 
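		/* Advance to the next drm_mm node once the current one is
		 * fully consumed.
		 */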
481 		src_node_size -= cur_size;
482 		if (!src_node_size) {
483 			++src_mm;
484 			src_node_size = src_mm->size << PAGE_SHIFT;
485 			src_offset = 0;
486 		} else {
487 			src_offset += cur_size;
488 		}
489 
490 		dst_node_size -= cur_size;
491 		if (!dst_node_size) {
492 			++dst_mm;
493 			dst_node_size = dst_mm->size << PAGE_SHIFT;
494 			dst_offset = 0;
495 		} else {
496 			dst_offset += cur_size;
497 		}
498 	}
499 error:
500 	mutex_unlock(&adev->mman.gtt_window_lock);
501 	if (f)
502 		*f = dma_fence_get(fence);
503 	dma_fence_put(fence);
504 	return r;
505 }
506 
507 /**
508  * amdgpu_move_blit - Copy an entire buffer to another buffer
509  *
510  * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
511  * help move buffers to and from VRAM.
512  */
513 static int amdgpu_move_blit(struct ttm_buffer_object *bo,
514 			    bool evict, bool no_wait_gpu,
515 			    struct ttm_mem_reg *new_mem,
516 			    struct ttm_mem_reg *old_mem)
517 {
518 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
519 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
520 	struct amdgpu_copy_mem src, dst;
521 	struct dma_fence *fence = NULL;
522 	int r;
523 
524 	src.bo = bo;
525 	dst.bo = bo;
526 	src.mem = old_mem;
527 	dst.mem = new_mem;
528 	src.offset = 0;
529 	dst.offset = 0;
530 
531 	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
532 				       new_mem->num_pages << PAGE_SHIFT,
533 				       amdgpu_bo_encrypted(abo),
534 				       bo->base.resv, &fence);
535 	if (r)
536 		goto error;
537 
538 	/* clear the space being freed */
539 	if (old_mem->mem_type == TTM_PL_VRAM &&
540 	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
541 		struct dma_fence *wipe_fence = NULL;
542 
543 		r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
544 				       NULL, &wipe_fence);
545 		if (r) {
546 			goto error;
547 		} else if (wipe_fence) {
548 			dma_fence_put(fence);
549 			fence = wipe_fence;
550 		}
551 	}
552 
553 	/* Always block for VM page tables before committing the new location */
554 	if (bo->type == ttm_bo_type_kernel)
555 		r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem);
556 	else
557 		r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
558 	dma_fence_put(fence);
559 	return r;
560 
561 error:
562 	if (fence)
563 		dma_fence_wait(fence, false);
564 	dma_fence_put(fence);
565 	return r;
566 }
567 
568 /**
569  * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer
570  *
571  * Called by amdgpu_bo_move().
572  */
573 static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
574 				struct ttm_operation_ctx *ctx,
575 				struct ttm_mem_reg *new_mem)
576 {
577 	struct ttm_mem_reg *old_mem = &bo->mem;
578 	struct ttm_mem_reg tmp_mem;
579 	struct ttm_place placements;
580 	struct ttm_placement placement;
581 	int r;
582 
583 	/* create space/pages for new_mem in GTT space */
584 	tmp_mem = *new_mem;
585 	tmp_mem.mm_node = NULL;
586 	placement.num_placement = 1;
587 	placement.placement = &placements;
588 	placement.num_busy_placement = 1;
589 	placement.busy_placement = &placements;
590 	placements.fpfn = 0;
591 	placements.lpfn = 0;
592 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
593 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
594 	if (unlikely(r)) {
595 		pr_err("Failed to find GTT space for blit from VRAM\n");
596 		return r;
597 	}
598 
599 	/* set caching flags */
600 	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
601 	if (unlikely(r)) {
602 		goto out_cleanup;
603 	}
604 
605 	/* Bind the memory to the GTT space */
606 	r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
607 	if (unlikely(r)) {
608 		goto out_cleanup;
609 	}
610 
611 	/* blit VRAM to GTT */
612 	r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, &tmp_mem, old_mem);
613 	if (unlikely(r)) {
614 		goto out_cleanup;
615 	}
616 
617 	/* move BO (in tmp_mem) to new_mem */
618 	r = ttm_bo_move_ttm(bo, ctx, new_mem);
619 out_cleanup:
620 	ttm_bo_mem_put(bo, &tmp_mem);
621 	return r;
622 }
623 
624 /**
625  * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM
626  *
627  * Called by amdgpu_bo_move().
628  */
629 static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
630 				struct ttm_operation_ctx *ctx,
631 				struct ttm_mem_reg *new_mem)
632 {
633 	struct ttm_mem_reg *old_mem = &bo->mem;
634 	struct ttm_mem_reg tmp_mem;
635 	struct ttm_placement placement;
636 	struct ttm_place placements;
637 	int r;
638 
639 	/* make space in GTT for old_mem buffer */
640 	tmp_mem = *new_mem;
641 	tmp_mem.mm_node = NULL;
642 	placement.num_placement = 1;
643 	placement.placement = &placements;
644 	placement.num_busy_placement = 1;
645 	placement.busy_placement = &placements;
646 	placements.fpfn = 0;
647 	placements.lpfn = 0;
648 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
649 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
650 	if (unlikely(r)) {
651 		pr_err("Failed to find GTT space for blit to VRAM\n");
652 		return r;
653 	}
654 
655 	/* move/bind old memory to GTT space */
656 	r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
657 	if (unlikely(r)) {
658 		goto out_cleanup;
659 	}
660 
661 	/* copy to VRAM */
662 	r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, new_mem, old_mem);
663 	if (unlikely(r)) {
664 		goto out_cleanup;
665 	}
666 out_cleanup:
667 	ttm_bo_mem_put(bo, &tmp_mem);
668 	return r;
669 }
670 
671 /**
672  * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
673  *
674  * Called by amdgpu_bo_move()
675  */
676 static bool amdgpu_mem_visible(struct amdgpu_device *adev,
677 			       struct ttm_mem_reg *mem)
678 {
679 	struct drm_mm_node *nodes = mem->mm_node;
680 
681 	if (mem->mem_type == TTM_PL_SYSTEM ||
682 	    mem->mem_type == TTM_PL_TT)
683 		return true;
684 	if (mem->mem_type != TTM_PL_VRAM)
685 		return false;
686 
687 	/* ttm_mem_reg_ioremap only supports contiguous memory */
688 	if (nodes->size != mem->num_pages)
689 		return false;
690 
691 	return ((nodes->start + nodes->size) << PAGE_SHIFT)
692 		<= adev->gmc.visible_vram_size;
693 }
694 
695 /**
696  * amdgpu_bo_move - Move a buffer object to a new memory location
697  *
698  * Called by ttm_bo_handle_move_mem()
699  */
700 static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
701 			  struct ttm_operation_ctx *ctx,
702 			  struct ttm_mem_reg *new_mem)
703 {
704 	struct amdgpu_device *adev;
705 	struct amdgpu_bo *abo;
706 	struct ttm_mem_reg *old_mem = &bo->mem;
707 	int r;
708 
709 	/* Can't move a pinned BO */
710 	abo = ttm_to_amdgpu_bo(bo);
711 	if (WARN_ON_ONCE(abo->pin_count > 0))
712 		return -EINVAL;
713 
714 	adev = amdgpu_ttm_adev(bo->bdev);
715 
716 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
717 		amdgpu_move_null(bo, new_mem);
718 		return 0;
719 	}
720 	if ((old_mem->mem_type == TTM_PL_TT &&
721 	     new_mem->mem_type == TTM_PL_SYSTEM) ||
722 	    (old_mem->mem_type == TTM_PL_SYSTEM &&
723 	     new_mem->mem_type == TTM_PL_TT)) {
724 		/* bind is enough */
725 		amdgpu_move_null(bo, new_mem);
726 		return 0;
727 	}
728 	if (old_mem->mem_type == AMDGPU_PL_GDS ||
729 	    old_mem->mem_type == AMDGPU_PL_GWS ||
730 	    old_mem->mem_type == AMDGPU_PL_OA ||
731 	    new_mem->mem_type == AMDGPU_PL_GDS ||
732 	    new_mem->mem_type == AMDGPU_PL_GWS ||
733 	    new_mem->mem_type == AMDGPU_PL_OA) {
734 		/* Nothing to save here */
735 		amdgpu_move_null(bo, new_mem);
736 		return 0;
737 	}
738 
739 	if (!adev->mman.buffer_funcs_enabled) {
740 		r = -ENODEV;
741 		goto memcpy;
742 	}
743 
744 	if (old_mem->mem_type == TTM_PL_VRAM &&
745 	    new_mem->mem_type == TTM_PL_SYSTEM) {
746 		r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem);
747 	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
748 		   new_mem->mem_type == TTM_PL_VRAM) {
749 		r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem);
750 	} else {
751 		r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu,
752 				     new_mem, old_mem);
753 	}
754 
755 	if (r) {
756 memcpy:
757 		/* Check that all memory is CPU accessible */
758 		if (!amdgpu_mem_visible(adev, old_mem) ||
759 		    !amdgpu_mem_visible(adev, new_mem)) {
760 			pr_err("Move buffer fallback to memcpy unavailable\n");
761 			return r;
762 		}
763 
764 		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
765 		if (r)
766 			return r;
767 	}
768 
769 	if (bo->type == ttm_bo_type_device &&
770 	    new_mem->mem_type == TTM_PL_VRAM &&
771 	    old_mem->mem_type != TTM_PL_VRAM) {
772 		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
773 		 * accesses the BO after it's moved.
774 		 */
775 		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
776 	}
777 
778 	/* update statistics */
779 	atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved);
780 	return 0;
781 }
782 
783 /**
784  * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
785  *
786  * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
787  */
788 static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
789 {
790 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
791 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
792 	struct drm_mm_node *mm_node = mem->mm_node;
793 
794 	mem->bus.addr = NULL;
795 	mem->bus.offset = 0;
796 	mem->bus.size = mem->num_pages << PAGE_SHIFT;
797 	mem->bus.base = 0;
798 	mem->bus.is_iomem = false;
799 	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
800 		return -EINVAL;
801 	switch (mem->mem_type) {
802 	case TTM_PL_SYSTEM:
803 		/* system memory */
804 		return 0;
805 	case TTM_PL_TT:
806 		break;
807 	case TTM_PL_VRAM:
808 		mem->bus.offset = mem->start << PAGE_SHIFT;
809 		/* check if it's visible */
810 		if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size)
811 			return -EINVAL;
812 		/* Only physically contiguous buffers apply. In a contiguous
813 		 * buffer, size of the first mm_node would match the number of
814 		 * pages in ttm_mem_reg.
815 		 */
816 		if (adev->mman.aper_base_kaddr &&
817 		    (mm_node->size == mem->num_pages))
818 			mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
819 					mem->bus.offset;
820 
821 		mem->bus.base = adev->gmc.aper_base;
822 		mem->bus.is_iomem = true;
823 		break;
824 	default:
825 		return -EINVAL;
826 	}
827 	return 0;
828 }
829 
830 static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
831 {
832 }
833 
834 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
835 					   unsigned long page_offset)
836 {
837 	uint64_t offset = (page_offset << PAGE_SHIFT);
838 	struct drm_mm_node *mm;
839 
840 	mm = amdgpu_find_mm_node(&bo->mem, &offset);
841 	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
842 		(offset >> PAGE_SHIFT);
843 }
844 
845 /**
846  * amdgpu_ttm_domain_start - Returns GPU start address
847  * @adev: amdgpu device object
848  * @type: type of the memory
849  *
850  * Returns:
851  * GPU start address of a memory domain
852  */
854 uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
855 {
856 	switch (type) {
857 	case TTM_PL_TT:
858 		return adev->gmc.gart_start;
859 	case TTM_PL_VRAM:
860 		return adev->gmc.vram_start;
861 	}
862 
863 	return 0;
864 }
865 
866 /*
867  * TTM backend functions.
868  */
869 struct amdgpu_ttm_tt {
870 	struct ttm_dma_tt	ttm;
871 	struct drm_gem_object	*gobj;
872 	u64			offset;
873 	uint64_t		userptr;
874 	struct task_struct	*usertask;
875 	uint32_t		userflags;
876 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
877 	struct hmm_range	*range;
878 #endif
879 };
880 
881 #ifdef CONFIG_DRM_AMDGPU_USERPTR
882 /**
 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
 * memory and start HMM tracking of CPU page table updates
 *
 * The calling function must call amdgpu_ttm_tt_get_user_pages_done() once and
 * only once afterwards to stop HMM tracking
888  */
889 int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
890 {
891 	struct ttm_tt *ttm = bo->tbo.ttm;
892 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
893 	unsigned long start = gtt->userptr;
894 	struct vm_area_struct *vma;
895 	struct hmm_range *range;
896 	unsigned long timeout;
897 	struct mm_struct *mm;
898 	unsigned long i;
899 	int r = 0;
900 
901 	mm = bo->notifier.mm;
902 	if (unlikely(!mm)) {
903 		DRM_DEBUG_DRIVER("BO is not registered?\n");
904 		return -EFAULT;
905 	}
906 
907 	/* Another get_user_pages is running at the same time?? */
908 	if (WARN_ON(gtt->range))
909 		return -EFAULT;
910 
911 	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
912 		return -ESRCH;
913 
914 	range = kzalloc(sizeof(*range), GFP_KERNEL);
915 	if (unlikely(!range)) {
916 		r = -ENOMEM;
917 		goto out;
918 	}
919 	range->notifier = &bo->notifier;
920 	range->start = bo->notifier.interval_tree.start;
921 	range->end = bo->notifier.interval_tree.last + 1;
922 	range->default_flags = HMM_PFN_REQ_FAULT;
923 	if (!amdgpu_ttm_tt_is_readonly(ttm))
924 		range->default_flags |= HMM_PFN_REQ_WRITE;
925 
926 	range->hmm_pfns = kvmalloc_array(ttm->num_pages,
927 					 sizeof(*range->hmm_pfns), GFP_KERNEL);
928 	if (unlikely(!range->hmm_pfns)) {
929 		r = -ENOMEM;
930 		goto out_free_ranges;
931 	}
932 
933 	mmap_read_lock(mm);
934 	vma = find_vma(mm, start);
935 	if (unlikely(!vma || start < vma->vm_start)) {
936 		r = -EFAULT;
937 		goto out_unlock;
938 	}
939 	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
940 		vma->vm_file)) {
941 		r = -EPERM;
942 		goto out_unlock;
943 	}
944 	mmap_read_unlock(mm);
945 	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
946 
947 retry:
948 	range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
949 
950 	mmap_read_lock(mm);
951 	r = hmm_range_fault(range);
952 	mmap_read_unlock(mm);
953 	if (unlikely(r)) {
954 		/*
955 		 * FIXME: This timeout should encompass the retry from
956 		 * mmu_interval_read_retry() as well.
957 		 */
958 		if (r == -EBUSY && !time_after(jiffies, timeout))
959 			goto retry;
960 		goto out_free_pfns;
961 	}
962 
963 	/*
964 	 * Due to default_flags, all pages are HMM_PFN_VALID or
965 	 * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
966 	 * the notifier_lock, and mmu_interval_read_retry() must be done first.
967 	 */
968 	for (i = 0; i < ttm->num_pages; i++)
969 		pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
970 
971 	gtt->range = range;
972 	mmput(mm);
973 
974 	return 0;
975 
976 out_unlock:
977 	mmap_read_unlock(mm);
978 out_free_pfns:
979 	kvfree(range->hmm_pfns);
980 out_free_ranges:
981 	kfree(range);
982 out:
983 	mmput(mm);
984 	return r;
985 }
986 
987 /**
 * amdgpu_ttm_tt_get_user_pages_done - stop HMM from tracking CPU page table
 * changes and check if the pages backing this ttm range have been invalidated
990  *
991  * Returns: true if pages are still valid
992  */
993 bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
994 {
995 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
996 	bool r = false;
997 
998 	if (!gtt || !gtt->userptr)
999 		return false;
1000 
1001 	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
1002 		gtt->userptr, ttm->num_pages);
1003 
1004 	WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
1005 		"No user pages to check\n");
1006 
1007 	if (gtt->range) {
1008 		/*
1009 		 * FIXME: Must always hold notifier_lock for this, and must
1010 		 * not ignore the return code.
1011 		 */
1012 		r = mmu_interval_read_retry(gtt->range->notifier,
1013 					 gtt->range->notifier_seq);
1014 		kvfree(gtt->range->hmm_pfns);
1015 		kfree(gtt->range);
1016 		gtt->range = NULL;
1017 	}
1018 
1019 	return !r;
1020 }
1021 #endif
1022 
1023 /**
1024  * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
1025  *
1026  * Called by amdgpu_cs_list_validate(). This creates the page list
1027  * that backs user memory and will ultimately be mapped into the device
1028  * address space.
1029  */
1030 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
1031 {
1032 	unsigned long i;
1033 
1034 	for (i = 0; i < ttm->num_pages; ++i)
1035 		ttm->pages[i] = pages ? pages[i] : NULL;
1036 }
1037 
1038 /**
 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
1040  *
1041  * Called by amdgpu_ttm_backend_bind()
1042  **/
1043 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
1044 {
1045 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
1046 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1047 	unsigned nents;
1048 	int r;
1049 
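	/* Writable userptrs may be written back by the device and need a
	 * bidirectional mapping, read-only ones only stream data to it.
	 */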
1050 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1051 	enum dma_data_direction direction = write ?
1052 		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
1053 
1054 	/* Allocate an SG array and squash pages into it */
1055 	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
1056 				      ttm->num_pages << PAGE_SHIFT,
1057 				      GFP_KERNEL);
1058 	if (r)
1059 		goto release_sg;
1060 
1061 	/* Map SG to device */
1062 	r = -ENOMEM;
1063 	nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
1064 	if (nents == 0)
1065 		goto release_sg;
1066 
1067 	/* convert SG to linear array of pages and dma addresses */
1068 	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
1069 					 gtt->ttm.dma_address, ttm->num_pages);
1070 
1071 	return 0;
1072 
1073 release_sg:
1074 	kfree(ttm->sg);
1075 	return r;
1076 }
1077 
1078 /**
1079  * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
1080  */
1081 static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
1082 {
1083 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
1084 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1085 
1086 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1087 	enum dma_data_direction direction = write ?
1088 		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
1089 
1090 	/* double check that we don't free the table twice */
1091 	if (!ttm->sg->sgl)
1092 		return;
1093 
1094 	/* unmap the pages mapped to the device */
1095 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
1096 
1097 	sg_free_table(ttm->sg);
1098 
1099 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
1100 	if (gtt->range) {
1101 		unsigned long i;
1102 
1103 		for (i = 0; i < ttm->num_pages; i++) {
1104 			if (ttm->pages[i] !=
1105 			    hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
1106 				break;
1107 		}
1108 
1109 		WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
1110 	}
1111 #endif
1112 }
1113 
1114 static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
1115 				struct ttm_buffer_object *tbo,
1116 				uint64_t flags)
1117 {
1118 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
1119 	struct ttm_tt *ttm = tbo->ttm;
1120 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1121 	int r;
1122 
1123 	if (amdgpu_bo_encrypted(abo))
1124 		flags |= AMDGPU_PTE_TMZ;
1125 
1126 	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
1127 		uint64_t page_idx = 1;
1128 
1129 		r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
1130 				ttm->pages, gtt->ttm.dma_address, flags);
1131 		if (r)
1132 			goto gart_bind_fail;
1133 
1134 		/* The memory type of the first page defaults to UC. Now
1135 		 * modify the memory type to NC from the second page of
1136 		 * the BO onward.
1137 		 */
1138 		flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
1139 		flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
1140 
1141 		r = amdgpu_gart_bind(adev,
1142 				gtt->offset + (page_idx << PAGE_SHIFT),
1143 				ttm->num_pages - page_idx,
1144 				&ttm->pages[page_idx],
1145 				&(gtt->ttm.dma_address[page_idx]), flags);
1146 	} else {
1147 		r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
1148 				     ttm->pages, gtt->ttm.dma_address, flags);
1149 	}
1150 
1151 gart_bind_fail:
1152 	if (r)
1153 		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
1154 			  ttm->num_pages, gtt->offset);
1155 
1156 	return r;
1157 }
1158 
1159 /**
1160  * amdgpu_ttm_backend_bind - Bind GTT memory
1161  *
1162  * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
1163  * This handles binding GTT memory to the device address space.
1164  */
1165 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
1166 				   struct ttm_mem_reg *bo_mem)
1167 {
1168 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
1169 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
1170 	uint64_t flags;
1171 	int r = 0;
1172 
1173 	if (gtt->userptr) {
1174 		r = amdgpu_ttm_tt_pin_userptr(ttm);
1175 		if (r) {
1176 			DRM_ERROR("failed to pin userptr\n");
1177 			return r;
1178 		}
1179 	}
1180 	if (!ttm->num_pages) {
1181 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
1182 		     ttm->num_pages, bo_mem, ttm);
1183 	}
1184 
1185 	if (bo_mem->mem_type == AMDGPU_PL_GDS ||
1186 	    bo_mem->mem_type == AMDGPU_PL_GWS ||
1187 	    bo_mem->mem_type == AMDGPU_PL_OA)
1188 		return -EINVAL;
1189 
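	/* No GART address assigned yet, so there is nothing to bind here;
	 * amdgpu_ttm_alloc_gart() does the binding once an address is
	 * allocated.
	 */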
1190 	if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
1191 		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
1192 		return 0;
1193 	}
1194 
1195 	/* compute PTE flags relevant to this BO memory */
1196 	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
1197 
1198 	/* bind pages into GART page tables */
1199 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
1200 	r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
1201 		ttm->pages, gtt->ttm.dma_address, flags);
1202 
1203 	if (r)
1204 		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
1205 			  ttm->num_pages, gtt->offset);
1206 	return r;
1207 }
1208 
1209 /**
1210  * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object
1211  */
1212 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
1213 {
1214 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
1215 	struct ttm_operation_ctx ctx = { false, false };
1216 	struct amdgpu_ttm_tt *gtt = (void*)bo->ttm;
1217 	struct ttm_mem_reg tmp;
1218 	struct ttm_placement placement;
1219 	struct ttm_place placements;
1220 	uint64_t addr, flags;
1221 	int r;
1222 
1223 	if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
1224 		return 0;
1225 
1226 	addr = amdgpu_gmc_agp_addr(bo);
1227 	if (addr != AMDGPU_BO_INVALID_OFFSET) {
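		/* The BO is accessible through the AGP aperture and doesn't
		 * need a GART mapping.
		 */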
1228 		bo->mem.start = addr >> PAGE_SHIFT;
1229 	} else {
1230 
1231 		/* allocate GART space */
1232 		tmp = bo->mem;
1233 		tmp.mm_node = NULL;
1234 		placement.num_placement = 1;
1235 		placement.placement = &placements;
1236 		placement.num_busy_placement = 1;
1237 		placement.busy_placement = &placements;
1238 		placements.fpfn = 0;
1239 		placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
1240 		placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
1241 			TTM_PL_FLAG_TT;
1242 
1243 		r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
1244 		if (unlikely(r))
1245 			return r;
1246 
1247 		/* compute PTE flags for this buffer object */
1248 		flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
1249 
1250 		/* Bind pages */
1251 		gtt->offset = (u64)tmp.start << PAGE_SHIFT;
1252 		r = amdgpu_ttm_gart_bind(adev, bo, flags);
1253 		if (unlikely(r)) {
1254 			ttm_bo_mem_put(bo, &tmp);
1255 			return r;
1256 		}
1257 
1258 		ttm_bo_mem_put(bo, &bo->mem);
1259 		bo->mem = tmp;
1260 	}
1261 
1262 	return 0;
1263 }
1264 
1265 /**
1266  * amdgpu_ttm_recover_gart - Rebind GTT pages
1267  *
1268  * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
1269  * rebind GTT pages during a GPU reset.
1270  */
1271 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
1272 {
1273 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
1274 	uint64_t flags;
1275 	int r;
1276 
1277 	if (!tbo->ttm)
1278 		return 0;
1279 
1280 	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
1281 	r = amdgpu_ttm_gart_bind(adev, tbo, flags);
1282 
1283 	return r;
1284 }
1285 
1286 /**
1287  * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
1288  *
1289  * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
1290  * ttm_tt_destroy().
1291  */
1292 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
1293 {
1294 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
1295 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1296 	int r;
1297 
1298 	/* if the pages have userptr pinning then clear that first */
1299 	if (gtt->userptr)
1300 		amdgpu_ttm_tt_unpin_userptr(ttm);
1301 
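	/* Nothing was bound into the GART, so there is nothing to unbind */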
1302 	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
1303 		return 0;
1304 
1305 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
1306 	r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
1307 	if (r)
1308 		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
1309 			  gtt->ttm.ttm.num_pages, gtt->offset);
1310 	return r;
1311 }
1312 
1313 static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
1314 {
1315 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1316 
1317 	if (gtt->usertask)
1318 		put_task_struct(gtt->usertask);
1319 
1320 	ttm_dma_tt_fini(&gtt->ttm);
1321 	kfree(gtt);
1322 }
1323 
1324 static struct ttm_backend_func amdgpu_backend_func = {
1325 	.bind = &amdgpu_ttm_backend_bind,
1326 	.unbind = &amdgpu_ttm_backend_unbind,
1327 	.destroy = &amdgpu_ttm_backend_destroy,
1328 };
1329 
1330 /**
1331  * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
1332  *
 * @bo: The buffer object to create a GTT ttm_tt object around
 * @page_flags: Page flags to be added to the ttm_tt object
 *
1335  * Called by ttm_tt_create().
1336  */
1337 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
1338 					   uint32_t page_flags)
1339 {
1340 	struct amdgpu_ttm_tt *gtt;
1341 
1342 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
1343 	if (gtt == NULL) {
1344 		return NULL;
1345 	}
1346 	gtt->ttm.ttm.func = &amdgpu_backend_func;
1347 	gtt->gobj = &bo->base;
1348 
1349 	/* allocate space for the uninitialized page entries */
1350 	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
1351 		kfree(gtt);
1352 		return NULL;
1353 	}
1354 	return &gtt->ttm.ttm;
1355 }
1356 
1357 /**
1358  * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
1359  *
1360  * Map the pages of a ttm_tt object to an address space visible
1361  * to the underlying device.
1362  */
1363 static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
1364 			struct ttm_operation_ctx *ctx)
1365 {
1366 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
1367 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1368 
1369 	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
1370 	if (gtt && gtt->userptr) {
1371 		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1372 		if (!ttm->sg)
1373 			return -ENOMEM;
1374 
1375 		ttm->page_flags |= TTM_PAGE_FLAG_SG;
1376 		ttm->state = tt_unbound;
1377 		return 0;
1378 	}
1379 
1380 	if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
1381 		if (!ttm->sg) {
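			/* Lazily map the attachment of an imported DMA-buf to
			 * get its sg_table.
			 */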
1382 			struct dma_buf_attachment *attach;
1383 			struct sg_table *sgt;
1384 
1385 			attach = gtt->gobj->import_attach;
1386 			sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
1387 			if (IS_ERR(sgt))
1388 				return PTR_ERR(sgt);
1389 
1390 			ttm->sg = sgt;
1391 		}
1392 
1393 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
1394 						 gtt->ttm.dma_address,
1395 						 ttm->num_pages);
1396 		ttm->state = tt_unbound;
1397 		return 0;
1398 	}
1399 
1400 #ifdef CONFIG_SWIOTLB
1401 	if (adev->need_swiotlb && swiotlb_nr_tbl()) {
1402 		return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
1403 	}
1404 #endif
1405 
1406 	/* fall back to generic helper to populate the page array
1407 	 * and map them to the device */
1408 	return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
1409 }
1410 
1411 /**
1412  * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
1413  *
1414  * Unmaps pages of a ttm_tt object from the device address space and
1415  * unpopulates the page array backing it.
1416  */
1417 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
1418 {
1419 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1420 	struct amdgpu_device *adev;
1421 
1422 	if (gtt && gtt->userptr) {
1423 		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
1424 		kfree(ttm->sg);
1425 		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
1426 		return;
1427 	}
1428 
1429 	if (ttm->sg && gtt->gobj->import_attach) {
1430 		struct dma_buf_attachment *attach;
1431 
1432 		attach = gtt->gobj->import_attach;
1433 		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
1434 		ttm->sg = NULL;
1435 		return;
1436 	}
1437 
1438 	if (ttm->page_flags & TTM_PAGE_FLAG_SG)
1439 		return;
1440 
1441 	adev = amdgpu_ttm_adev(ttm->bdev);
1442 
1443 #ifdef CONFIG_SWIOTLB
1444 	if (adev->need_swiotlb && swiotlb_nr_tbl()) {
1445 		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
1446 		return;
1447 	}
1448 #endif
1449 
1450 	/* fall back to generic helper to unmap and unpopulate array */
1451 	ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
1452 }
1453 
1454 /**
1455  * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
1456  * task
1457  *
1458  * @ttm: The ttm_tt object to bind this userptr object to
 * @addr: The address in the current task's VM space to use
1460  * @flags: Requirements of userptr object.
1461  *
1462  * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
1463  * to current task
1464  */
1465 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1466 			      uint32_t flags)
1467 {
1468 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1469 
1470 	if (gtt == NULL)
1471 		return -EINVAL;
1472 
1473 	gtt->userptr = addr;
1474 	gtt->userflags = flags;
1475 
1476 	if (gtt->usertask)
1477 		put_task_struct(gtt->usertask);
1478 	gtt->usertask = current->group_leader;
1479 	get_task_struct(gtt->usertask);
1480 
1481 	return 0;
1482 }
1483 
1484 /**
1485  * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
1486  */
1487 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1488 {
1489 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1490 
1491 	if (gtt == NULL)
1492 		return NULL;
1493 
1494 	if (gtt->usertask == NULL)
1495 		return NULL;
1496 
1497 	return gtt->usertask->mm;
1498 }
1499 
1500 /**
 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies inside an
 * address range for the current task.
1503  *
1504  */
1505 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1506 				  unsigned long end)
1507 {
1508 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1509 	unsigned long size;
1510 
1511 	if (gtt == NULL || !gtt->userptr)
1512 		return false;
1513 
1514 	/* Return false if no part of the ttm_tt object lies within
1515 	 * the range
1516 	 */
1517 	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
1518 	if (gtt->userptr > end || gtt->userptr + size <= start)
1519 		return false;
1520 
1521 	return true;
1522 }
1523 
1524 /**
 * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
1526  */
1527 bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
1528 {
1529 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1530 
1531 	if (gtt == NULL || !gtt->userptr)
1532 		return false;
1533 
1534 	return true;
1535 }
1536 
1537 /**
1538  * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
1539  */
1540 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1541 {
1542 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1543 
1544 	if (gtt == NULL)
1545 		return false;
1546 
1547 	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1548 }
1549 
1550 /**
1551  * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
1552  *
1553  * @ttm: The ttm_tt object to compute the flags for
1554  * @mem: The memory registry backing this ttm_tt object
1555  *
1556  * Figure out the flags to use for a VM PDE (Page Directory Entry).
1557  */
1558 uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
1559 {
1560 	uint64_t flags = 0;
1561 
1562 	if (mem && mem->mem_type != TTM_PL_SYSTEM)
1563 		flags |= AMDGPU_PTE_VALID;
1564 
1565 	if (mem && mem->mem_type == TTM_PL_TT) {
1566 		flags |= AMDGPU_PTE_SYSTEM;
1567 
1568 		if (ttm->caching_state == tt_cached)
1569 			flags |= AMDGPU_PTE_SNOOPED;
1570 	}
1571 
1572 	return flags;
1573 }
1574 
1575 /**
1576  * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
1577  *
1578  * @ttm: The ttm_tt object to compute the flags for
1579  * @mem: The memory registry backing this ttm_tt object
1580 
1581  * Figure out the flags to use for a VM PTE (Page Table Entry).
1582  */
1583 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1584 				 struct ttm_mem_reg *mem)
1585 {
1586 	uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);
1587 
1588 	flags |= adev->gart.gart_pte_flags;
1589 	flags |= AMDGPU_PTE_READABLE;
1590 
1591 	if (!amdgpu_ttm_tt_is_readonly(ttm))
1592 		flags |= AMDGPU_PTE_WRITEABLE;
1593 
1594 	return flags;
1595 }
1596 
1597 /**
1598  * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
1599  * object.
1600  *
1601  * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
1602  * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
1603  * it can find space for a new object and by ttm_bo_force_list_clean() which is
1604  * used to clean out a memory space.
1605  */
1606 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1607 					    const struct ttm_place *place)
1608 {
1609 	unsigned long num_pages = bo->mem.num_pages;
1610 	struct drm_mm_node *node = bo->mem.mm_node;
1611 	struct dma_resv_list *flist;
1612 	struct dma_fence *f;
1613 	int i;
1614 
1615 	if (bo->type == ttm_bo_type_kernel &&
1616 	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
1617 		return false;
1618 
1619 	/* If bo is a KFD BO, check if the bo belongs to the current process.
1620 	 * If true, then return false as any KFD process needs all its BOs to
1621 	 * be resident to run successfully
1622 	 */
1623 	flist = dma_resv_get_list(bo->base.resv);
1624 	if (flist) {
1625 		for (i = 0; i < flist->shared_count; ++i) {
1626 			f = rcu_dereference_protected(flist->shared[i],
1627 				dma_resv_held(bo->base.resv));
1628 			if (amdkfd_fence_check_mm(f, current->mm))
1629 				return false;
1630 		}
1631 	}
1632 
1633 	switch (bo->mem.mem_type) {
1634 	case TTM_PL_TT:
1635 		if (amdgpu_bo_is_amdgpu_bo(bo) &&
1636 		    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
1637 			return false;
1638 		return true;
1639 
1640 	case TTM_PL_VRAM:
1641 		/* Check each drm MM node individually */
1642 		while (num_pages) {
1643 			if (place->fpfn < (node->start + node->size) &&
1644 			    !(place->lpfn && place->lpfn <= node->start))
1645 				return true;
1646 
1647 			num_pages -= node->size;
1648 			++node;
1649 		}
1650 		return false;
1651 
1652 	default:
1653 		break;
1654 	}
1655 
1656 	return ttm_bo_eviction_valuable(bo, place);
1657 }
1658 
1659 /**
1660  * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
1661  *
1662  * @bo:  The buffer object to read/write
1663  * @offset:  Offset into buffer object
1664  * @buf:  Secondary buffer to write/read from
1665  * @len: Length in bytes of access
1666  * @write:  true if writing
1667  *
1668  * This is used to access VRAM that backs a buffer object via MMIO
1669  * access for debugging purposes.
1670  */
1671 static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1672 				    unsigned long offset,
1673 				    void *buf, int len, int write)
1674 {
1675 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1676 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1677 	struct drm_mm_node *nodes;
1678 	uint32_t value = 0;
1679 	int ret = 0;
1680 	uint64_t pos;
1681 	unsigned long flags;
1682 
1683 	if (bo->mem.mem_type != TTM_PL_VRAM)
1684 		return -EIO;
1685 
1686 	pos = offset;
1687 	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &pos);
1688 	pos += (nodes->start << PAGE_SHIFT);
1689 
1690 	while (len && pos < adev->gmc.mc_vram_size) {
1691 		uint64_t aligned_pos = pos & ~(uint64_t)3;
1692 		uint64_t bytes = 4 - (pos & 3);
1693 		uint32_t shift = (pos & 3) * 8;
1694 		uint32_t mask = 0xffffffff << shift;
1695 
1696 		if (len < bytes) {
1697 			mask &= 0xffffffff >> (bytes - len) * 8;
1698 			bytes = len;
1699 		}
1700 
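		/* Partial dwords go through the MM_INDEX/MM_DATA aperture
		 * with a read-modify-write, full dwords use the bulk VRAM
		 * access helper.
		 */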
1701 		if (mask != 0xffffffff) {
1702 			spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1703 			WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
1704 			WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
1705 			if (!write || mask != 0xffffffff)
1706 				value = RREG32_NO_KIQ(mmMM_DATA);
1707 			if (write) {
1708 				value &= ~mask;
1709 				value |= (*(uint32_t *)buf << shift) & mask;
1710 				WREG32_NO_KIQ(mmMM_DATA, value);
1711 			}
1712 			spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1713 			if (!write) {
1714 				value = (value & mask) >> shift;
1715 				memcpy(buf, &value, bytes);
1716 			}
1717 		} else {
1718 			bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
1719 			bytes = min(bytes - pos, (uint64_t)len & ~0x3ull);
1720 
1721 			amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
1722 						  bytes, write);
1723 		}
1724 
1725 		ret += bytes;
1726 		buf = (uint8_t *)buf + bytes;
1727 		pos += bytes;
1728 		len -= bytes;
1729 		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
1730 			++nodes;
1731 			pos = (nodes->start << PAGE_SHIFT);
1732 		}
1733 	}
1734 
1735 	return ret;
1736 }
1737 
1738 static struct ttm_bo_driver amdgpu_bo_driver = {
1739 	.ttm_tt_create = &amdgpu_ttm_tt_create,
1740 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
1741 	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1742 	.init_mem_type = &amdgpu_init_mem_type,
1743 	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
1744 	.evict_flags = &amdgpu_evict_flags,
1745 	.move = &amdgpu_bo_move,
1746 	.verify_access = &amdgpu_verify_access,
1747 	.move_notify = &amdgpu_bo_move_notify,
1748 	.release_notify = &amdgpu_bo_release_notify,
1749 	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
1750 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1751 	.io_mem_free = &amdgpu_ttm_io_mem_free,
1752 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1753 	.access_memory = &amdgpu_ttm_access_memory,
1754 	.del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
1755 };
1756 
1757 /*
1758  * Firmware Reservation functions
1759  */
1760 /**
1761  * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
1762  *
1763  * @adev: amdgpu_device pointer
1764  *
1765  * free fw reserved vram if it has been reserved.
1766  */
1767 static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
1768 {
1769 	amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
1770 		NULL, &adev->fw_vram_usage.va);
1771 }
1772 
1773 /**
1774  * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
1775  *
1776  * @adev: amdgpu_device pointer
1777  *
1778  * create bo vram reservation from fw.
1779  */
1780 static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1781 {
1782 	uint64_t vram_size = adev->gmc.visible_vram_size;
1783 
1784 	adev->fw_vram_usage.va = NULL;
1785 	adev->fw_vram_usage.reserved_bo = NULL;
1786 
1787 	if (adev->fw_vram_usage.size == 0 ||
1788 	    adev->fw_vram_usage.size > vram_size)
1789 		return 0;
1790 
1791 	return amdgpu_bo_create_kernel_at(adev,
1792 					  adev->fw_vram_usage.start_offset,
1793 					  adev->fw_vram_usage.size,
1794 					  AMDGPU_GEM_DOMAIN_VRAM,
1795 					  &adev->fw_vram_usage.reserved_bo,
1796 					  &adev->fw_vram_usage.va);
1797 }
1798 
1799 /*
 * Memory training reservation functions
1801  */
1802 
1803 /**
1804  * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
1805  *
1806  * @adev: amdgpu_device pointer
1807  *
1808  * free memory training reserved vram if it has been reserved.
1809  */
1810 static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
1811 {
1812 	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1813 
1814 	ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
1815 	amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
1816 	ctx->c2p_bo = NULL;
1817 
1818 	return 0;
1819 }
1820 
1821 static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
1822 {
1823 	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1824 
1825 	memset(ctx, 0, sizeof(*ctx));
1826 
1827 	ctx->c2p_train_data_offset =
1828 		ALIGN((adev->gmc.mc_vram_size - adev->discovery_tmr_size - SZ_1M), SZ_1M);
1829 	ctx->p2c_train_data_offset =
1830 		(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
1831 	ctx->train_data_size =
1832 		GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
1833 
1834 	DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
1835 			ctx->train_data_size,
1836 			ctx->p2c_train_data_offset,
1837 			ctx->c2p_train_data_offset);
1838 }
1839 
1840 /*
1841  * reserve TMR memory at the top of VRAM which holds
1842  * IP Discovery data and is protected by PSP.
1843  */
1844 static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
1845 {
1846 	int ret;
1847 	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1848 	bool mem_train_support = false;
1849 
1850 	if (!amdgpu_sriov_vf(adev)) {
1851 		ret = amdgpu_mem_train_support(adev);
1852 		if (ret == 1)
1853 			mem_train_support = true;
1854 		else if (ret == -1)
1855 			return -EINVAL;
1856 		else
			DRM_DEBUG("memory training is not supported!\n");
1858 	}
1859 
	/*
	 * Query the reserved TMR size through atom firmwareinfo for
	 * Sienna_Cichlid and onwards for all the use cases (IP discovery/
	 * G6 memory training/profiling/diagnostic data, etc.)
	 *
	 * Otherwise, fall back to the legacy approach to check and reserve
	 * TMR blocks for IP discovery data and G6 memory training data
	 * respectively.
	 */
1867 	adev->discovery_tmr_size =
1868 		amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
1869 	if (!adev->discovery_tmr_size)
1870 		adev->discovery_tmr_size = DISCOVERY_TMR_OFFSET;
1871 
1872 	if (mem_train_support) {
1873 		/* reserve vram for mem train according to TMR location */
1874 		amdgpu_ttm_training_data_block_init(adev);
1875 		ret = amdgpu_bo_create_kernel_at(adev,
1876 					 ctx->c2p_train_data_offset,
1877 					 ctx->train_data_size,
1878 					 AMDGPU_GEM_DOMAIN_VRAM,
1879 					 &ctx->c2p_bo,
1880 					 NULL);
1881 		if (ret) {
1882 			DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
1883 			amdgpu_ttm_training_reserve_vram_fini(adev);
1884 			return ret;
1885 		}
1886 		ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
1887 	}
1888 
1889 	ret = amdgpu_bo_create_kernel_at(adev,
1890 				adev->gmc.real_vram_size - adev->discovery_tmr_size,
1891 				adev->discovery_tmr_size,
1892 				AMDGPU_GEM_DOMAIN_VRAM,
1893 				&adev->discovery_memory,
1894 				NULL);
1895 	if (ret) {
1896 		DRM_ERROR("alloc tmr failed(%d)!\n", ret);
1897 		amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
1898 		return ret;
1899 	}
1900 
1901 	return 0;
1902 }
1903 
1904 /**
1905  * amdgpu_ttm_init - Init the memory management (ttm) as well as various
 * gtt/vram related fields.
 *
 * @adev: amdgpu_device pointer
 *
1908  * This initializes all of the memory space pools that the TTM layer
1909  * will need such as the GTT space (system memory mapped to the device),
1910  * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
1911  * can be mapped per VMID.
1912  */
1913 int amdgpu_ttm_init(struct amdgpu_device *adev)
1914 {
1915 	uint64_t gtt_size;
1916 	int r;
1917 	u64 vis_vram_limit;
1918 	void *stolen_vga_buf;
1919 
1920 	mutex_init(&adev->mman.gtt_window_lock);
1921 
	/* No other user of the address space, so set it to 0 */
1923 	r = ttm_bo_device_init(&adev->mman.bdev,
1924 			       &amdgpu_bo_driver,
1925 			       adev->ddev->anon_inode->i_mapping,
1926 			       adev->ddev->vma_offset_manager,
1927 			       dma_addressing_limited(adev->dev));
1928 	if (r) {
1929 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
1930 		return r;
1931 	}
1932 	adev->mman.initialized = true;
1933 
	/* We opt to avoid OOM on system page allocations */
1935 	adev->mman.bdev.no_retry = true;
1936 
1937 	/* Initialize VRAM pool with all of VRAM divided into pages */
1938 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
1939 				adev->gmc.real_vram_size >> PAGE_SHIFT);
1940 	if (r) {
1941 		DRM_ERROR("Failed initializing VRAM heap.\n");
1942 		return r;
1943 	}
1944 
1945 	/* Reduce size of CPU-visible VRAM if requested */
1946 	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
1947 	if (amdgpu_vis_vram_limit > 0 &&
1948 	    vis_vram_limit <= adev->gmc.visible_vram_size)
1949 		adev->gmc.visible_vram_size = vis_vram_limit;
1950 
1951 	/* Change the size here instead of the init above so only lpfn is affected */
1952 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
1953 #ifdef CONFIG_64BIT
1954 	adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
1955 						adev->gmc.visible_vram_size);
1956 #endif
1957 
	/*
	 * The VRAM reserved for firmware must be pinned to its specified
	 * place in VRAM, so reserve it early.
	 */
1962 	r = amdgpu_ttm_fw_reserve_vram_init(adev);
	if (r)
		return r;
1966 
	/*
	 * Only NAVI10 and onward ASICs support IP discovery.
	 * If IP discovery is enabled, a block of memory should be
	 * reserved for IP discovery.
	 */
1972 	if (adev->discovery_bin) {
1973 		r = amdgpu_ttm_reserve_tmr(adev);
1974 		if (r)
1975 			return r;
1976 	}
1977 
	/*
	 * Allocate memory as required for VGA.
	 * This is used for VGA emulation and pre-OS scanout buffers to
	 * avoid display artifacts while transitioning between pre-OS
	 * and driver.
	 */
1982 	r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
1983 				    AMDGPU_GEM_DOMAIN_VRAM,
1984 				    &adev->stolen_vga_memory,
1985 				    NULL, &stolen_vga_buf);
1986 	if (r)
1987 		return r;
1988 
1989 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1990 		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
1991 
	/* Compute GTT size, either based on 3/4 of the system RAM size
	 * or whatever the user passed on module init.
	 */
1994 	if (amdgpu_gtt_size == -1) {
1995 		struct sysinfo si;
1996 
1997 		si_meminfo(&si);
1998 		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
1999 			       adev->gmc.mc_vram_size),
2000 			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
	} else {
		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
	}
2004 
2005 	/* Initialize GTT memory pool */
2006 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
2007 	if (r) {
2008 		DRM_ERROR("Failed initializing GTT heap.\n");
2009 		return r;
2010 	}
2011 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
2012 		 (unsigned)(gtt_size / (1024 * 1024)));
2013 
2014 	/* Initialize various on-chip memory pools */
2015 	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
2016 			   adev->gds.gds_size);
2017 	if (r) {
2018 		DRM_ERROR("Failed initializing GDS heap.\n");
2019 		return r;
2020 	}
2021 
2022 	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
2023 			   adev->gds.gws_size);
2024 	if (r) {
2025 		DRM_ERROR("Failed initializing gws heap.\n");
2026 		return r;
2027 	}
2028 
2029 	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
2030 			   adev->gds.oa_size);
2031 	if (r) {
2032 		DRM_ERROR("Failed initializing oa heap.\n");
2033 		return r;
2034 	}
2035 
2036 	return 0;
2037 }
2038 
2039 /**
2040  * amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm
 *
 * @adev: amdgpu_device pointer
 */
2042 void amdgpu_ttm_late_init(struct amdgpu_device *adev)
2043 {
2044 	void *stolen_vga_buf;
2045 	/* return the VGA stolen memory (if any) back to VRAM */
2046 	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
2047 }
2048 
2049 /**
2050  * amdgpu_ttm_fini - De-initialize the TTM memory pools
 *
 * @adev: amdgpu_device pointer
 */
2052 void amdgpu_ttm_fini(struct amdgpu_device *adev)
2053 {
2054 	if (!adev->mman.initialized)
2055 		return;
2056 
2057 	amdgpu_ttm_training_reserve_vram_fini(adev);
2058 	/* return the IP Discovery TMR memory back to VRAM */
2059 	amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
2060 	amdgpu_ttm_fw_reserve_vram_fini(adev);
2061 
2062 	if (adev->mman.aper_base_kaddr)
2063 		iounmap(adev->mman.aper_base_kaddr);
2064 	adev->mman.aper_base_kaddr = NULL;
2065 
2066 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
2067 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
2068 	ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
2069 	ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
2070 	ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
2071 	ttm_bo_device_release(&adev->mman.bdev);
2072 	adev->mman.initialized = false;
2073 	DRM_INFO("amdgpu: ttm finalized\n");
2074 }
2075 
2076 /**
2077  * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
2078  *
2079  * @adev: amdgpu_device pointer
2080  * @enable: true when we can use buffer functions.
2081  *
2082  * Enable/disable use of buffer functions during suspend/resume. This should
2083  * only be called at bootup or when userspace isn't running.
2084  */
2085 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
2086 {
2087 	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
2088 	uint64_t size;
2089 	int r;
2090 
2091 	if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
2092 	    adev->mman.buffer_funcs_enabled == enable)
2093 		return;
2094 
2095 	if (enable) {
2096 		struct amdgpu_ring *ring;
2097 		struct drm_gpu_scheduler *sched;
2098 
2099 		ring = adev->mman.buffer_funcs_ring;
2100 		sched = &ring->sched;
2101 		r = drm_sched_entity_init(&adev->mman.entity,
					  DRM_SCHED_PRIORITY_KERNEL, &sched,
2103 					  1, NULL);
2104 		if (r) {
2105 			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
2106 				  r);
2107 			return;
2108 		}
2109 	} else {
2110 		drm_sched_entity_destroy(&adev->mman.entity);
2111 		dma_fence_put(man->move);
2112 		man->move = NULL;
2113 	}
2114 
	/* this just adjusts TTM's idea of the size so lpfn is set correctly */
2116 	if (enable)
2117 		size = adev->gmc.real_vram_size;
2118 	else
2119 		size = adev->gmc.visible_vram_size;
2120 	man->size = size >> PAGE_SHIFT;
2121 	adev->mman.buffer_funcs_enabled = enable;
2122 }
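/*
 * Illustrative usage sketch (not part of the original file): callers toggle
 * the buffer functions around periods where the buffer functions ring cannot
 * be used, e.g.
 *
 *	amdgpu_ttm_set_buffer_funcs_status(adev, false);
 *	... tear down or re-initialize the buffer functions ring ...
 *	amdgpu_ttm_set_buffer_funcs_status(adev, true);
 */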
2123 
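/**
 * amdgpu_mmap - file mmap callback for the amdgpu DRM device
 *
 * @filp: file pointer
 * @vma: the VMA describing the requested mapping
 *
 * Forwards the request to ttm_bo_mmap() so buffer objects belonging to
 * this device can be mapped into user space.
 */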
2124 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
2125 {
2126 	struct drm_file *file_priv = filp->private_data;
2127 	struct amdgpu_device *adev = file_priv->minor->dev->dev_private;
2128 
2129 	if (adev == NULL)
2130 		return -EINVAL;
2131 
2132 	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
2133 }
2134 
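/**
 * amdgpu_copy_buffer - schedule a GPU copy between two buffer locations
 *
 * @ring: ring the copy is submitted to
 * @src_offset: source GPU address
 * @dst_offset: destination GPU address
 * @byte_count: number of bytes to copy
 * @resv: optional reservation object to synchronize with before copying
 * @fence: returned fence signaling completion of the copy
 * @direct_submit: submit the IB directly to the ring instead of the scheduler
 * @vm_needs_flush: true if a VM flush is needed before the copy
 * @tmz: true if the buffers are TMZ protected
 *
 * Splits the copy into chunks of at most copy_max_bytes and emits one copy
 * command per chunk.
 */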
2135 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
2136 		       uint64_t dst_offset, uint32_t byte_count,
2137 		       struct dma_resv *resv,
2138 		       struct dma_fence **fence, bool direct_submit,
2139 		       bool vm_needs_flush, bool tmz)
2140 {
2141 	enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
2142 		AMDGPU_IB_POOL_DELAYED;
2143 	struct amdgpu_device *adev = ring->adev;
2144 	struct amdgpu_job *job;
2145 
2146 	uint32_t max_bytes;
2147 	unsigned num_loops, num_dw;
2148 	unsigned i;
2149 	int r;
2150 
2151 	if (direct_submit && !ring->sched.ready) {
2152 		DRM_ERROR("Trying to move memory with ring turned off.\n");
2153 		return -EINVAL;
2154 	}
2155 
2156 	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
2157 	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
2158 	num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
2159 
2160 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
2161 	if (r)
2162 		return r;
2163 
2164 	if (vm_needs_flush) {
2165 		job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
2166 		job->vm_needs_flush = true;
2167 	}
2168 	if (resv) {
2169 		r = amdgpu_sync_resv(adev, &job->sync, resv,
2170 				     AMDGPU_SYNC_ALWAYS,
2171 				     AMDGPU_FENCE_OWNER_UNDEFINED);
2172 		if (r) {
2173 			DRM_ERROR("sync failed (%d).\n", r);
2174 			goto error_free;
2175 		}
2176 	}
2177 
2178 	for (i = 0; i < num_loops; i++) {
2179 		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
2180 
2181 		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
2182 					dst_offset, cur_size_in_bytes, tmz);
2183 
2184 		src_offset += cur_size_in_bytes;
2185 		dst_offset += cur_size_in_bytes;
2186 		byte_count -= cur_size_in_bytes;
2187 	}
2188 
2189 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
2190 	WARN_ON(job->ibs[0].length_dw > num_dw);
2191 	if (direct_submit)
2192 		r = amdgpu_job_submit_direct(job, ring, fence);
2193 	else
2194 		r = amdgpu_job_submit(job, &adev->mman.entity,
2195 				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
2196 	if (r)
2197 		goto error_free;
2198 
2199 	return r;
2200 
2201 error_free:
2202 	amdgpu_job_free(job);
2203 	DRM_ERROR("Error scheduling IBs (%d)\n", r);
2204 	return r;
2205 }
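/*
 * Illustrative usage sketch (not part of the original file): copying size
 * bytes between two GPU addresses through the scheduler and waiting for the
 * result could look roughly like this (src_addr, dst_addr and size are
 * hypothetical locals):
 *
 *	struct dma_fence *fence = NULL;
 *	int r;
 *
 *	r = amdgpu_copy_buffer(adev->mman.buffer_funcs_ring, src_addr,
 *			       dst_addr, size, NULL, &fence,
 *			       false, false, false);
 *	if (!r && fence) {
 *		dma_fence_wait(fence, false);
 *		dma_fence_put(fence);
 *	}
 */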
2206 
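/**
 * amdgpu_fill_buffer - schedule a GPU fill of a buffer object
 *
 * @bo: the buffer object to fill
 * @src_data: 32-bit value the buffer is filled with
 * @resv: optional reservation object to synchronize with before filling
 * @fence: returned fence signaling completion of the fill
 *
 * Walks the drm_mm nodes backing @bo and emits fill commands for each
 * contiguous range, splitting ranges larger than fill_max_bytes.
 */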
2207 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
2208 		       uint32_t src_data,
2209 		       struct dma_resv *resv,
2210 		       struct dma_fence **fence)
2211 {
2212 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
2213 	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
2214 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
2215 
2216 	struct drm_mm_node *mm_node;
2217 	unsigned long num_pages;
2218 	unsigned int num_loops, num_dw;
2219 
2220 	struct amdgpu_job *job;
2221 	int r;
2222 
2223 	if (!adev->mman.buffer_funcs_enabled) {
2224 		DRM_ERROR("Trying to clear memory with ring turned off.\n");
2225 		return -EINVAL;
2226 	}
2227 
2228 	if (bo->tbo.mem.mem_type == TTM_PL_TT) {
2229 		r = amdgpu_ttm_alloc_gart(&bo->tbo);
2230 		if (r)
2231 			return r;
2232 	}
2233 
2234 	num_pages = bo->tbo.num_pages;
2235 	mm_node = bo->tbo.mem.mm_node;
2236 	num_loops = 0;
2237 	while (num_pages) {
2238 		uint64_t byte_count = mm_node->size << PAGE_SHIFT;
2239 
2240 		num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes);
2241 		num_pages -= mm_node->size;
2242 		++mm_node;
2243 	}
2244 	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
2245 
2246 	/* for IB padding */
2247 	num_dw += 64;
2248 
2249 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
2250 				     &job);
2251 	if (r)
2252 		return r;
2253 
2254 	if (resv) {
2255 		r = amdgpu_sync_resv(adev, &job->sync, resv,
2256 				     AMDGPU_SYNC_ALWAYS,
2257 				     AMDGPU_FENCE_OWNER_UNDEFINED);
2258 		if (r) {
2259 			DRM_ERROR("sync failed (%d).\n", r);
2260 			goto error_free;
2261 		}
2262 	}
2263 
2264 	num_pages = bo->tbo.num_pages;
2265 	mm_node = bo->tbo.mem.mm_node;
2266 
2267 	while (num_pages) {
2268 		uint64_t byte_count = mm_node->size << PAGE_SHIFT;
2269 		uint64_t dst_addr;
2270 
2271 		dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
2272 		while (byte_count) {
2273 			uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count,
2274 							   max_bytes);
2275 
2276 			amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
2277 						dst_addr, cur_size_in_bytes);
2278 
2279 			dst_addr += cur_size_in_bytes;
2280 			byte_count -= cur_size_in_bytes;
2281 		}
2282 
2283 		num_pages -= mm_node->size;
2284 		++mm_node;
2285 	}
2286 
2287 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
2288 	WARN_ON(job->ibs[0].length_dw > num_dw);
2289 	r = amdgpu_job_submit(job, &adev->mman.entity,
2290 			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
2291 	if (r)
2292 		goto error_free;
2293 
2294 	return 0;
2295 
2296 error_free:
2297 	amdgpu_job_free(job);
2298 	return r;
2299 }
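/*
 * Illustrative usage sketch (not part of the original file): clearing a
 * reserved BO to zero and waiting for completion, assuming abo is a valid
 * amdgpu_bo:
 *
 *	struct dma_fence *fence = NULL;
 *	int r = amdgpu_fill_buffer(abo, 0, NULL, &fence);
 *
 *	if (!r && fence) {
 *		dma_fence_wait(fence, false);
 *		dma_fence_put(fence);
 *	}
 */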
2300 
2301 #if defined(CONFIG_DEBUG_FS)
2302 
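/**
 * amdgpu_mm_dump_table - dump the state of a TTM memory type manager
 *
 * Debugfs helper that selects the memory type manager named by the
 * drm_info_node data and prints its state through the manager's debug
 * callback.
 */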
2303 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
2304 {
2305 	struct drm_info_node *node = (struct drm_info_node *)m->private;
2306 	unsigned ttm_pl = (uintptr_t)node->info_ent->data;
2307 	struct drm_device *dev = node->minor->dev;
2308 	struct amdgpu_device *adev = dev->dev_private;
2309 	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
2310 	struct drm_printer p = drm_seq_file_printer(m);
2311 
2312 	man->func->debug(man, &p);
2313 	return 0;
2314 }
2315 
2316 static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
2317 	{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM},
2318 	{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT},
2319 	{"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS},
2320 	{"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS},
2321 	{"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA},
2322 	{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
2323 #ifdef CONFIG_SWIOTLB
2324 	{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
2325 #endif
2326 };
2327 
2328 /**
2329  * amdgpu_ttm_vram_read - Linear read access to VRAM
2330  *
2331  * Accesses VRAM via MMIO for debugging purposes.
2332  */
2333 static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
2334 				    size_t size, loff_t *pos)
2335 {
2336 	struct amdgpu_device *adev = file_inode(f)->i_private;
2337 	ssize_t result = 0;
2338 
2339 	if (size & 0x3 || *pos & 0x3)
2340 		return -EINVAL;
2341 
2342 	if (*pos >= adev->gmc.mc_vram_size)
2343 		return -ENXIO;
2344 
2345 	size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
2346 	while (size) {
2347 		size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
2348 		uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
2349 
2350 		amdgpu_device_vram_access(adev, *pos, value, bytes, false);
2351 		if (copy_to_user(buf, value, bytes))
2352 			return -EFAULT;
2353 
2354 		result += bytes;
2355 		buf += bytes;
2356 		*pos += bytes;
2357 		size -= bytes;
2358 	}
2359 
2360 	return result;
2361 }
2362 
2363 /**
2364  * amdgpu_ttm_vram_write - Linear write access to VRAM
2365  *
2366  * Accesses VRAM via MMIO for debugging purposes.
2367  */
2368 static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
2369 				    size_t size, loff_t *pos)
2370 {
2371 	struct amdgpu_device *adev = file_inode(f)->i_private;
2372 	ssize_t result = 0;
2373 	int r;
2374 
2375 	if (size & 0x3 || *pos & 0x3)
2376 		return -EINVAL;
2377 
2378 	if (*pos >= adev->gmc.mc_vram_size)
2379 		return -ENXIO;
2380 
2381 	while (size) {
2382 		unsigned long flags;
2383 		uint32_t value;
2384 
2385 		if (*pos >= adev->gmc.mc_vram_size)
2386 			return result;
2387 
2388 		r = get_user(value, (uint32_t *)buf);
2389 		if (r)
2390 			return r;
2391 
2392 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
2393 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
2394 		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
2395 		WREG32_NO_KIQ(mmMM_DATA, value);
2396 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
2397 
2398 		result += 4;
2399 		buf += 4;
2400 		*pos += 4;
2401 		size -= 4;
2402 	}
2403 
2404 	return result;
2405 }
2406 
2407 static const struct file_operations amdgpu_ttm_vram_fops = {
2408 	.owner = THIS_MODULE,
2409 	.read = amdgpu_ttm_vram_read,
2410 	.write = amdgpu_ttm_vram_write,
2411 	.llseek = default_llseek,
2412 };
2413 
2414 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
2415 
2416 /**
2417  * amdgpu_ttm_gtt_read - Linear read access to GTT memory
2418  */
2419 static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
2420 				   size_t size, loff_t *pos)
2421 {
2422 	struct amdgpu_device *adev = file_inode(f)->i_private;
2423 	ssize_t result = 0;
2424 	int r;
2425 
2426 	while (size) {
2427 		loff_t p = *pos / PAGE_SIZE;
2428 		unsigned off = *pos & ~PAGE_MASK;
2429 		size_t cur_size = min_t(size_t, size, PAGE_SIZE - off);
2430 		struct page *page;
2431 		void *ptr;
2432 
2433 		if (p >= adev->gart.num_cpu_pages)
2434 			return result;
2435 
2436 		page = adev->gart.pages[p];
2437 		if (page) {
2438 			ptr = kmap(page);
2439 			ptr += off;
2440 
2441 			r = copy_to_user(buf, ptr, cur_size);
2442 			kunmap(adev->gart.pages[p]);
		} else {
			r = clear_user(buf, cur_size);
		}
2445 
2446 		if (r)
2447 			return -EFAULT;
2448 
2449 		result += cur_size;
2450 		buf += cur_size;
2451 		*pos += cur_size;
2452 		size -= cur_size;
2453 	}
2454 
2455 	return result;
2456 }
2457 
2458 static const struct file_operations amdgpu_ttm_gtt_fops = {
2459 	.owner = THIS_MODULE,
2460 	.read = amdgpu_ttm_gtt_read,
2461 	.llseek = default_llseek
2462 };
2463 
2464 #endif
2465 
2466 /**
2467  * amdgpu_iomem_read - Virtual read access to GPU mapped memory
2468  *
2469  * This function is used to read memory that has been mapped to the
2470  * GPU and the known addresses are not physical addresses but instead
2471  * bus addresses (e.g., what you'd put in an IB or ring buffer).
2472  */
2473 static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
2474 				 size_t size, loff_t *pos)
2475 {
2476 	struct amdgpu_device *adev = file_inode(f)->i_private;
2477 	struct iommu_domain *dom;
2478 	ssize_t result = 0;
2479 	int r;
2480 
2481 	/* retrieve the IOMMU domain if any for this device */
2482 	dom = iommu_get_domain_for_dev(adev->dev);
2483 
2484 	while (size) {
2485 		phys_addr_t addr = *pos & PAGE_MASK;
2486 		loff_t off = *pos & ~PAGE_MASK;
2487 		size_t bytes = PAGE_SIZE - off;
2488 		unsigned long pfn;
2489 		struct page *p;
2490 		void *ptr;
2491 
2492 		bytes = bytes < size ? bytes : size;
2493 
2494 		/* Translate the bus address to a physical address.  If
2495 		 * the domain is NULL it means there is no IOMMU active
2496 		 * and the address translation is the identity
2497 		 */
2498 		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2499 
2500 		pfn = addr >> PAGE_SHIFT;
2501 		if (!pfn_valid(pfn))
2502 			return -EPERM;
2503 
2504 		p = pfn_to_page(pfn);
2505 		if (p->mapping != adev->mman.bdev.dev_mapping)
2506 			return -EPERM;
2507 
2508 		ptr = kmap(p);
2509 		r = copy_to_user(buf, ptr + off, bytes);
2510 		kunmap(p);
2511 		if (r)
2512 			return -EFAULT;
2513 
2514 		size -= bytes;
2515 		*pos += bytes;
2516 		result += bytes;
2517 	}
2518 
2519 	return result;
2520 }
2521 
2522 /**
2523  * amdgpu_iomem_write - Virtual write access to GPU mapped memory
2524  *
2525  * This function is used to write memory that has been mapped to the
2526  * GPU and the known addresses are not physical addresses but instead
2527  * bus addresses (e.g., what you'd put in an IB or ring buffer).
2528  */
2529 static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
2530 				 size_t size, loff_t *pos)
2531 {
2532 	struct amdgpu_device *adev = file_inode(f)->i_private;
2533 	struct iommu_domain *dom;
2534 	ssize_t result = 0;
2535 	int r;
2536 
2537 	dom = iommu_get_domain_for_dev(adev->dev);
2538 
2539 	while (size) {
2540 		phys_addr_t addr = *pos & PAGE_MASK;
2541 		loff_t off = *pos & ~PAGE_MASK;
2542 		size_t bytes = PAGE_SIZE - off;
2543 		unsigned long pfn;
2544 		struct page *p;
2545 		void *ptr;
2546 
2547 		bytes = bytes < size ? bytes : size;
2548 
2549 		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2550 
2551 		pfn = addr >> PAGE_SHIFT;
2552 		if (!pfn_valid(pfn))
2553 			return -EPERM;
2554 
2555 		p = pfn_to_page(pfn);
2556 		if (p->mapping != adev->mman.bdev.dev_mapping)
2557 			return -EPERM;
2558 
2559 		ptr = kmap(p);
2560 		r = copy_from_user(ptr + off, buf, bytes);
2561 		kunmap(p);
2562 		if (r)
2563 			return -EFAULT;
2564 
2565 		size -= bytes;
2566 		*pos += bytes;
2567 		result += bytes;
2568 	}
2569 
2570 	return result;
2571 }
2572 
2573 static const struct file_operations amdgpu_ttm_iomem_fops = {
2574 	.owner = THIS_MODULE,
2575 	.read = amdgpu_iomem_read,
2576 	.write = amdgpu_iomem_write,
2577 	.llseek = default_llseek
2578 };
2579 
2580 static const struct {
2581 	char *name;
2582 	const struct file_operations *fops;
2583 	int domain;
2584 } ttm_debugfs_entries[] = {
2585 	{ "amdgpu_vram", &amdgpu_ttm_vram_fops, TTM_PL_VRAM },
2586 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
2587 	{ "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
2588 #endif
2589 	{ "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM },
2590 };
2591 
2592 #endif
2593 
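/**
 * amdgpu_ttm_debugfs_init - register the TTM related debugfs entries
 *
 * @adev: amdgpu_device pointer
 *
 * Creates the raw memory access debugfs files (amdgpu_vram, amdgpu_iomem
 * and, when enabled, amdgpu_gtt) and registers the memory manager dump
 * tables. Returns 0 on success or a negative error code.
 */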
2594 int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
2595 {
2596 #if defined(CONFIG_DEBUG_FS)
2597 	unsigned count;
2598 
2599 	struct drm_minor *minor = adev->ddev->primary;
2600 	struct dentry *ent, *root = minor->debugfs_root;
2601 
2602 	for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) {
2603 		ent = debugfs_create_file(
2604 				ttm_debugfs_entries[count].name,
2605 				S_IFREG | S_IRUGO, root,
2606 				adev,
2607 				ttm_debugfs_entries[count].fops);
2608 		if (IS_ERR(ent))
2609 			return PTR_ERR(ent);
2610 		if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
2611 			i_size_write(ent->d_inode, adev->gmc.mc_vram_size);
2612 		else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
2613 			i_size_write(ent->d_inode, adev->gmc.gart_size);
2614 		adev->mman.debugfs_entries[count] = ent;
2615 	}
2616 
2617 	count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);
2618 
2619 #ifdef CONFIG_SWIOTLB
2620 	if (!(adev->need_swiotlb && swiotlb_nr_tbl()))
2621 		--count;
2622 #endif
2623 
2624 	return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
2625 #else
2626 	return 0;
2627 #endif
2628 }
2629