1 /*
2  * Copyright 2018 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/io-64-nonatomic-lo-hi.h>
28 
29 #include "amdgpu.h"
30 #include "amdgpu_ras.h"
31 #include "amdgpu_xgmi.h"
32 
33 /**
34  * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
35  *
36  * @bo: the BO to get the PDE for
37  * @level: the level in the PD hirarchy
38  * @addr: resulting addr
39  * @flags: resulting flags
40  *
41  * Get the address and flags to be used for a PDE (Page Directory Entry).
42  */
43 void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
44 			       uint64_t *addr, uint64_t *flags)
45 {
46 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
47 	struct ttm_dma_tt *ttm;
48 
49 	switch (bo->tbo.mem.mem_type) {
50 	case TTM_PL_TT:
51 		ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
52 		*addr = ttm->dma_address[0];
53 		break;
54 	case TTM_PL_VRAM:
55 		*addr = amdgpu_bo_gpu_offset(bo);
56 		break;
57 	default:
58 		*addr = 0;
59 		break;
60 	}
61 	*flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
62 	amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
63 }
64 
65 /**
66  * amdgpu_gmc_pd_addr - return the address of the root directory
67  *
68  */
69 uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
70 {
71 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
72 	uint64_t pd_addr;
73 
74 	/* TODO: move that into ASIC specific code */
75 	if (adev->asic_type >= CHIP_VEGA10) {
76 		uint64_t flags = AMDGPU_PTE_VALID;
77 
78 		amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
79 		pd_addr |= flags;
80 	} else {
81 		pd_addr = amdgpu_bo_gpu_offset(bo);
82 	}
83 	return pd_addr;
84 }
85 
86 /**
87  * amdgpu_gmc_set_pte_pde - update the page tables using CPU
88  *
89  * @adev: amdgpu_device pointer
90  * @cpu_pt_addr: cpu address of the page table
91  * @gpu_page_idx: entry in the page table to update
92  * @addr: dst addr to write into pte/pde
93  * @flags: access flags
94  *
95  * Update the page tables using CPU.
96  */
97 int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
98 				uint32_t gpu_page_idx, uint64_t addr,
99 				uint64_t flags)
100 {
101 	void __iomem *ptr = (void *)cpu_pt_addr;
102 	uint64_t value;
103 
104 	/*
105 	 * The following is for PTE only. GART does not have PDEs.
106 	*/
107 	value = addr & 0x0000FFFFFFFFF000ULL;
108 	value |= flags;
109 	writeq(value, ptr + (gpu_page_idx * 8));
110 	return 0;
111 }
112 
113 /**
114  * amdgpu_gmc_agp_addr - return the address in the AGP address space
115  *
116  * @tbo: TTM BO which needs the address, must be in GTT domain
117  *
118  * Tries to figure out how to access the BO through the AGP aperture. Returns
119  * AMDGPU_BO_INVALID_OFFSET if that is not possible.
120  */
121 uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
122 {
123 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
124 	struct ttm_dma_tt *ttm;
125 
126 	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
127 		return AMDGPU_BO_INVALID_OFFSET;
128 
129 	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
130 	if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
131 		return AMDGPU_BO_INVALID_OFFSET;
132 
133 	return adev->gmc.agp_start + ttm->dma_address[0];
134 }
135 
136 /**
137  * amdgpu_gmc_vram_location - try to find VRAM location
138  *
139  * @adev: amdgpu device structure holding all necessary informations
140  * @mc: memory controller structure holding memory informations
141  * @base: base address at which to put VRAM
142  *
143  * Function will try to place VRAM at base address provided
144  * as parameter.
145  */
146 void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
147 			      u64 base)
148 {
149 	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
150 
151 	mc->vram_start = base;
152 	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
153 	if (limit && limit < mc->real_vram_size)
154 		mc->real_vram_size = limit;
155 
156 	if (mc->xgmi.num_physical_nodes == 0) {
157 		mc->fb_start = mc->vram_start;
158 		mc->fb_end = mc->vram_end;
159 	}
160 	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
161 			mc->mc_vram_size >> 20, mc->vram_start,
162 			mc->vram_end, mc->real_vram_size >> 20);
163 }
164 
165 /**
166  * amdgpu_gmc_gart_location - try to find GART location
167  *
168  * @adev: amdgpu device structure holding all necessary informations
169  * @mc: memory controller structure holding memory informations
170  *
171  * Function will place try to place GART before or after VRAM.
172  *
173  * If GART size is bigger than space left then we ajust GART size.
174  * Thus function will never fails.
175  */
176 void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
177 {
178 	const uint64_t four_gb = 0x100000000ULL;
179 	u64 size_af, size_bf;
180 	/*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
181 	u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
182 
183 	mc->gart_size += adev->pm.smu_prv_buffer_size;
184 
185 	/* VCE doesn't like it when BOs cross a 4GB segment, so align
186 	 * the GART base on a 4GB boundary as well.
187 	 */
188 	size_bf = mc->fb_start;
189 	size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
190 
191 	if (mc->gart_size > max(size_bf, size_af)) {
192 		dev_warn(adev->dev, "limiting GART\n");
193 		mc->gart_size = max(size_bf, size_af);
194 	}
195 
196 	if ((size_bf >= mc->gart_size && size_bf < size_af) ||
197 	    (size_af < mc->gart_size))
198 		mc->gart_start = 0;
199 	else
200 		mc->gart_start = max_mc_address - mc->gart_size + 1;
201 
202 	mc->gart_start &= ~(four_gb - 1);
203 	mc->gart_end = mc->gart_start + mc->gart_size - 1;
204 	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
205 			mc->gart_size >> 20, mc->gart_start, mc->gart_end);
206 }
207 
208 /**
209  * amdgpu_gmc_agp_location - try to find AGP location
210  * @adev: amdgpu device structure holding all necessary informations
211  * @mc: memory controller structure holding memory informations
212  *
213  * Function will place try to find a place for the AGP BAR in the MC address
214  * space.
215  *
216  * AGP BAR will be assigned the largest available hole in the address space.
217  * Should be called after VRAM and GART locations are setup.
218  */
219 void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
220 {
221 	const uint64_t sixteen_gb = 1ULL << 34;
222 	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
223 	u64 size_af, size_bf;
224 
225 	if (amdgpu_sriov_vf(adev)) {
226 		mc->agp_start = 0xffffffffffff;
227 		mc->agp_end = 0x0;
228 		mc->agp_size = 0;
229 
230 		return;
231 	}
232 
233 	if (mc->fb_start > mc->gart_start) {
234 		size_bf = (mc->fb_start & sixteen_gb_mask) -
235 			ALIGN(mc->gart_end + 1, sixteen_gb);
236 		size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
237 	} else {
238 		size_bf = mc->fb_start & sixteen_gb_mask;
239 		size_af = (mc->gart_start & sixteen_gb_mask) -
240 			ALIGN(mc->fb_end + 1, sixteen_gb);
241 	}
242 
243 	if (size_bf > size_af) {
244 		mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
245 		mc->agp_size = size_bf;
246 	} else {
247 		mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
248 		mc->agp_size = size_af;
249 	}
250 
251 	mc->agp_end = mc->agp_start + mc->agp_size - 1;
252 	dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
253 			mc->agp_size >> 20, mc->agp_start, mc->agp_end);
254 }
255 
256 /**
257  * amdgpu_gmc_filter_faults - filter VM faults
258  *
259  * @adev: amdgpu device structure
260  * @addr: address of the VM fault
261  * @pasid: PASID of the process causing the fault
262  * @timestamp: timestamp of the fault
263  *
264  * Returns:
265  * True if the fault was filtered and should not be processed further.
266  * False if the fault is a new one and needs to be handled.
267  */
268 bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
269 			      uint16_t pasid, uint64_t timestamp)
270 {
271 	struct amdgpu_gmc *gmc = &adev->gmc;
272 
273 	uint64_t stamp, key = addr << 4 | pasid;
274 	struct amdgpu_gmc_fault *fault;
275 	uint32_t hash;
276 
277 	/* If we don't have space left in the ring buffer return immediately */
278 	stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
279 		AMDGPU_GMC_FAULT_TIMEOUT;
280 	if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
281 		return true;
282 
283 	/* Try to find the fault in the hash */
284 	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
285 	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
286 	while (fault->timestamp >= stamp) {
287 		uint64_t tmp;
288 
289 		if (fault->key == key)
290 			return true;
291 
292 		tmp = fault->timestamp;
293 		fault = &gmc->fault_ring[fault->next];
294 
295 		/* Check if the entry was reused */
296 		if (fault->timestamp >= tmp)
297 			break;
298 	}
299 
300 	/* Add the fault to the ring */
301 	fault = &gmc->fault_ring[gmc->last_fault];
302 	fault->key = key;
303 	fault->timestamp = timestamp;
304 
305 	/* And update the hash */
306 	fault->next = gmc->fault_hash[hash].idx;
307 	gmc->fault_hash[hash].idx = gmc->last_fault++;
308 	return false;
309 }
310 
311 int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
312 {
313 	int r;
314 
315 	if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
316 		r = adev->umc.funcs->ras_late_init(adev);
317 		if (r)
318 			return r;
319 	}
320 
321 	if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
322 		r = adev->mmhub.funcs->ras_late_init(adev);
323 		if (r)
324 			return r;
325 	}
326 
327 	return amdgpu_xgmi_ras_late_init(adev);
328 }
329 
/**
 * amdgpu_gmc_ras_fini - run the RAS fini helpers used by GMC
 *
 * @adev: amdgpu device structure
 *
 * Calls the UMC, MMHUB and XGMI RAS fini helpers, in that order.
 */
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
	amdgpu_umc_ras_fini(adev);
	amdgpu_mmhub_ras_fini(adev);
	amdgpu_xgmi_ras_fini(adev);
}
336 
337 	/*
338 	 * The latest engine allocation on gfx9/10 is:
339 	 * Engine 2, 3: firmware
340 	 * Engine 0, 1, 4~16: amdgpu ring,
341 	 *                    subject to change when ring number changes
342 	 * Engine 17: Gart flushes
343 	 */
344 #define GFXHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
345 #define MMHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
346 
347 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
348 {
349 	struct amdgpu_ring *ring;
350 	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
351 		{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
352 		GFXHUB_FREE_VM_INV_ENGS_BITMAP};
353 	unsigned i;
354 	unsigned vmhub, inv_eng;
355 
356 	for (i = 0; i < adev->num_rings; ++i) {
357 		ring = adev->rings[i];
358 		vmhub = ring->funcs->vmhub;
359 
360 		inv_eng = ffs(vm_inv_engs[vmhub]);
361 		if (!inv_eng) {
362 			dev_err(adev->dev, "no VM inv eng for ring %s\n",
363 				ring->name);
364 			return -EINVAL;
365 		}
366 
367 		ring->vm_inv_eng = inv_eng - 1;
368 		vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
369 
370 		dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
371 			 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
372 	}
373 
374 	return 0;
375 }
376