/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v10_0.h"

#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "mmhub/mmhub_2_0_0_sh_mask.h"
#include "dcn/dcn_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
#include "navi10_enum.h"

#include "soc15.h"
#include "soc15_common.h"

#include "nbio_v2_3.h"

#include "gfxhub_v2_0.h"
#include "mmhub_v2_0.h"
#include "athub_v2_0.h"

/* XXX Move this macro to navi10 header file, which is like vid.h for VI. */
#define AMDGPU_NUM_OF_VMIDS	8

#if 0
static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
{
	/* TODO add golden setting for hdp */
};
#endif

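/**
 * gmc_v10_0_vm_fault_interrupt_state - enable/disable VM fault interrupts
 *
 * @adev: amdgpu_device pointer
 * @src: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 *
 * Toggle the protection fault interrupt enable bits in the per-context
 * CNTL registers of both the GFX and MM hubs.
 */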
static int
gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *src, unsigned type,
				   enum amdgpu_interrupt_state state)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i;

	bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	bits[AMDGPU_MMHUB] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		/* MM HUB */
		hub = &adev->vmhub[AMDGPU_MMHUB];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp &= ~bits[AMDGPU_MMHUB];
			WREG32(reg, tmp);
		}

		/* GFX HUB */
		hub = &adev->vmhub[AMDGPU_GFXHUB];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp &= ~bits[AMDGPU_GFXHUB];
			WREG32(reg, tmp);
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		/* MM HUB */
		hub = &adev->vmhub[AMDGPU_MMHUB];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp |= bits[AMDGPU_MMHUB];
			WREG32(reg, tmp);
		}

		/* GFX HUB */
		hub = &adev->vmhub[AMDGPU_GFXHUB];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp |= bits[AMDGPU_GFXHUB];
			WREG32(reg, tmp);
		}
		break;
	default:
		break;
	}

	return 0;
}

static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
				       struct amdgpu_irq_src *source,
				       struct amdgpu_iv_entry *entry)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
	uint32_t status = 0;
	u64 addr;

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (!amdgpu_sriov_vf(adev)) {
		status = RREG32(hub->vm_l2_pro_fault_status);
		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
	}

	if (printk_ratelimit()) {
		dev_err(adev->dev,
			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
			entry->vmid_src ? "mmhub" : "gfxhub",
			entry->src_id, entry->ring_id, entry->vmid,
			entry->pasid);
		dev_err(adev->dev, "  at page 0x%016llx from %d\n",
			addr, entry->client_id);
		if (!amdgpu_sriov_vf(adev))
			dev_err(adev->dev,
				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
				status);
	}

	return 0;
}

static const struct amdgpu_irq_src_funcs gmc_v10_0_irq_funcs = {
	.set = gmc_v10_0_vm_fault_interrupt_state,
	.process = gmc_v10_0_process_interrupt,
};

static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs;
}

static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
					     uint32_t flush_type)
{
	u32 req = 0;

	/* invalidate using legacy mode on vmid */
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);

	return req;
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

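/**
 * gmc_v10_0_flush_vm_hub - flush the TLB of a single VMHUB via MMIO
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush (AMDGPU_GFXHUB or AMDGPU_MMHUB)
 * @flush_type: the flush type
 *
 * Write a legacy invalidation request to invalidation engine 17 of the
 * selected hub and poll the ACK register until the request completes.
 */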
static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
				   unsigned int vmhub, uint32_t flush_type)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
	u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
	/* Use register 17 for GART */
	const unsigned eng = 17;
	unsigned int i;

	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);

	/* Wait for ACK with a delay. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
		tmp &= 1 << vmid;
		if (tmp)
			break;

		udelay(1);
	}

	if (i < adev->usec_timeout)
		return;

	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}

/**
 * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table.
 */
static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
				    uint32_t vmid, uint32_t flush_type)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct dma_fence *fence;
	struct amdgpu_job *job;

	int r;

	/* flush hdp cache */
	adev->nbio_funcs->hdp_flush(adev, NULL);

	mutex_lock(&adev->mman.gtt_window_lock);

	gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0);
	if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
	    adev->asic_type != CHIP_NAVI10) {
		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0);
		mutex_unlock(&adev->mman.gtt_window_lock);
		return;
	}

	/* The SDMA on Navi has a bug which can theoretically result in memory
	 * corruption if an invalidation happens at the same time as a VA
	 * translation. Avoid this by doing the invalidation from the SDMA
	 * itself.
	 */
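	/*
	 * An otherwise empty IB with vm_needs_flush set causes the job
	 * submission path to emit the GFXHUB invalidation on the SDMA
	 * ring rather than through MMIO.
	 */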
	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job);
	if (r)
		goto error_alloc;

	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
	job->vm_needs_flush = true;
	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_submit;

	mutex_unlock(&adev->mman.gtt_window_lock);

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return;

error_submit:
	amdgpu_job_free(job);

error_alloc:
	mutex_unlock(&adev->mman.gtt_window_lock);
	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
}

static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					     unsigned vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);

	/* wait for the invalidate to complete */
	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
				  1 << vmid, 1 << vmid);

	return pd_addr;
}

static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
					 unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	if (ring->funcs->vmhub == AMDGPU_GFXHUB)
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}

/*
 * PTE format on NAVI 10:
 * 63:59 reserved
 * 58:57 reserved
 * 56 F
 * 55 L
 * 54 reserved
 * 53:52 SW
 * 51 T
 * 50:48 mtype
 * 47:12 4k physical page base address
 * 11:7 fragment
 * 6 write
 * 5 read
 * 4 exe
 * 3 Z
 * 2 snooped
 * 1 system
 * 0 valid
 *
 * PDE format on NAVI 10:
 * 63:59 block fragment size
 * 58:55 reserved
 * 54 P
 * 53:48 reserved
 * 47:6 physical base address of PD or PTE
 * 5:3 reserved
 * 2 C
 * 1 system
 * 0 valid
 */
static uint64_t gmc_v10_0_get_vm_pte_flags(struct amdgpu_device *adev,
					   uint32_t flags)
{
	uint64_t pte_flag = 0;

	if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
		pte_flag |= AMDGPU_PTE_EXECUTABLE;
	if (flags & AMDGPU_VM_PAGE_READABLE)
		pte_flag |= AMDGPU_PTE_READABLE;
	if (flags & AMDGPU_VM_PAGE_WRITEABLE)
		pte_flag |= AMDGPU_PTE_WRITEABLE;

	switch (flags & AMDGPU_VM_MTYPE_MASK) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
		break;
	case AMDGPU_VM_MTYPE_NC:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
		break;
	case AMDGPU_VM_MTYPE_WC:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
		break;
	case AMDGPU_VM_MTYPE_CC:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
		break;
	case AMDGPU_VM_MTYPE_UC:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
		break;
	default:
		pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
		break;
	}

	if (flags & AMDGPU_VM_PAGE_PRT)
		pte_flag |= AMDGPU_PTE_PRT;

	return pte_flag;
}

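/**
 * gmc_v10_0_get_vm_pde - adjust the address and flags of a PDE
 *
 * @adev: amdgpu_device pointer
 * @level: page directory level of the entry
 * @addr: physical address the entry points to, updated in place
 * @flags: PDE flags, updated in place
 *
 * Convert a VRAM address to an MC address and, when further translation
 * is enabled, set the block fragment size on PDB1 entries and the
 * translate-further bit on PDB0 entries.
 */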
static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
				 uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE))
			*flags |= AMDGPU_PDE_BFS(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE)
			*flags &= ~AMDGPU_PDE_PTE;
		else
			*flags |= AMDGPU_PTE_TF;
	}
}

static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
	.get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags,
	.get_vm_pde = gmc_v10_0_get_vm_pde
};

static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
{
	if (adev->gmc.gmc_funcs == NULL)
		adev->gmc.gmc_funcs = &gmc_v10_0_gmc_funcs;
}

static int gmc_v10_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v10_0_set_gmc_funcs(adev);
	gmc_v10_0_set_irq_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =
		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
	adev->gmc.private_aperture_end =
		adev->gmc.private_aperture_start + (4ULL << 30) - 1;

	return 0;
}

static int gmc_v10_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
	unsigned i;

	for (i = 0; i < adev->num_rings; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];
		unsigned vmhub = ring->funcs->vmhub;

		ring->vm_inv_eng = vm_inv_eng[vmhub]++;
		dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
			 ring->idx, ring->name, ring->vm_inv_eng,
			 ring->funcs->vmhub);
	}

	/* Engine 17 is used for GART flushes */
	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
		BUG_ON(vm_inv_eng[i] > 17);

	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}

static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
					struct amdgpu_gmc *mc)
{
	u64 base = 0;

	if (!amdgpu_sriov_vf(adev))
		base = gfxhub_v2_0_get_fb_location(adev);

	amdgpu_gmc_vram_location(adev, &adev->gmc, base);
	amdgpu_gmc_gart_location(adev, mc);

	/* base offset of vram pages */
	adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev);
}

/**
 * gmc_v10_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
{
	int chansize, numchan;

	if (!amdgpu_emu_mode) {
		adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
	} else {
		/* hard code vram_width for emulation */
		chansize = 128;
		numchan = 1;
		adev->gmc.vram_width = numchan * chansize;
	}

	/* Could aper size report 0 ? */
	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

	/* get_memsize() returns the size in MB, convert it to bytes */
	adev->gmc.mc_vram_size =
		adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
	adev->gmc.visible_vram_size = adev->gmc.aper_size;

	/* In case the PCI BAR is larger than the actual amount of vram */
	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

	/* set the gart size */
	if (amdgpu_gart_size == -1) {
		switch (adev->asic_type) {
		case CHIP_NAVI10:
		default:
			adev->gmc.gart_size = 512ULL << 20;
			break;
		}
	} else {
		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
	}

	gmc_v10_0_vram_gtt_location(adev, &adev->gmc);

	return 0;
}

static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo) {
		WARN(1, "NAVI10 PCIE GART already initialized\n");
		return 0;
	}

	/* Initialize common gart structure */
	r = amdgpu_gart_init(adev);
	if (r)
		return r;

	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
				    AMDGPU_PTE_EXECUTABLE;

	return amdgpu_gart_table_vram_alloc(adev);
}

static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
	unsigned size;

	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
	} else {
		u32 viewport;
		u32 pitch;

		viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
		pitch = RREG32_SOC15(DCE, 0, mmHUBPREQ0_DCSURF_SURFACE_PITCH);
		size = (REG_GET_FIELD(viewport,
				      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
			REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
			4);
	}
	/* return 0 if the pre-OS buffer uses up most of vram */
	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) {
		DRM_ERROR("Warning: pre-OS buffer uses most of vram, "
			  "be aware of gart table overwrite\n");
		return 0;
	}

	return size;
}

static int gmc_v10_0_sw_init(void *handle)
{
	int r;
	int dma_bits;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfxhub_v2_0_init(adev);
	mmhub_v2_0_init(adev);

	spin_lock_init(&adev->gmc.invalidate_lock);

	adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		/*
		 * To fulfill 4-level page support,
		 * vm size is 256TB (48bit), maximum size of Navi10,
		 * block size 512 (9bit)
		 */
		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	default:
		break;
	}

	/* VMC and UTCL2 page fault interrupts */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
			      VMC_1_0__SRCID__VM_FAULT,
			      &adev->gmc.vm_fault);
	if (r)
		return r;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
			      UTCL2_1_0__SRCID__FAULT,
			      &adev->gmc.vm_fault);
	if (r)
		return r;

	/*
	 * Set the internal MC address mask. This is the max address of the
	 * GPU's internal address space.
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

	/*
	 * Reserve 8M stolen memory for navi10 like vega10
	 * TODO: will check if it's really needed on asic.
	 */
	if (amdgpu_emu_mode == 1)
		adev->gmc.stolen_size = 0;
	else
		adev->gmc.stolen_size = 9 * 1024 * 1024;

	/*
	 * Set DMA mask + need_dma32 flags.
	 * PCIE - can handle 44-bits.
	 * IGP - can handle 44-bits
	 * PCI - dma32 for legacy pci gart, 44 bits on navi10
	 */
	adev->need_dma32 = false;
	dma_bits = adev->need_dma32 ? 32 : 44;

	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
	if (r) {
		adev->need_dma32 = true;
		dma_bits = 32;
		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
	}

	r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
	if (r) {
		pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
		printk(KERN_WARNING "amdgpu: No coherent DMA available.\n");
	}

	r = gmc_v10_0_mc_init(adev);
	if (r)
		return r;

	adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev);

	/* Memory manager */
	r = amdgpu_bo_init(adev);
	if (r)
		return r;

	r = gmc_v10_0_gart_init(adev);
	if (r)
		return r;

	/*
	 * number of VMs
	 * VMID 0 is reserved for System
	 * amdgpu graphics/compute will use VMIDs 1-7
	 * amdkfd will use VMIDs 8-15
	 */
	adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
	adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;

	amdgpu_vm_manager_init(adev);

	return 0;
}

/**
 * gmc_v10_0_gart_fini - vm fini callback
 *
 * @adev: amdgpu_device pointer
 *
 * Tears down the driver GART/VM setup.
 */
static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
{
	amdgpu_gart_table_vram_free(adev);
	amdgpu_gart_fini(adev);
}

static int gmc_v10_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_vm_manager_fini(adev);
	gmc_v10_0_gart_fini(adev);
	amdgpu_gem_force_release(adev);
	amdgpu_bo_fini(adev);

	return 0;
}

static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
		break;
	default:
		break;
	}
}

/**
 * gmc_v10_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
{
	int r;
	bool value;
	u32 tmp;

	if (adev->gart.bo == NULL) {
		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}

	r = amdgpu_gart_table_vram_pin(adev);
	if (r)
		return r;

	r = gfxhub_v2_0_gart_enable(adev);
	if (r)
		return r;

	r = mmhub_v2_0_gart_enable(adev);
	if (r)
		return r;

	tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL);
	tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
	WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);

	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);

	/* Flush HDP after it is initialized */
	adev->nbio_funcs->hdp_flush(adev, NULL);

	value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;

	gfxhub_v2_0_set_fault_enable_default(adev, value);
	mmhub_v2_0_set_fault_enable_default(adev, value);
	gmc_v10_0_flush_gpu_tlb(adev, 0, 0);

	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(adev->gmc.gart_size >> 20),
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));

	adev->gart.ready = true;

	return 0;
}

static int gmc_v10_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* The sequence of these two function calls matters. */
	gmc_v10_0_init_golden_registers(adev);

	r = gmc_v10_0_gart_enable(adev);
	if (r)
		return r;

	return 0;
}

/**
 * gmc_v10_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page tables.
 */
static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
{
	gfxhub_v2_0_gart_disable(adev);
	mmhub_v2_0_gart_disable(adev);
	amdgpu_gart_table_vram_unpin(adev);
}

static int gmc_v10_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any GMC register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
	gmc_v10_0_gart_disable(adev);

	return 0;
}

static int gmc_v10_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v10_0_hw_fini(adev);

	return 0;
}

static int gmc_v10_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gmc_v10_0_hw_init(adev);
	if (r)
		return r;

	amdgpu_vmid_reset_all(adev);

	return 0;
}

static bool gmc_v10_0_is_idle(void *handle)
{
	/* MC is always ready in GMC v10. */
	return true;
}

static int gmc_v10_0_wait_for_idle(void *handle)
{
	/* There is no need to wait for MC idle in GMC v10. */
	return 0;
}

static int gmc_v10_0_soft_reset(void *handle)
{
	return 0;
}

static int gmc_v10_0_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = mmhub_v2_0_set_clockgating(adev, state);
	if (r)
		return r;

	return athub_v2_0_set_clockgating(adev, state);
}

static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	mmhub_v2_0_get_clockgating(adev, flags);

	athub_v2_0_get_clockgating(adev, flags);
}

static int gmc_v10_0_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	return 0;
}

const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
	.name = "gmc_v10_0",
	.early_init = gmc_v10_0_early_init,
	.late_init = gmc_v10_0_late_init,
	.sw_init = gmc_v10_0_sw_init,
	.sw_fini = gmc_v10_0_sw_fini,
	.hw_init = gmc_v10_0_hw_init,
	.hw_fini = gmc_v10_0_hw_fini,
	.suspend = gmc_v10_0_suspend,
	.resume = gmc_v10_0_resume,
	.is_idle = gmc_v10_0_is_idle,
	.wait_for_idle = gmc_v10_0_wait_for_idle,
	.soft_reset = gmc_v10_0_soft_reset,
	.set_clockgating_state = gmc_v10_0_set_clockgating_state,
	.set_powergating_state = gmc_v10_0_set_powergating_state,
	.get_clockgating_state = gmc_v10_0_get_clockgating_state,
};

const struct amdgpu_ip_block_version gmc_v10_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GMC,
	.major = 10,
	.minor = 0,
	.rev = 0,
	.funcs = &gmc_v10_0_ip_funcs,
};