/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/module.h>

#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "vi.h"
#include "soc15.h"
#include "nv.h"

bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
	/*
	 * By now all MMIO pages except the mailbox are blocked if MMIO
	 * blocking is enabled in the hypervisor. Use SCRATCH_REG0 to test.
	 */
	return RREG32_NO_KIQ(0xc040) == 0xffffffff;
}

void amdgpu_virt_init_setting(struct amdgpu_device *adev)
{
	/* enable virtual display */
	if (adev->mode_info.num_crtc == 0)
		adev->mode_info.num_crtc = 1;
	adev->enable_virtual_display = true;
	adev->ddev->driver->driver_features &= ~DRIVER_ATOMIC;
	adev->cg_flags = 0;
	adev->pg_flags = 0;
}

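/**
 * amdgpu_virt_kiq_reg_write_reg_wait() - write one register and poll another via the KIQ
 * @adev: amdgpu device.
 * @reg0: register to write.
 * @reg1: register to poll.
 * @ref: reference value compared against @reg1.
 * @mask: mask applied to @reg1 before the comparison.
 *
 * Emits a reg_write_reg_wait packet on the KIQ ring and polls its fence.
 * If the fence does not signal within MAX_KIQ_REG_WAIT, the wait is retried
 * up to MAX_KIQ_REG_TRY times (except in interrupt context) before an error
 * is logged.
 */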
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
					uint32_t reg0, uint32_t reg1,
					uint32_t ref, uint32_t mask)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
					    ref, mask);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for IRQ context */
	if (r < 1 && in_interrupt())
		goto failed_kiq;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq;

	return;

failed_undo:
	amdgpu_ring_undo(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq:
	pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
}

/**
 * amdgpu_virt_request_full_gpu() - request full gpu access
 * @adev: amdgpu device.
 * @init: true if called at driver init time.
 *
 * When starting to init/fini the driver, full GPU access must first be
 * requested from the host.
 * Return: Zero on success, error code otherwise.
 */
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init)
{
	struct amdgpu_virt *virt = &adev->virt;
	int r;

	if (virt->ops && virt->ops->req_full_gpu) {
		r = virt->ops->req_full_gpu(adev, init);
		if (r)
			return r;

		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
	}

	return 0;
}

/**
 * amdgpu_virt_release_full_gpu() - release full gpu access
 * @adev: amdgpu device.
 * @init: true if called at driver init time.
 *
 * When finishing driver init/fini, full GPU access must be released back
 * to the host.
 * Return: Zero on success, error code otherwise.
 */
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init)
{
	struct amdgpu_virt *virt = &adev->virt;
	int r;

	if (virt->ops && virt->ops->rel_full_gpu) {
		r = virt->ops->rel_full_gpu(adev, init);
		if (r)
			return r;

		adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
	}
	return 0;
}

/**
 * amdgpu_virt_reset_gpu() - reset gpu
 * @adev: amdgpu device.
 *
 * Send a reset command to the GPU hypervisor to reset the GPU that the VM
 * is using.
 * Return: Zero on success, error code otherwise.
 */
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;
	int r;

	if (virt->ops && virt->ops->reset_gpu) {
		r = virt->ops->reset_gpu(adev);
		if (r)
			return r;

		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
	}

	return 0;
}

void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;

	if (virt->ops && virt->ops->req_init_data)
		virt->ops->req_init_data(adev);

	if (adev->virt.req_init_data_ver > 0)
		DRM_INFO("host supports REQ_INIT_DATA handshake\n");
	else
		DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
}

/**
 * amdgpu_virt_wait_reset() - wait for gpu reset completion
 * @adev: amdgpu device.
 *
 * Wait for the GPU reset to complete.
 * Return: Zero on success, error code otherwise.
 */
int amdgpu_virt_wait_reset(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;

	if (!virt->ops || !virt->ops->wait_reset)
		return -EINVAL;

	return virt->ops->wait_reset(adev);
}

/**
 * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
 * @adev: amdgpu device.
 *
 * The MM table is used by UVD and VCE for their initialization.
 * Return: Zero on success, error code otherwise.
 */
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
		return 0;

	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM,
				    &adev->virt.mm_table.bo,
				    &adev->virt.mm_table.gpu_addr,
				    (void *)&adev->virt.mm_table.cpu_addr);
	if (r) {
		DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
		return r;
	}

	memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
	DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
		 adev->virt.mm_table.gpu_addr,
		 adev->virt.mm_table.cpu_addr);
	return 0;
}

/**
 * amdgpu_virt_free_mm_table() - free mm table memory
 * @adev: amdgpu device.
 *
 * Free MM table memory.
 */
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
{
	if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr)
		return;

	amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
			      &adev->virt.mm_table.gpu_addr,
			      (void *)&adev->virt.mm_table.cpu_addr);
	adev->virt.mm_table.gpu_addr = 0;
}

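/**
 * amdgpu_virt_fw_reserve_get_checksum() - compute the checksum of a pf2vf/vf2pf message
 * @obj: start of the message buffer.
 * @obj_size: size of the message buffer in bytes.
 * @key: checksum seed provided by the host.
 * @chksum: checksum value whose bytes are subtracted from the sum, so the
 *          stored checksum field does not affect the result.
 *
 * Return: the byte-wise sum of the buffer, seeded with @key and excluding
 * the bytes of @chksum.
 */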
int amdgpu_virt_fw_reserve_get_checksum(void *obj,
					unsigned long obj_size,
					unsigned int key,
					unsigned int chksum)
{
	unsigned int ret = key;
	unsigned long i = 0;
	unsigned char *pos;

	pos = (unsigned char *)obj;
	/* calculate checksum */
	for (i = 0; i < obj_size; ++i)
		ret += *(pos + i);
	/* minus the chksum itself */
	pos = (unsigned char *)&chksum;
	for (i = 0; i < sizeof(chksum); ++i)
		ret -= *(pos + i);
	return ret;
}

static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;
	struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data;
	/* GPU will be marked bad on the host if the bad page count exceeds 10,
	 * so allocating 512 entries is enough.
	 */
	unsigned int align_space = 512;
	void *bps = NULL;
	struct amdgpu_bo **bps_bo = NULL;

	*data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
	if (!*data)
		return -ENOMEM;

	bps = kmalloc(align_space * sizeof((*data)->bps), GFP_KERNEL);
	bps_bo = kmalloc(align_space * sizeof((*data)->bps_bo), GFP_KERNEL);

	if (!bps || !bps_bo) {
		kfree(bps);
		kfree(bps_bo);
		kfree(*data);
		return -ENOMEM;
	}

	(*data)->bps = bps;
	(*data)->bps_bo = bps_bo;
	(*data)->count = 0;
	(*data)->last_reserved = 0;

	virt->ras_init_done = true;

	return 0;
}

static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;
	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
	struct amdgpu_bo *bo;
	int i;

	if (!data)
		return;

	for (i = data->last_reserved - 1; i >= 0; i--) {
		bo = data->bps_bo[i];
		amdgpu_bo_free_kernel(&bo, NULL, NULL);
		data->bps_bo[i] = bo;
		data->last_reserved = i;
	}
}

void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;
	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;

	virt->ras_init_done = false;

	if (!data)
		return;

	amdgpu_virt_ras_release_bp(adev);

	kfree(data->bps);
	kfree(data->bps_bo);
	kfree(data);
	virt->virt_eh_data = NULL;
}

static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
				    struct eeprom_table_record *bps, int pages)
{
	struct amdgpu_virt *virt = &adev->virt;
	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;

	if (!data)
		return;

	memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
	data->count += pages;
}

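/*
 * Reserve a VRAM buffer at every retired page recorded in the bad-page list
 * that has not been reserved yet, so those pages can no longer be handed out
 * by the VRAM manager. Reservation failures are only logged; see the comment
 * inside the loop below.
 */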
static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;
	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
	struct amdgpu_bo *bo = NULL;
	uint64_t bp;
	int i;

	if (!data)
		return;

	for (i = data->last_reserved; i < data->count; i++) {
		bp = data->bps[i].retired_page;

		/* There are two cases where a reservation error should be ignored:
		 * 1) a ras bad page has been allocated (used by someone);
		 * 2) a ras bad page has been reserved (duplicate error injection
		 *    for one page);
		 */
		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
					       AMDGPU_GPU_PAGE_SIZE,
					       AMDGPU_GEM_DOMAIN_VRAM,
					       &bo, NULL))
			DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);

		data->bps_bo[i] = bo;
		data->last_reserved = i + 1;
		bo = NULL;
	}
}

static bool amdgpu_virt_ras_check_bad_page(struct amdgpu_device *adev,
					   uint64_t retired_page)
{
	struct amdgpu_virt *virt = &adev->virt;
	struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
	int i;

	if (!data)
		return true;

	for (i = 0; i < data->count; i++)
		if (retired_page == data->bps[i].retired_page)
			return true;

	return false;
}

static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
				     uint64_t bp_block_offset, uint32_t bp_block_size)
{
	struct eeprom_table_record bp;
	uint64_t retired_page;
	uint32_t bp_idx, bp_cnt;

	if (bp_block_size) {
		bp_cnt = bp_block_size / sizeof(uint64_t);
		for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
			retired_page = *(uint64_t *)(adev->fw_vram_usage.va +
					bp_block_offset + bp_idx * sizeof(uint64_t));
			bp.retired_page = retired_page;

			if (amdgpu_virt_ras_check_bad_page(adev, retired_page))
				continue;

			amdgpu_virt_ras_add_bps(adev, &bp, 1);

			amdgpu_virt_ras_reserve_bps(adev);
		}
	}
}

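/**
 * amdgpu_virt_init_data_exchange() - set up the pf2vf/vf2pf exchange region
 * @adev: amdgpu device.
 *
 * Locate the pf2vf message placed by the host in the reserved FW VRAM
 * region, pick up the GIM feature flags and any bad-page block it
 * advertises, and, if the pf2vf checksum is valid, initialize the vf2pf
 * reply with the driver version and its own checksum.
 */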
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
{
	uint32_t pf2vf_size = 0;
	uint32_t checksum = 0;
	uint32_t checkval;
	char *str;
	uint64_t bp_block_offset = 0;
	uint32_t bp_block_size = 0;
	struct amdgim_pf2vf_info_v2 *pf2vf_v2 = NULL;

	adev->virt.fw_reserve.p_pf2vf = NULL;
	adev->virt.fw_reserve.p_vf2pf = NULL;

	if (adev->fw_vram_usage.va != NULL) {
		adev->virt.fw_reserve.p_pf2vf =
			(struct amd_sriov_msg_pf2vf_info_header *)(
			adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET);
		AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size);
		AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum);
		AMDGPU_FW_VRAM_PF2VF_READ(adev, feature_flags, &adev->virt.gim_feature);

		/* pf2vf message must fit in 4K */
		if (pf2vf_size > 0 && pf2vf_size < 4096) {
			if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
				pf2vf_v2 = (struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf;
				bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_L & 0xFFFFFFFF) |
						  ((((uint64_t)pf2vf_v2->bp_block_offset_H) << 32) & 0xFFFFFFFF00000000);
				bp_block_size = pf2vf_v2->bp_block_size;

				if (bp_block_size && !adev->virt.ras_init_done)
					amdgpu_virt_init_ras_err_handler_data(adev);

				if (adev->virt.ras_init_done)
					amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
			}

			checkval = amdgpu_virt_fw_reserve_get_checksum(
				adev->virt.fw_reserve.p_pf2vf, pf2vf_size,
				adev->virt.fw_reserve.checksum_key, checksum);
			if (checkval == checksum) {
				adev->virt.fw_reserve.p_vf2pf =
					((void *)adev->virt.fw_reserve.p_pf2vf +
					pf2vf_size);
				memset((void *)adev->virt.fw_reserve.p_vf2pf, 0,
				       sizeof(amdgim_vf2pf_info));
				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.version,
							   AMDGPU_FW_VRAM_VF2PF_VER);
				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.size,
							   sizeof(amdgim_vf2pf_info));
				AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version,
							  &str);
#ifdef MODULE
				if (THIS_MODULE->version != NULL)
					strcpy(str, THIS_MODULE->version);
				else
#endif
					strcpy(str, "N/A");
				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert,
							   0);
				AMDGPU_FW_VRAM_VF2PF_WRITE(adev, checksum,
					amdgpu_virt_fw_reserve_get_checksum(
					adev->virt.fw_reserve.p_vf2pf,
					pf2vf_size,
					adev->virt.fw_reserve.checksum_key, 0));
			}
		}
	}
}

void amdgpu_detect_virtualization(struct amdgpu_device *adev)
{
	uint32_t reg;

	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_FIJI:
		reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA20:
	case CHIP_NAVI10:
	case CHIP_NAVI12:
	case CHIP_SIENNA_CICHLID:
	case CHIP_ARCTURUS:
		reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
		break;
	default: /* other chips don't support SRIOV */
		reg = 0;
		break;
	}

	if (reg & 1)
		adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;

	if (reg & 0x80000000)
		adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;

	if (!reg) {
		if (is_virtual_machine()) /* passthrough mode excludes sriov mode */
			adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
	}

	/* we have the ability to check now */
	if (amdgpu_sriov_vf(adev)) {
		switch (adev->asic_type) {
		case CHIP_TONGA:
		case CHIP_FIJI:
			vi_set_virt_ops(adev);
			break;
		case CHIP_VEGA10:
		case CHIP_VEGA20:
		case CHIP_ARCTURUS:
			soc15_set_virt_ops(adev);
			break;
		case CHIP_NAVI10:
		case CHIP_NAVI12:
		case CHIP_SIENNA_CICHLID:
			nv_set_virt_ops(adev);
			/* try to send GPU_INIT_DATA request to host */
			amdgpu_virt_request_init_data(adev);
			break;
		default: /* other chips don't support SRIOV */
			DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
			break;
		}
	}
}

static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
{
	return amdgpu_sriov_is_debug(adev) ? true : false;
}

static bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
{
	return amdgpu_sriov_is_normal(adev) ? true : false;
}

int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
{
	if (!amdgpu_sriov_vf(adev) ||
	    amdgpu_virt_access_debugfs_is_kiq(adev))
		return 0;

	if (amdgpu_virt_access_debugfs_is_mmio(adev))
		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
	else
		return -EPERM;

	return 0;
}

void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
}

enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev)
{
	enum amdgpu_sriov_vf_mode mode;

	if (amdgpu_sriov_vf(adev)) {
		if (amdgpu_sriov_is_pp_one_vf(adev))
			mode = SRIOV_VF_MODE_ONE_VF;
		else
			mode = SRIOV_VF_MODE_MULTI_VF;
	} else {
		mode = SRIOV_VF_MODE_BARE_METAL;
	}

	return mode;
}