1 /* 2 * Copyright 2019 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/firmware.h> 25 #include <drm/drm_exec.h> 26 27 #include "amdgpu_mes.h" 28 #include "amdgpu.h" 29 #include "soc15_common.h" 30 #include "amdgpu_mes_ctx.h" 31 32 #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 33 #define AMDGPU_ONE_DOORBELL_SIZE 8 34 35 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) 36 { 37 return roundup(AMDGPU_ONE_DOORBELL_SIZE * 38 AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, 39 PAGE_SIZE); 40 } 41 42 int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev, 43 unsigned int *doorbell_index) 44 { 45 int r = ida_simple_get(&adev->mes.doorbell_ida, 2, 46 adev->mes.max_doorbell_slices, 47 GFP_KERNEL); 48 if (r > 0) 49 *doorbell_index = r; 50 51 return r; 52 } 53 54 void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev, 55 unsigned int doorbell_index) 56 { 57 if (doorbell_index) 58 ida_simple_remove(&adev->mes.doorbell_ida, doorbell_index); 59 } 60 61 unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar( 62 struct amdgpu_device *adev, 63 uint32_t doorbell_index, 64 unsigned int doorbell_id) 65 { 66 return ((doorbell_index * 67 amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32) + 68 doorbell_id * 2); 69 } 70 71 static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, 72 struct amdgpu_mes_process *process, 73 int ip_type, uint64_t *doorbell_index) 74 { 75 unsigned int offset, found; 76 struct amdgpu_mes *mes = &adev->mes; 77 78 if (ip_type == AMDGPU_RING_TYPE_SDMA) 79 offset = adev->doorbell_index.sdma_engine[0]; 80 else 81 offset = 0; 82 83 found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset); 84 if (found >= mes->num_mes_dbs) { 85 DRM_WARN("No doorbell available\n"); 86 return -ENOSPC; 87 } 88 89 set_bit(found, mes->doorbell_bitmap); 90 91 /* Get the absolute doorbell index on BAR */ 92 *doorbell_index = mes->db_start_dw_offset + found * 2; 93 return 0; 94 } 95 96 static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, 97 struct amdgpu_mes_process *process, 98 uint32_t doorbell_index) 99 { 100 unsigned int old, rel_index; 101 struct amdgpu_mes *mes = &adev->mes; 102 103 /* Find the relative index of the doorbell in this object */ 104 rel_index = (doorbell_index - mes->db_start_dw_offset) / 2; 105 old = test_and_clear_bit(rel_index, mes->doorbell_bitmap); 106 WARN_ON(!old); 107 } 108 109 static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) 110 { 111 int i; 112 struct amdgpu_mes *mes = &adev->mes; 113 114 /* Bitmap for dynamic allocation of kernel doorbells */ 115 mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL); 116 if (!mes->doorbell_bitmap) { 117 DRM_ERROR("Failed to allocate MES doorbell bitmap\n"); 118 return -ENOMEM; 119 } 120 121 mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE; 122 for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) { 123 adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2; 124 set_bit(i, mes->doorbell_bitmap); 125 } 126 127 return 0; 128 } 129 130 static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) 131 { 132 bitmap_free(adev->mes.doorbell_bitmap); 133 } 134 135 int amdgpu_mes_init(struct amdgpu_device *adev) 136 { 137 int i, r; 138 139 adev->mes.adev = adev; 140 141 idr_init(&adev->mes.pasid_idr); 142 idr_init(&adev->mes.gang_id_idr); 143 idr_init(&adev->mes.queue_id_idr); 144 ida_init(&adev->mes.doorbell_ida); 145 spin_lock_init(&adev->mes.queue_id_lock); 146 spin_lock_init(&adev->mes.ring_lock); 147 mutex_init(&adev->mes.mutex_hidden); 148 149 adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; 150 adev->mes.vmid_mask_mmhub = 0xffffff00; 151 adev->mes.vmid_mask_gfxhub = 0xffffff00; 152 153 for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { 154 /* use only 1st MEC pipes */ 155 if (i >= 4) 156 continue; 157 adev->mes.compute_hqd_mask[i] = 0xc; 158 } 159 160 for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) 161 adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; 162 163 for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { 164 if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0)) 165 adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; 166 /* zero sdma_hqd_mask for non-existent engine */ 167 else if (adev->sdma.num_instances == 1) 168 adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc; 169 else 170 adev->mes.sdma_hqd_mask[i] = 0xfc; 171 } 172 173 r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); 174 if (r) { 175 dev_err(adev->dev, 176 "(%d) ring trail_fence_offs wb alloc failed\n", r); 177 goto error_ids; 178 } 179 adev->mes.sch_ctx_gpu_addr = 180 adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); 181 adev->mes.sch_ctx_ptr = 182 (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; 183 184 r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); 185 if (r) { 186 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); 187 dev_err(adev->dev, 188 "(%d) query_status_fence_offs wb alloc failed\n", r); 189 goto error_ids; 190 } 191 adev->mes.query_status_fence_gpu_addr = 192 adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); 193 adev->mes.query_status_fence_ptr = 194 (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; 195 196 r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); 197 if (r) { 198 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); 199 amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); 200 dev_err(adev->dev, 201 "(%d) read_val_offs alloc failed\n", r); 202 goto error_ids; 203 } 204 adev->mes.read_val_gpu_addr = 205 adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); 206 adev->mes.read_val_ptr = 207 (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs]; 208 209 r = amdgpu_mes_doorbell_init(adev); 210 if (r) 211 goto error; 212 213 return 0; 214 215 error: 216 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); 217 amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); 218 amdgpu_device_wb_free(adev, adev->mes.read_val_offs); 219 error_ids: 220 idr_destroy(&adev->mes.pasid_idr); 221 idr_destroy(&adev->mes.gang_id_idr); 222 idr_destroy(&adev->mes.queue_id_idr); 223 ida_destroy(&adev->mes.doorbell_ida); 224 mutex_destroy(&adev->mes.mutex_hidden); 225 return r; 226 } 227 228 void amdgpu_mes_fini(struct amdgpu_device *adev) 229 { 230 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); 231 amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); 232 amdgpu_device_wb_free(adev, adev->mes.read_val_offs); 233 amdgpu_mes_doorbell_free(adev); 234 235 idr_destroy(&adev->mes.pasid_idr); 236 idr_destroy(&adev->mes.gang_id_idr); 237 idr_destroy(&adev->mes.queue_id_idr); 238 ida_destroy(&adev->mes.doorbell_ida); 239 mutex_destroy(&adev->mes.mutex_hidden); 240 } 241 242 static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) 243 { 244 amdgpu_bo_free_kernel(&q->mqd_obj, 245 &q->mqd_gpu_addr, 246 &q->mqd_cpu_ptr); 247 } 248 249 int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, 250 struct amdgpu_vm *vm) 251 { 252 struct amdgpu_mes_process *process; 253 int r; 254 255 /* allocate the mes process buffer */ 256 process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL); 257 if (!process) { 258 DRM_ERROR("no more memory to create mes process\n"); 259 return -ENOMEM; 260 } 261 262 process->doorbell_bitmap = 263 kzalloc(DIV_ROUND_UP(AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, 264 BITS_PER_BYTE), GFP_KERNEL); 265 if (!process->doorbell_bitmap) { 266 DRM_ERROR("failed to allocate doorbell bitmap\n"); 267 kfree(process); 268 return -ENOMEM; 269 } 270 271 /* allocate the process context bo and map it */ 272 r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE, 273 AMDGPU_GEM_DOMAIN_GTT, 274 &process->proc_ctx_bo, 275 &process->proc_ctx_gpu_addr, 276 &process->proc_ctx_cpu_ptr); 277 if (r) { 278 DRM_ERROR("failed to allocate process context bo\n"); 279 goto clean_up_memory; 280 } 281 memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); 282 283 /* 284 * Avoid taking any other locks under MES lock to avoid circular 285 * lock dependencies. 286 */ 287 amdgpu_mes_lock(&adev->mes); 288 289 /* add the mes process to idr list */ 290 r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, 291 GFP_KERNEL); 292 if (r < 0) { 293 DRM_ERROR("failed to lock pasid=%d\n", pasid); 294 goto clean_up_ctx; 295 } 296 297 /* allocate the starting doorbell index of the process */ 298 r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index); 299 if (r < 0) { 300 DRM_ERROR("failed to allocate doorbell for process\n"); 301 goto clean_up_pasid; 302 } 303 304 DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index); 305 306 INIT_LIST_HEAD(&process->gang_list); 307 process->vm = vm; 308 process->pasid = pasid; 309 process->process_quantum = adev->mes.default_process_quantum; 310 process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo); 311 312 amdgpu_mes_unlock(&adev->mes); 313 return 0; 314 315 clean_up_pasid: 316 idr_remove(&adev->mes.pasid_idr, pasid); 317 amdgpu_mes_unlock(&adev->mes); 318 clean_up_ctx: 319 amdgpu_bo_free_kernel(&process->proc_ctx_bo, 320 &process->proc_ctx_gpu_addr, 321 &process->proc_ctx_cpu_ptr); 322 clean_up_memory: 323 kfree(process->doorbell_bitmap); 324 kfree(process); 325 return r; 326 } 327 328 void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) 329 { 330 struct amdgpu_mes_process *process; 331 struct amdgpu_mes_gang *gang, *tmp1; 332 struct amdgpu_mes_queue *queue, *tmp2; 333 struct mes_remove_queue_input queue_input; 334 unsigned long flags; 335 int r; 336 337 /* 338 * Avoid taking any other locks under MES lock to avoid circular 339 * lock dependencies. 340 */ 341 amdgpu_mes_lock(&adev->mes); 342 343 process = idr_find(&adev->mes.pasid_idr, pasid); 344 if (!process) { 345 DRM_WARN("pasid %d doesn't exist\n", pasid); 346 amdgpu_mes_unlock(&adev->mes); 347 return; 348 } 349 350 /* Remove all queues from hardware */ 351 list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { 352 list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { 353 spin_lock_irqsave(&adev->mes.queue_id_lock, flags); 354 idr_remove(&adev->mes.queue_id_idr, queue->queue_id); 355 spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); 356 357 queue_input.doorbell_offset = queue->doorbell_off; 358 queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; 359 360 r = adev->mes.funcs->remove_hw_queue(&adev->mes, 361 &queue_input); 362 if (r) 363 DRM_WARN("failed to remove hardware queue\n"); 364 } 365 366 idr_remove(&adev->mes.gang_id_idr, gang->gang_id); 367 } 368 369 amdgpu_mes_free_process_doorbells(adev, process->doorbell_index); 370 idr_remove(&adev->mes.pasid_idr, pasid); 371 amdgpu_mes_unlock(&adev->mes); 372 373 /* free all memory allocated by the process */ 374 list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { 375 /* free all queues in the gang */ 376 list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { 377 amdgpu_mes_queue_free_mqd(queue); 378 list_del(&queue->list); 379 kfree(queue); 380 } 381 amdgpu_bo_free_kernel(&gang->gang_ctx_bo, 382 &gang->gang_ctx_gpu_addr, 383 &gang->gang_ctx_cpu_ptr); 384 list_del(&gang->list); 385 kfree(gang); 386 387 } 388 amdgpu_bo_free_kernel(&process->proc_ctx_bo, 389 &process->proc_ctx_gpu_addr, 390 &process->proc_ctx_cpu_ptr); 391 kfree(process->doorbell_bitmap); 392 kfree(process); 393 } 394 395 int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, 396 struct amdgpu_mes_gang_properties *gprops, 397 int *gang_id) 398 { 399 struct amdgpu_mes_process *process; 400 struct amdgpu_mes_gang *gang; 401 int r; 402 403 /* allocate the mes gang buffer */ 404 gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL); 405 if (!gang) { 406 return -ENOMEM; 407 } 408 409 /* allocate the gang context bo and map it to cpu space */ 410 r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE, 411 AMDGPU_GEM_DOMAIN_GTT, 412 &gang->gang_ctx_bo, 413 &gang->gang_ctx_gpu_addr, 414 &gang->gang_ctx_cpu_ptr); 415 if (r) { 416 DRM_ERROR("failed to allocate process context bo\n"); 417 goto clean_up_mem; 418 } 419 memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); 420 421 /* 422 * Avoid taking any other locks under MES lock to avoid circular 423 * lock dependencies. 424 */ 425 amdgpu_mes_lock(&adev->mes); 426 427 process = idr_find(&adev->mes.pasid_idr, pasid); 428 if (!process) { 429 DRM_ERROR("pasid %d doesn't exist\n", pasid); 430 r = -EINVAL; 431 goto clean_up_ctx; 432 } 433 434 /* add the mes gang to idr list */ 435 r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, 436 GFP_KERNEL); 437 if (r < 0) { 438 DRM_ERROR("failed to allocate idr for gang\n"); 439 goto clean_up_ctx; 440 } 441 442 gang->gang_id = r; 443 *gang_id = r; 444 445 INIT_LIST_HEAD(&gang->queue_list); 446 gang->process = process; 447 gang->priority = gprops->priority; 448 gang->gang_quantum = gprops->gang_quantum ? 449 gprops->gang_quantum : adev->mes.default_gang_quantum; 450 gang->global_priority_level = gprops->global_priority_level; 451 gang->inprocess_gang_priority = gprops->inprocess_gang_priority; 452 list_add_tail(&gang->list, &process->gang_list); 453 454 amdgpu_mes_unlock(&adev->mes); 455 return 0; 456 457 clean_up_ctx: 458 amdgpu_mes_unlock(&adev->mes); 459 amdgpu_bo_free_kernel(&gang->gang_ctx_bo, 460 &gang->gang_ctx_gpu_addr, 461 &gang->gang_ctx_cpu_ptr); 462 clean_up_mem: 463 kfree(gang); 464 return r; 465 } 466 467 int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) 468 { 469 struct amdgpu_mes_gang *gang; 470 471 /* 472 * Avoid taking any other locks under MES lock to avoid circular 473 * lock dependencies. 474 */ 475 amdgpu_mes_lock(&adev->mes); 476 477 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 478 if (!gang) { 479 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 480 amdgpu_mes_unlock(&adev->mes); 481 return -EINVAL; 482 } 483 484 if (!list_empty(&gang->queue_list)) { 485 DRM_ERROR("queue list is not empty\n"); 486 amdgpu_mes_unlock(&adev->mes); 487 return -EBUSY; 488 } 489 490 idr_remove(&adev->mes.gang_id_idr, gang->gang_id); 491 list_del(&gang->list); 492 amdgpu_mes_unlock(&adev->mes); 493 494 amdgpu_bo_free_kernel(&gang->gang_ctx_bo, 495 &gang->gang_ctx_gpu_addr, 496 &gang->gang_ctx_cpu_ptr); 497 498 kfree(gang); 499 500 return 0; 501 } 502 503 int amdgpu_mes_suspend(struct amdgpu_device *adev) 504 { 505 struct idr *idp; 506 struct amdgpu_mes_process *process; 507 struct amdgpu_mes_gang *gang; 508 struct mes_suspend_gang_input input; 509 int r, pasid; 510 511 /* 512 * Avoid taking any other locks under MES lock to avoid circular 513 * lock dependencies. 514 */ 515 amdgpu_mes_lock(&adev->mes); 516 517 idp = &adev->mes.pasid_idr; 518 519 idr_for_each_entry(idp, process, pasid) { 520 list_for_each_entry(gang, &process->gang_list, list) { 521 r = adev->mes.funcs->suspend_gang(&adev->mes, &input); 522 if (r) 523 DRM_ERROR("failed to suspend pasid %d gangid %d", 524 pasid, gang->gang_id); 525 } 526 } 527 528 amdgpu_mes_unlock(&adev->mes); 529 return 0; 530 } 531 532 int amdgpu_mes_resume(struct amdgpu_device *adev) 533 { 534 struct idr *idp; 535 struct amdgpu_mes_process *process; 536 struct amdgpu_mes_gang *gang; 537 struct mes_resume_gang_input input; 538 int r, pasid; 539 540 /* 541 * Avoid taking any other locks under MES lock to avoid circular 542 * lock dependencies. 543 */ 544 amdgpu_mes_lock(&adev->mes); 545 546 idp = &adev->mes.pasid_idr; 547 548 idr_for_each_entry(idp, process, pasid) { 549 list_for_each_entry(gang, &process->gang_list, list) { 550 r = adev->mes.funcs->resume_gang(&adev->mes, &input); 551 if (r) 552 DRM_ERROR("failed to resume pasid %d gangid %d", 553 pasid, gang->gang_id); 554 } 555 } 556 557 amdgpu_mes_unlock(&adev->mes); 558 return 0; 559 } 560 561 static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, 562 struct amdgpu_mes_queue *q, 563 struct amdgpu_mes_queue_properties *p) 564 { 565 struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; 566 u32 mqd_size = mqd_mgr->mqd_size; 567 int r; 568 569 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, 570 AMDGPU_GEM_DOMAIN_GTT, 571 &q->mqd_obj, 572 &q->mqd_gpu_addr, &q->mqd_cpu_ptr); 573 if (r) { 574 dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r); 575 return r; 576 } 577 memset(q->mqd_cpu_ptr, 0, mqd_size); 578 579 r = amdgpu_bo_reserve(q->mqd_obj, false); 580 if (unlikely(r != 0)) 581 goto clean_up; 582 583 return 0; 584 585 clean_up: 586 amdgpu_bo_free_kernel(&q->mqd_obj, 587 &q->mqd_gpu_addr, 588 &q->mqd_cpu_ptr); 589 return r; 590 } 591 592 static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, 593 struct amdgpu_mes_queue *q, 594 struct amdgpu_mes_queue_properties *p) 595 { 596 struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; 597 struct amdgpu_mqd_prop mqd_prop = {0}; 598 599 mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr; 600 mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr; 601 mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr; 602 mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr; 603 mqd_prop.queue_size = p->queue_size; 604 mqd_prop.use_doorbell = true; 605 mqd_prop.doorbell_index = p->doorbell_off; 606 mqd_prop.eop_gpu_addr = p->eop_gpu_addr; 607 mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority; 608 mqd_prop.hqd_queue_priority = p->hqd_queue_priority; 609 mqd_prop.hqd_active = false; 610 611 mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); 612 613 amdgpu_bo_unreserve(q->mqd_obj); 614 } 615 616 int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, 617 struct amdgpu_mes_queue_properties *qprops, 618 int *queue_id) 619 { 620 struct amdgpu_mes_queue *queue; 621 struct amdgpu_mes_gang *gang; 622 struct mes_add_queue_input queue_input; 623 unsigned long flags; 624 int r; 625 626 memset(&queue_input, 0, sizeof(struct mes_add_queue_input)); 627 628 /* allocate the mes queue buffer */ 629 queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); 630 if (!queue) { 631 DRM_ERROR("Failed to allocate memory for queue\n"); 632 return -ENOMEM; 633 } 634 635 /* Allocate the queue mqd */ 636 r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops); 637 if (r) 638 goto clean_up_memory; 639 640 /* 641 * Avoid taking any other locks under MES lock to avoid circular 642 * lock dependencies. 643 */ 644 amdgpu_mes_lock(&adev->mes); 645 646 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 647 if (!gang) { 648 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 649 r = -EINVAL; 650 goto clean_up_mqd; 651 } 652 653 /* add the mes gang to idr list */ 654 spin_lock_irqsave(&adev->mes.queue_id_lock, flags); 655 r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0, 656 GFP_ATOMIC); 657 if (r < 0) { 658 spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); 659 goto clean_up_mqd; 660 } 661 spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); 662 *queue_id = queue->queue_id = r; 663 664 /* allocate a doorbell index for the queue */ 665 r = amdgpu_mes_kernel_doorbell_get(adev, gang->process, 666 qprops->queue_type, 667 &qprops->doorbell_off); 668 if (r) 669 goto clean_up_queue_id; 670 671 /* initialize the queue mqd */ 672 amdgpu_mes_queue_init_mqd(adev, queue, qprops); 673 674 /* add hw queue to mes */ 675 queue_input.process_id = gang->process->pasid; 676 677 queue_input.page_table_base_addr = 678 adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr - 679 adev->gmc.vram_start; 680 681 queue_input.process_va_start = 0; 682 queue_input.process_va_end = 683 (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; 684 queue_input.process_quantum = gang->process->process_quantum; 685 queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr; 686 queue_input.gang_quantum = gang->gang_quantum; 687 queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; 688 queue_input.inprocess_gang_priority = gang->inprocess_gang_priority; 689 queue_input.gang_global_priority_level = gang->global_priority_level; 690 queue_input.doorbell_offset = qprops->doorbell_off; 691 queue_input.mqd_addr = queue->mqd_gpu_addr; 692 queue_input.wptr_addr = qprops->wptr_gpu_addr; 693 queue_input.wptr_mc_addr = qprops->wptr_mc_addr; 694 queue_input.queue_type = qprops->queue_type; 695 queue_input.paging = qprops->paging; 696 queue_input.is_kfd_process = 0; 697 698 r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); 699 if (r) { 700 DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n", 701 qprops->doorbell_off); 702 goto clean_up_doorbell; 703 } 704 705 DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, " 706 "queue type=%d, doorbell=0x%llx\n", 707 gang->process->pasid, gang_id, qprops->queue_type, 708 qprops->doorbell_off); 709 710 queue->ring = qprops->ring; 711 queue->doorbell_off = qprops->doorbell_off; 712 queue->wptr_gpu_addr = qprops->wptr_gpu_addr; 713 queue->queue_type = qprops->queue_type; 714 queue->paging = qprops->paging; 715 queue->gang = gang; 716 queue->ring->mqd_ptr = queue->mqd_cpu_ptr; 717 list_add_tail(&queue->list, &gang->queue_list); 718 719 amdgpu_mes_unlock(&adev->mes); 720 return 0; 721 722 clean_up_doorbell: 723 amdgpu_mes_kernel_doorbell_free(adev, gang->process, 724 qprops->doorbell_off); 725 clean_up_queue_id: 726 spin_lock_irqsave(&adev->mes.queue_id_lock, flags); 727 idr_remove(&adev->mes.queue_id_idr, queue->queue_id); 728 spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); 729 clean_up_mqd: 730 amdgpu_mes_unlock(&adev->mes); 731 amdgpu_mes_queue_free_mqd(queue); 732 clean_up_memory: 733 kfree(queue); 734 return r; 735 } 736 737 int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) 738 { 739 unsigned long flags; 740 struct amdgpu_mes_queue *queue; 741 struct amdgpu_mes_gang *gang; 742 struct mes_remove_queue_input queue_input; 743 int r; 744 745 /* 746 * Avoid taking any other locks under MES lock to avoid circular 747 * lock dependencies. 748 */ 749 amdgpu_mes_lock(&adev->mes); 750 751 /* remove the mes gang from idr list */ 752 spin_lock_irqsave(&adev->mes.queue_id_lock, flags); 753 754 queue = idr_find(&adev->mes.queue_id_idr, queue_id); 755 if (!queue) { 756 spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); 757 amdgpu_mes_unlock(&adev->mes); 758 DRM_ERROR("queue id %d doesn't exist\n", queue_id); 759 return -EINVAL; 760 } 761 762 idr_remove(&adev->mes.queue_id_idr, queue_id); 763 spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); 764 765 DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n", 766 queue->doorbell_off); 767 768 gang = queue->gang; 769 queue_input.doorbell_offset = queue->doorbell_off; 770 queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; 771 772 r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); 773 if (r) 774 DRM_ERROR("failed to remove hardware queue, queue id = %d\n", 775 queue_id); 776 777 list_del(&queue->list); 778 amdgpu_mes_kernel_doorbell_free(adev, gang->process, 779 queue->doorbell_off); 780 amdgpu_mes_unlock(&adev->mes); 781 782 amdgpu_mes_queue_free_mqd(queue); 783 kfree(queue); 784 return 0; 785 } 786 787 int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, 788 struct amdgpu_ring *ring, 789 enum amdgpu_unmap_queues_action action, 790 u64 gpu_addr, u64 seq) 791 { 792 struct mes_unmap_legacy_queue_input queue_input; 793 int r; 794 795 queue_input.action = action; 796 queue_input.queue_type = ring->funcs->type; 797 queue_input.doorbell_offset = ring->doorbell_index; 798 queue_input.pipe_id = ring->pipe; 799 queue_input.queue_id = ring->queue; 800 queue_input.trail_fence_addr = gpu_addr; 801 queue_input.trail_fence_data = seq; 802 803 r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); 804 if (r) 805 DRM_ERROR("failed to unmap legacy queue\n"); 806 807 return r; 808 } 809 810 uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) 811 { 812 struct mes_misc_op_input op_input; 813 int r, val = 0; 814 815 op_input.op = MES_MISC_OP_READ_REG; 816 op_input.read_reg.reg_offset = reg; 817 op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr; 818 819 if (!adev->mes.funcs->misc_op) { 820 DRM_ERROR("mes rreg is not supported!\n"); 821 goto error; 822 } 823 824 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 825 if (r) 826 DRM_ERROR("failed to read reg (0x%x)\n", reg); 827 else 828 val = *(adev->mes.read_val_ptr); 829 830 error: 831 return val; 832 } 833 834 int amdgpu_mes_wreg(struct amdgpu_device *adev, 835 uint32_t reg, uint32_t val) 836 { 837 struct mes_misc_op_input op_input; 838 int r; 839 840 op_input.op = MES_MISC_OP_WRITE_REG; 841 op_input.write_reg.reg_offset = reg; 842 op_input.write_reg.reg_value = val; 843 844 if (!adev->mes.funcs->misc_op) { 845 DRM_ERROR("mes wreg is not supported!\n"); 846 r = -EINVAL; 847 goto error; 848 } 849 850 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 851 if (r) 852 DRM_ERROR("failed to write reg (0x%x)\n", reg); 853 854 error: 855 return r; 856 } 857 858 int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, 859 uint32_t reg0, uint32_t reg1, 860 uint32_t ref, uint32_t mask) 861 { 862 struct mes_misc_op_input op_input; 863 int r; 864 865 op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT; 866 op_input.wrm_reg.reg0 = reg0; 867 op_input.wrm_reg.reg1 = reg1; 868 op_input.wrm_reg.ref = ref; 869 op_input.wrm_reg.mask = mask; 870 871 if (!adev->mes.funcs->misc_op) { 872 DRM_ERROR("mes reg_write_reg_wait is not supported!\n"); 873 r = -EINVAL; 874 goto error; 875 } 876 877 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 878 if (r) 879 DRM_ERROR("failed to reg_write_reg_wait\n"); 880 881 error: 882 return r; 883 } 884 885 int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, 886 uint32_t val, uint32_t mask) 887 { 888 struct mes_misc_op_input op_input; 889 int r; 890 891 op_input.op = MES_MISC_OP_WRM_REG_WAIT; 892 op_input.wrm_reg.reg0 = reg; 893 op_input.wrm_reg.ref = val; 894 op_input.wrm_reg.mask = mask; 895 896 if (!adev->mes.funcs->misc_op) { 897 DRM_ERROR("mes reg wait is not supported!\n"); 898 r = -EINVAL; 899 goto error; 900 } 901 902 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 903 if (r) 904 DRM_ERROR("failed to reg_write_reg_wait\n"); 905 906 error: 907 return r; 908 } 909 910 int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, 911 uint64_t process_context_addr, 912 uint32_t spi_gdbg_per_vmid_cntl, 913 const uint32_t *tcp_watch_cntl, 914 uint32_t flags, 915 bool trap_en) 916 { 917 struct mes_misc_op_input op_input = {0}; 918 int r; 919 920 if (!adev->mes.funcs->misc_op) { 921 DRM_ERROR("mes set shader debugger is not supported!\n"); 922 return -EINVAL; 923 } 924 925 op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; 926 op_input.set_shader_debugger.process_context_addr = process_context_addr; 927 op_input.set_shader_debugger.flags.u32all = flags; 928 op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl; 929 memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl, 930 sizeof(op_input.set_shader_debugger.tcp_watch_cntl)); 931 932 if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >> 933 AMDGPU_MES_API_VERSION_SHIFT) >= 14) 934 op_input.set_shader_debugger.trap_en = trap_en; 935 936 amdgpu_mes_lock(&adev->mes); 937 938 r = adev->mes.funcs->misc_op(&adev->mes, &op_input); 939 if (r) 940 DRM_ERROR("failed to set_shader_debugger\n"); 941 942 amdgpu_mes_unlock(&adev->mes); 943 944 return r; 945 } 946 947 static void 948 amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, 949 struct amdgpu_ring *ring, 950 struct amdgpu_mes_queue_properties *props) 951 { 952 props->queue_type = ring->funcs->type; 953 props->hqd_base_gpu_addr = ring->gpu_addr; 954 props->rptr_gpu_addr = ring->rptr_gpu_addr; 955 props->wptr_gpu_addr = ring->wptr_gpu_addr; 956 props->wptr_mc_addr = 957 ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs; 958 props->queue_size = ring->ring_size; 959 props->eop_gpu_addr = ring->eop_gpu_addr; 960 props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; 961 props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; 962 props->paging = false; 963 props->ring = ring; 964 } 965 966 #define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \ 967 do { \ 968 if (id_offs < AMDGPU_MES_CTX_MAX_OFFS) \ 969 return offsetof(struct amdgpu_mes_ctx_meta_data, \ 970 _eng[ring->idx].slots[id_offs]); \ 971 else if (id_offs == AMDGPU_MES_CTX_RING_OFFS) \ 972 return offsetof(struct amdgpu_mes_ctx_meta_data, \ 973 _eng[ring->idx].ring); \ 974 else if (id_offs == AMDGPU_MES_CTX_IB_OFFS) \ 975 return offsetof(struct amdgpu_mes_ctx_meta_data, \ 976 _eng[ring->idx].ib); \ 977 else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS) \ 978 return offsetof(struct amdgpu_mes_ctx_meta_data, \ 979 _eng[ring->idx].padding); \ 980 } while(0) 981 982 int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs) 983 { 984 switch (ring->funcs->type) { 985 case AMDGPU_RING_TYPE_GFX: 986 DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx); 987 break; 988 case AMDGPU_RING_TYPE_COMPUTE: 989 DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute); 990 break; 991 case AMDGPU_RING_TYPE_SDMA: 992 DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma); 993 break; 994 default: 995 break; 996 } 997 998 WARN_ON(1); 999 return -EINVAL; 1000 } 1001 1002 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, 1003 int queue_type, int idx, 1004 struct amdgpu_mes_ctx_data *ctx_data, 1005 struct amdgpu_ring **out) 1006 { 1007 struct amdgpu_ring *ring; 1008 struct amdgpu_mes_gang *gang; 1009 struct amdgpu_mes_queue_properties qprops = {0}; 1010 int r, queue_id, pasid; 1011 1012 /* 1013 * Avoid taking any other locks under MES lock to avoid circular 1014 * lock dependencies. 1015 */ 1016 amdgpu_mes_lock(&adev->mes); 1017 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 1018 if (!gang) { 1019 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 1020 amdgpu_mes_unlock(&adev->mes); 1021 return -EINVAL; 1022 } 1023 pasid = gang->process->pasid; 1024 1025 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); 1026 if (!ring) { 1027 amdgpu_mes_unlock(&adev->mes); 1028 return -ENOMEM; 1029 } 1030 1031 ring->ring_obj = NULL; 1032 ring->use_doorbell = true; 1033 ring->is_mes_queue = true; 1034 ring->mes_ctx = ctx_data; 1035 ring->idx = idx; 1036 ring->no_scheduler = true; 1037 1038 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 1039 int offset = offsetof(struct amdgpu_mes_ctx_meta_data, 1040 compute[ring->idx].mec_hpd); 1041 ring->eop_gpu_addr = 1042 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 1043 } 1044 1045 switch (queue_type) { 1046 case AMDGPU_RING_TYPE_GFX: 1047 ring->funcs = adev->gfx.gfx_ring[0].funcs; 1048 break; 1049 case AMDGPU_RING_TYPE_COMPUTE: 1050 ring->funcs = adev->gfx.compute_ring[0].funcs; 1051 break; 1052 case AMDGPU_RING_TYPE_SDMA: 1053 ring->funcs = adev->sdma.instance[0].ring.funcs; 1054 break; 1055 default: 1056 BUG(); 1057 } 1058 1059 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1060 AMDGPU_RING_PRIO_DEFAULT, NULL); 1061 if (r) 1062 goto clean_up_memory; 1063 1064 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); 1065 1066 dma_fence_wait(gang->process->vm->last_update, false); 1067 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); 1068 amdgpu_mes_unlock(&adev->mes); 1069 1070 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); 1071 if (r) 1072 goto clean_up_ring; 1073 1074 ring->hw_queue_id = queue_id; 1075 ring->doorbell_index = qprops.doorbell_off; 1076 1077 if (queue_type == AMDGPU_RING_TYPE_GFX) 1078 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); 1079 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 1080 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, 1081 queue_id); 1082 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 1083 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, 1084 queue_id); 1085 else 1086 BUG(); 1087 1088 *out = ring; 1089 return 0; 1090 1091 clean_up_ring: 1092 amdgpu_ring_fini(ring); 1093 clean_up_memory: 1094 kfree(ring); 1095 amdgpu_mes_unlock(&adev->mes); 1096 return r; 1097 } 1098 1099 void amdgpu_mes_remove_ring(struct amdgpu_device *adev, 1100 struct amdgpu_ring *ring) 1101 { 1102 if (!ring) 1103 return; 1104 1105 amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); 1106 amdgpu_ring_fini(ring); 1107 kfree(ring); 1108 } 1109 1110 uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, 1111 enum amdgpu_mes_priority_level prio) 1112 { 1113 return adev->mes.aggregated_doorbells[prio]; 1114 } 1115 1116 int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, 1117 struct amdgpu_mes_ctx_data *ctx_data) 1118 { 1119 int r; 1120 1121 r = amdgpu_bo_create_kernel(adev, 1122 sizeof(struct amdgpu_mes_ctx_meta_data), 1123 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1124 &ctx_data->meta_data_obj, 1125 &ctx_data->meta_data_mc_addr, 1126 &ctx_data->meta_data_ptr); 1127 if (r) { 1128 dev_warn(adev->dev, "(%d) create CTX bo failed\n", r); 1129 return r; 1130 } 1131 1132 if (!ctx_data->meta_data_obj) 1133 return -ENOMEM; 1134 1135 memset(ctx_data->meta_data_ptr, 0, 1136 sizeof(struct amdgpu_mes_ctx_meta_data)); 1137 1138 return 0; 1139 } 1140 1141 void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data) 1142 { 1143 if (ctx_data->meta_data_obj) 1144 amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, 1145 &ctx_data->meta_data_mc_addr, 1146 &ctx_data->meta_data_ptr); 1147 } 1148 1149 int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, 1150 struct amdgpu_vm *vm, 1151 struct amdgpu_mes_ctx_data *ctx_data) 1152 { 1153 struct amdgpu_bo_va *bo_va; 1154 struct amdgpu_sync sync; 1155 struct drm_exec exec; 1156 int r; 1157 1158 amdgpu_sync_create(&sync); 1159 1160 drm_exec_init(&exec, 0); 1161 drm_exec_until_all_locked(&exec) { 1162 r = drm_exec_lock_obj(&exec, 1163 &ctx_data->meta_data_obj->tbo.base); 1164 drm_exec_retry_on_contention(&exec); 1165 if (unlikely(r)) 1166 goto error_fini_exec; 1167 1168 r = amdgpu_vm_lock_pd(vm, &exec, 0); 1169 drm_exec_retry_on_contention(&exec); 1170 if (unlikely(r)) 1171 goto error_fini_exec; 1172 } 1173 1174 bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj); 1175 if (!bo_va) { 1176 DRM_ERROR("failed to create bo_va for meta data BO\n"); 1177 r = -ENOMEM; 1178 goto error_fini_exec; 1179 } 1180 1181 r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0, 1182 sizeof(struct amdgpu_mes_ctx_meta_data), 1183 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | 1184 AMDGPU_PTE_EXECUTABLE); 1185 1186 if (r) { 1187 DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r); 1188 goto error_del_bo_va; 1189 } 1190 1191 r = amdgpu_vm_bo_update(adev, bo_va, false); 1192 if (r) { 1193 DRM_ERROR("failed to do vm_bo_update on meta data\n"); 1194 goto error_del_bo_va; 1195 } 1196 amdgpu_sync_fence(&sync, bo_va->last_pt_update); 1197 1198 r = amdgpu_vm_update_pdes(adev, vm, false); 1199 if (r) { 1200 DRM_ERROR("failed to update pdes on meta data\n"); 1201 goto error_del_bo_va; 1202 } 1203 amdgpu_sync_fence(&sync, vm->last_update); 1204 1205 amdgpu_sync_wait(&sync, false); 1206 drm_exec_fini(&exec); 1207 1208 amdgpu_sync_free(&sync); 1209 ctx_data->meta_data_va = bo_va; 1210 return 0; 1211 1212 error_del_bo_va: 1213 amdgpu_vm_bo_del(adev, bo_va); 1214 1215 error_fini_exec: 1216 drm_exec_fini(&exec); 1217 amdgpu_sync_free(&sync); 1218 return r; 1219 } 1220 1221 int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, 1222 struct amdgpu_mes_ctx_data *ctx_data) 1223 { 1224 struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; 1225 struct amdgpu_bo *bo = ctx_data->meta_data_obj; 1226 struct amdgpu_vm *vm = bo_va->base.vm; 1227 struct dma_fence *fence; 1228 struct drm_exec exec; 1229 long r; 1230 1231 drm_exec_init(&exec, 0); 1232 drm_exec_until_all_locked(&exec) { 1233 r = drm_exec_lock_obj(&exec, 1234 &ctx_data->meta_data_obj->tbo.base); 1235 drm_exec_retry_on_contention(&exec); 1236 if (unlikely(r)) 1237 goto out_unlock; 1238 1239 r = amdgpu_vm_lock_pd(vm, &exec, 0); 1240 drm_exec_retry_on_contention(&exec); 1241 if (unlikely(r)) 1242 goto out_unlock; 1243 } 1244 1245 amdgpu_vm_bo_del(adev, bo_va); 1246 if (!amdgpu_vm_ready(vm)) 1247 goto out_unlock; 1248 1249 r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, 1250 &fence); 1251 if (r) 1252 goto out_unlock; 1253 if (fence) { 1254 amdgpu_bo_fence(bo, fence, true); 1255 fence = NULL; 1256 } 1257 1258 r = amdgpu_vm_clear_freed(adev, vm, &fence); 1259 if (r || !fence) 1260 goto out_unlock; 1261 1262 dma_fence_wait(fence, false); 1263 amdgpu_bo_fence(bo, fence, true); 1264 dma_fence_put(fence); 1265 1266 out_unlock: 1267 if (unlikely(r < 0)) 1268 dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); 1269 drm_exec_fini(&exec); 1270 1271 return r; 1272 } 1273 1274 static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev, 1275 int pasid, int *gang_id, 1276 int queue_type, int num_queue, 1277 struct amdgpu_ring **added_rings, 1278 struct amdgpu_mes_ctx_data *ctx_data) 1279 { 1280 struct amdgpu_ring *ring; 1281 struct amdgpu_mes_gang_properties gprops = {0}; 1282 int r, j; 1283 1284 /* create a gang for the process */ 1285 gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 1286 gprops.gang_quantum = adev->mes.default_gang_quantum; 1287 gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 1288 gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 1289 gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 1290 1291 r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id); 1292 if (r) { 1293 DRM_ERROR("failed to add gang\n"); 1294 return r; 1295 } 1296 1297 /* create queues for the gang */ 1298 for (j = 0; j < num_queue; j++) { 1299 r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j, 1300 ctx_data, &ring); 1301 if (r) { 1302 DRM_ERROR("failed to add ring\n"); 1303 break; 1304 } 1305 1306 DRM_INFO("ring %s was added\n", ring->name); 1307 added_rings[j] = ring; 1308 } 1309 1310 return 0; 1311 } 1312 1313 static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings) 1314 { 1315 struct amdgpu_ring *ring; 1316 int i, r; 1317 1318 for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) { 1319 ring = added_rings[i]; 1320 if (!ring) 1321 continue; 1322 1323 r = amdgpu_ring_test_helper(ring); 1324 if (r) 1325 return r; 1326 1327 r = amdgpu_ring_test_ib(ring, 1000 * 10); 1328 if (r) { 1329 DRM_DEV_ERROR(ring->adev->dev, 1330 "ring %s ib test failed (%d)\n", 1331 ring->name, r); 1332 return r; 1333 } else 1334 DRM_INFO("ring %s ib test pass\n", ring->name); 1335 } 1336 1337 return 0; 1338 } 1339 1340 int amdgpu_mes_self_test(struct amdgpu_device *adev) 1341 { 1342 struct amdgpu_vm *vm = NULL; 1343 struct amdgpu_mes_ctx_data ctx_data = {0}; 1344 struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL }; 1345 int gang_ids[3] = {0}; 1346 int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 }, 1347 { AMDGPU_RING_TYPE_COMPUTE, 1 }, 1348 { AMDGPU_RING_TYPE_SDMA, 1} }; 1349 int i, r, pasid, k = 0; 1350 1351 pasid = amdgpu_pasid_alloc(16); 1352 if (pasid < 0) { 1353 dev_warn(adev->dev, "No more PASIDs available!"); 1354 pasid = 0; 1355 } 1356 1357 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 1358 if (!vm) { 1359 r = -ENOMEM; 1360 goto error_pasid; 1361 } 1362 1363 r = amdgpu_vm_init(adev, vm, -1); 1364 if (r) { 1365 DRM_ERROR("failed to initialize vm\n"); 1366 goto error_pasid; 1367 } 1368 1369 r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data); 1370 if (r) { 1371 DRM_ERROR("failed to alloc ctx meta data\n"); 1372 goto error_fini; 1373 } 1374 1375 ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; 1376 r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data); 1377 if (r) { 1378 DRM_ERROR("failed to map ctx meta data\n"); 1379 goto error_vm; 1380 } 1381 1382 r = amdgpu_mes_create_process(adev, pasid, vm); 1383 if (r) { 1384 DRM_ERROR("failed to create MES process\n"); 1385 goto error_vm; 1386 } 1387 1388 for (i = 0; i < ARRAY_SIZE(queue_types); i++) { 1389 /* On GFX v10.3, fw hasn't supported to map sdma queue. */ 1390 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) && 1391 adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) && 1392 queue_types[i][0] == AMDGPU_RING_TYPE_SDMA) 1393 continue; 1394 1395 r = amdgpu_mes_test_create_gang_and_queues(adev, pasid, 1396 &gang_ids[i], 1397 queue_types[i][0], 1398 queue_types[i][1], 1399 &added_rings[k], 1400 &ctx_data); 1401 if (r) 1402 goto error_queues; 1403 1404 k += queue_types[i][1]; 1405 } 1406 1407 /* start ring test and ib test for MES queues */ 1408 amdgpu_mes_test_queues(added_rings); 1409 1410 error_queues: 1411 /* remove all queues */ 1412 for (i = 0; i < ARRAY_SIZE(added_rings); i++) { 1413 if (!added_rings[i]) 1414 continue; 1415 amdgpu_mes_remove_ring(adev, added_rings[i]); 1416 } 1417 1418 for (i = 0; i < ARRAY_SIZE(gang_ids); i++) { 1419 if (!gang_ids[i]) 1420 continue; 1421 amdgpu_mes_remove_gang(adev, gang_ids[i]); 1422 } 1423 1424 amdgpu_mes_destroy_process(adev, pasid); 1425 1426 error_vm: 1427 amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data); 1428 1429 error_fini: 1430 amdgpu_vm_fini(adev, vm); 1431 1432 error_pasid: 1433 if (pasid) 1434 amdgpu_pasid_free(pasid); 1435 1436 amdgpu_mes_ctx_free_meta_data(&ctx_data); 1437 kfree(vm); 1438 return 0; 1439 } 1440 1441 int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) 1442 { 1443 const struct mes_firmware_header_v1_0 *mes_hdr; 1444 struct amdgpu_firmware_info *info; 1445 char ucode_prefix[30]; 1446 char fw_name[40]; 1447 bool need_retry = false; 1448 int r; 1449 1450 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, 1451 sizeof(ucode_prefix)); 1452 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) { 1453 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", 1454 ucode_prefix, 1455 pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); 1456 need_retry = true; 1457 } else { 1458 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", 1459 ucode_prefix, 1460 pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); 1461 } 1462 1463 r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); 1464 if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { 1465 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", 1466 ucode_prefix); 1467 DRM_INFO("try to fall back to %s\n", fw_name); 1468 r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], 1469 fw_name); 1470 } 1471 1472 if (r) 1473 goto out; 1474 1475 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1476 adev->mes.fw[pipe]->data; 1477 adev->mes.uc_start_addr[pipe] = 1478 le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) | 1479 ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); 1480 adev->mes.data_start_addr[pipe] = 1481 le32_to_cpu(mes_hdr->mes_data_start_addr_lo) | 1482 ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32); 1483 1484 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1485 int ucode, ucode_data; 1486 1487 if (pipe == AMDGPU_MES_SCHED_PIPE) { 1488 ucode = AMDGPU_UCODE_ID_CP_MES; 1489 ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA; 1490 } else { 1491 ucode = AMDGPU_UCODE_ID_CP_MES1; 1492 ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA; 1493 } 1494 1495 info = &adev->firmware.ucode[ucode]; 1496 info->ucode_id = ucode; 1497 info->fw = adev->mes.fw[pipe]; 1498 adev->firmware.fw_size += 1499 ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes), 1500 PAGE_SIZE); 1501 1502 info = &adev->firmware.ucode[ucode_data]; 1503 info->ucode_id = ucode_data; 1504 info->fw = adev->mes.fw[pipe]; 1505 adev->firmware.fw_size += 1506 ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes), 1507 PAGE_SIZE); 1508 } 1509 1510 return 0; 1511 out: 1512 amdgpu_ucode_release(&adev->mes.fw[pipe]); 1513 return r; 1514 } 1515