/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>

#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	= 1,
	[AMDGPU_HW_IP_COMPUTE]	= 4,
	[AMDGPU_HW_IP_DMA]	= 2,
	[AMDGPU_HW_IP_UVD]	= 1,
	[AMDGPU_HW_IP_VCE]	= 1,
	[AMDGPU_HW_IP_UVD_ENC]	= 1,
	[AMDGPU_HW_IP_VCN_DEC]	= 1,
	[AMDGPU_HW_IP_VCN_ENC]	= 1,
	[AMDGPU_HW_IP_VCN_JPEG]	= 1,
};

bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_VERY_LOW:
	case AMDGPU_CTX_PRIORITY_LOW:
	case AMDGPU_CTX_PRIORITY_NORMAL:
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return true;
	default:
	case AMDGPU_CTX_PRIORITY_UNSET:
		return false;
	}
}

static enum drm_sched_priority
amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_UNSET:
		pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_VERY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_LOW:
		return DRM_SCHED_PRIORITY_MIN;

	case AMDGPU_CTX_PRIORITY_NORMAL:
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	/* This should not happen as we sanitized userspace provided priority
	 * already, WARN if this happens.
	 */
	default:
		WARN(1, "Invalid context priority %d\n", ctx_prio);
		return DRM_SCHED_PRIORITY_NORMAL;
	}

}

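/*
 * Check whether the caller is allowed to create a context with the requested
 * priority: anything above NORMAL requires CAP_SYS_NICE or DRM master status.
 */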
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      int32_t priority)
{
	if (!amdgpu_ctx_priority_is_valid(priority))
		return -EINVAL;

	/* NORMAL and below are accessible by everyone */
	if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_GFX_PIPE_PRIO_HIGH;
	default:
		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
	}
}

static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
		return AMDGPU_RING_PRIO_1;
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}

static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	int32_t ctx_prio;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	switch (hw_ip) {
	case AMDGPU_HW_IP_GFX:
	case AMDGPU_HW_IP_COMPUTE:
		hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
		break;
	case AMDGPU_HW_IP_VCE:
	case AMDGPU_HW_IP_VCN_ENC:
		hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
		break;
	default:
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
		break;
	}

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;

	return hw_prio;
}

/* Calculate the time spent on the hw */
static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
{
	struct drm_sched_fence *s_fence;

	if (!fence)
		return ns_to_ktime(0);

	/* When the fence is not even scheduled, it can't have spent time */
	s_fence = to_drm_sched_fence(fence);
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
		return ns_to_ktime(0);

	/* When it is still running, account how much time was already spent */
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
		return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);

	return ktime_sub(s_fence->finished.timestamp,
			 s_fence->scheduled.timestamp);
}

static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
				      struct amdgpu_ctx_entity *centity)
{
	ktime_t res = ns_to_ktime(0);
	uint32_t i;

	spin_lock(&ctx->ring_lock);
	for (i = 0; i < amdgpu_sched_jobs; i++) {
		res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
	}
	spin_unlock(&ctx->ring_lock);
	return res;
}

static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
{
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	struct amdgpu_device *adev = ctx->mgr->adev;
	struct amdgpu_ctx_entity *entity;
	enum drm_sched_priority drm_prio;
	unsigned int hw_prio, num_scheds;
	int32_t ctx_prio;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
			 GFP_KERNEL);
	if (!entity)
		return -ENOMEM;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	entity->hw_ip = hw_ip;
	entity->sequence = 1;
	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);

	if (!adev->xcp_mgr) {
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
	} else {
		struct amdgpu_fpriv *fpriv;

		fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
		r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
					     &num_scheds, &scheds);
		if (r)
			goto cleanup_entity;
	}

	/* disable load balance if the hw engine retains context among dependent jobs */
	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
	    hw_ip == AMDGPU_HW_IP_UVD) {
		sched = drm_sched_pick_best(scheds, num_scheds);
		scheds = &sched;
		num_scheds = 1;
	}

	r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
				  &ctx->guilty);
	if (r)
		goto error_free_entity;

	/* It's not an error if we fail to install the new entity */
	if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
		goto cleanup_entity;

	return 0;

cleanup_entity:
	drm_sched_entity_fini(&entity->entity);

error_free_entity:
	kfree(entity);

	return r;
}

static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
				      struct amdgpu_ctx_entity *entity)
{
	ktime_t res = ns_to_ktime(0);
	int i;

	if (!entity)
		return res;

	for (i = 0; i < amdgpu_sched_jobs; ++i) {
		res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
		dma_fence_put(entity->fences[i]);
	}

	amdgpu_xcp_release_sched(adev, entity);

	kfree(entity);
	return res;
}

static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
					u32 *stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level current_level;

	current_level = amdgpu_dpm_get_performance_level(adev);

	switch (current_level) {
	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
		break;
	default:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
		break;
	}
	return 0;
}

static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
			   struct drm_file *filp, struct amdgpu_ctx *ctx)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	u32 current_stable_pstate;
	int r;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	kref_init(&ctx->refcount);
	ctx->mgr = mgr;
	spin_lock_init(&ctx->ring_lock);

	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
	ctx->init_priority = priority;
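	/* No override yet; amdgpu_ctx_priority_override() may change this later */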
	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r)
		return r;

	if (mgr->adev->pm.stable_pstate_ctx)
		ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
	else
		ctx->stable_pstate = current_stable_pstate;

	ctx->ctx_mgr = &(fpriv->ctx_mgr);
	return 0;
}

static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
					u32 stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level level;
	u32 current_stable_pstate;
	int r;

	mutex_lock(&adev->pm.stable_pstate_ctx_lock);
	if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
		r = -EBUSY;
		goto done;
	}

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r || (stable_pstate == current_stable_pstate))
		goto done;

	switch (stable_pstate) {
	case AMDGPU_CTX_STABLE_PSTATE_NONE:
		level = AMD_DPM_FORCED_LEVEL_AUTO;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_PEAK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
		break;
	default:
		r = -EINVAL;
		goto done;
	}

	r = amdgpu_dpm_force_performance_level(adev, level);

	if (level == AMD_DPM_FORCED_LEVEL_AUTO)
		adev->pm.stable_pstate_ctx = NULL;
	else
		adev->pm.stable_pstate_ctx = ctx;
done:
	mutex_unlock(&adev->pm.stable_pstate_ctx_lock);

	return r;
}

static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	struct amdgpu_ctx_mgr *mgr = ctx->mgr;
	struct amdgpu_device *adev = mgr->adev;
	unsigned i, j, idx;

	if (!adev)
		return;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
			ktime_t spend;

			spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
			atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
	}

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
		drm_dev_exit(idx);
	}

	kfree(ctx);
}

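/*
 * Look up the scheduler entity for (hw_ip, instance, ring), creating it on
 * first use. Fails if the entity has already signaled an error.
 */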
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	int r;
	struct drm_sched_entity *ctx_entity;

	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	if (ctx->entities[hw_ip][ring] == NULL) {
		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	ctx_entity = &ctx->entities[hw_ip][ring]->entity;
	r = drm_sched_entity_error(ctx_entity);
	if (r) {
		DRM_DEBUG("error entity %p\n", ctx_entity);
		return r;
	}

	*entity = ctx_entity;
	return 0;
}

static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    int32_t priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(mgr, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	u32 i, j;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
		}
	}

	amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

#define AMDGPU_RAS_COUNTE_DELAY_MS 3000

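/*
 * Extended state query: reports whether a reset or VRAM loss happened during
 * the context's lifetime, whether the context was found guilty, and whether
 * the RAS error counters changed since the last query.
 */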
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	if (amdgpu_in_reset(adev))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;

	if (adev->ras_enabled && con) {
		/* Return the cached values in O(1),
		 * and schedule delayed work to cache
		 * new values.
		 */
		int ce_count, ue_count;

		ce_count = atomic_read(&con->ras_ce_count);
		ue_count = atomic_read(&con->ras_ue_count);

		if (ce_count != ctx->ras_counter_ce) {
			ctx->ras_counter_ce = ce_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
		}

		if (ue_count != ctx->ras_counter_ue) {
			ctx->ras_counter_ue = ue_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
		}

		schedule_delayed_work(&con->ras_counte_delay_work,
				      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
	}

	mutex_unlock(&mgr->lock);
	return 0;
}

static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
				    struct amdgpu_fpriv *fpriv, uint32_t id,
				    bool set, u32 *stable_pstate)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	int r;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	if (set)
		r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
	else
		r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);

	mutex_unlock(&mgr->lock);
	return r;
}

int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id, stable_pstate;
	int32_t priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	id = args->in.ctx_id;
	priority = args->in.priority;

	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field */
	if (!amdgpu_ctx_priority_is_valid(priority))
		priority = AMDGPU_CTX_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
		if (!r)
			args->out.pstate.flags = stable_pstate;
		break;
	case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
		if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
			return -EINVAL;
		stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
		if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
		break;
	default:
		return -EINVAL;
	}

	return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}

uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			      struct drm_sched_entity *entity,
			      struct dma_fence *fence)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	WARN_ON(other && !dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
		     &ctx->mgr->time_spend[centity->hw_ip]);

	dma_fence_put(other);
	return seq;
}

struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}

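/*
 * Apply a new priority to a single entity: update the DRM scheduler (software)
 * priority and, for GFX/compute, move the entity to the scheduler list that
 * matches the new hardware priority.
 */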
static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
					   struct amdgpu_ctx_entity *aentity,
					   int hw_ip,
					   int32_t priority)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	struct drm_gpu_scheduler **scheds = NULL;
	unsigned num_scheds;

	/* set sw priority */
	drm_sched_entity_set_priority(&aentity->entity,
				      amdgpu_ctx_to_drm_sched_prio(priority));

	/* set hw priority */
	if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		drm_sched_entity_modify_sched(&aentity->entity, scheds,
					      num_scheds);
	}
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  int32_t priority)
{
	int32_t ctx_prio;
	unsigned i, j;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
						       i, ctx_prio);
		}
	}
}

int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *other;
	unsigned idx;
	long r;

	spin_lock(&ctx->ring_lock);
	idx = centity->sequence & (amdgpu_sched_jobs - 1);
	other = dma_fence_get(centity->fences[idx]);
	spin_unlock(&ctx->ring_lock);

	if (!other)
		return 0;

	r = dma_fence_wait(other, true);
	if (r < 0 && r != -ERESTARTSYS)
		DRM_ERROR("Error (%ld) waiting for fence!\n", r);

	dma_fence_put(other);
	return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
			 struct amdgpu_device *adev)
{
	unsigned int i;

	mgr->adev = adev;
	mutex_init(&mgr->lock);
	idr_init_base(&mgr->ctx_handles, 1);

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		atomic64_set(&mgr->time_spend[i], 0);
}

long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				timeout = drm_sched_entity_flush(entity, timeout);
			}
		}
	}
	mutex_unlock(&mgr->lock);
	return timeout;
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				drm_sched_entity_fini(entity);
			}
		}
	}
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}

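/*
 * Accumulate the GPU time consumed per hardware IP by all contexts of this
 * manager: the time already folded into mgr->time_spend plus the time of the
 * fences still tracked by live entities.
 */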
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
			  ktime_t usage[AMDGPU_HW_IP_NUM])
{
	struct amdgpu_ctx *ctx;
	unsigned int hw_ip, i;
	uint32_t id;

	/*
	 * This is a little bit racy because it can be that a ctx or a fence is
	 * destroyed just at the moment we try to account it. But that is ok
	 * since exactly that case is explicitly allowed by the interface.
	 */
	mutex_lock(&mgr->lock);
	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

		usage[hw_ip] = ns_to_ktime(ns);
	}

	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
			for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
				struct amdgpu_ctx_entity *centity;
				ktime_t spend;

				centity = ctx->entities[hw_ip][i];
				if (!centity)
					continue;
				spend = amdgpu_ctx_entity_time(ctx, centity);
				usage[hw_ip] = ktime_add(usage[hw_ip], spend);
			}
		}
	}
	mutex_unlock(&mgr->lock);
}