/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drmP.h>
#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"

#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	=	1,
	[AMDGPU_HW_IP_COMPUTE]	=	4,
	[AMDGPU_HW_IP_DMA]	=	2,
	[AMDGPU_HW_IP_UVD]	=	1,
	[AMDGPU_HW_IP_VCE]	=	1,
	[AMDGPU_HW_IP_UVD_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_DEC]	=	1,
	[AMDGPU_HW_IP_VCN_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
};

static int amdgput_ctx_total_num_entities(void)
{
	unsigned i, num_entities = 0;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		num_entities += amdgpu_ctx_num_entities[i];

	return num_entities;
}

static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      enum drm_sched_priority priority)
{
	/* NORMAL and below are accessible by everyone */
	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

static int amdgpu_ctx_init(struct amdgpu_device *adev,
			   enum drm_sched_priority priority,
			   struct drm_file *filp,
			   struct amdgpu_ctx *ctx)
{
	unsigned num_entities = amdgput_ctx_total_num_entities();
	unsigned i, j;
	int r;

	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
		return -EINVAL;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));
	ctx->adev = adev;

	ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
			      sizeof(struct dma_fence*), GFP_KERNEL);
	if (!ctx->fences)
		return -ENOMEM;

	ctx->entities[0] = kcalloc(num_entities,
				   sizeof(struct amdgpu_ctx_entity),
				   GFP_KERNEL);
	if (!ctx->entities[0]) {
		r = -ENOMEM;
		goto error_free_fences;
	}

	for (i = 0; i < num_entities; ++i) {
		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];

		entity->sequence = 1;
		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
	}
	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
		ctx->entities[i] = ctx->entities[i - 1] +
			amdgpu_ctx_num_entities[i - 1];

	kref_init(&ctx->refcount);
	spin_lock_init(&ctx->ring_lock);
	mutex_init(&ctx->lock);

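	/* Snapshot the current GPU reset and VRAM lost counters so later
	 * context queries can tell whether a reset or VRAM loss happened
	 * after this context was created.
	 */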
	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
	ctx->init_priority = priority;
	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
		unsigned num_rings;

		switch (i) {
		case AMDGPU_HW_IP_GFX:
			rings[0] = &adev->gfx.gfx_ring[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_COMPUTE:
			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
				rings[j] = &adev->gfx.compute_ring[j];
			num_rings = adev->gfx.num_compute_rings;
			break;
		case AMDGPU_HW_IP_DMA:
			for (j = 0; j < adev->sdma.num_instances; ++j)
				rings[j] = &adev->sdma.instance[j].ring;
			num_rings = adev->sdma.num_instances;
			break;
		case AMDGPU_HW_IP_UVD:
			rings[0] = &adev->uvd.inst[0].ring;
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCE:
			rings[0] = &adev->vce.ring[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_UVD_ENC:
			rings[0] = &adev->uvd.inst[0].ring_enc[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCN_DEC:
			rings[0] = &adev->vcn.ring_dec;
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCN_ENC:
			rings[0] = &adev->vcn.ring_enc[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCN_JPEG:
			rings[0] = &adev->vcn.ring_jpeg;
			num_rings = 1;
			break;
		}

		for (j = 0; j < num_rings; ++j)
			rqs[j] = &rings[j]->sched.sched_rq[priority];

		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
						  rqs, num_rings, &ctx->guilty);
		if (r)
			goto error_cleanup_entities;
	}

	return 0;

error_cleanup_entities:
	for (i = 0; i < num_entities; ++i)
		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
	kfree(ctx->entities[0]);

error_free_fences:
	kfree(ctx->fences);
	ctx->fences = NULL;
	return r;
}

static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	unsigned num_entities = amdgput_ctx_total_num_entities();
	struct amdgpu_device *adev = ctx->adev;
	unsigned i, j;

	if (!adev)
		return;

	for (i = 0; i < num_entities; ++i)
		for (j = 0; j < amdgpu_sched_jobs; ++j)
			dma_fence_put(ctx->entities[0][i].fences[j]);
	kfree(ctx->fences);
	kfree(ctx->entities[0]);

	mutex_destroy(&ctx->lock);

	kfree(ctx);
}

int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	*entity = &ctx->entities[hw_ip][ring].entity;
	return 0;
}

static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    enum drm_sched_priority priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(adev, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	unsigned num_entities;
	u32 i;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);

	num_entities = 0;
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
		num_entities += amdgpu_ctx_num_entities[i];

	for (i = 0; i < num_entities; i++)
		drm_sched_entity_destroy(&ctx->entities[0][i].entity);

	amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	mutex_unlock(&mgr->lock);
	return 0;
}

int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id;
	enum drm_sched_priority priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	r = 0;
	id = args->in.ctx_id;
	priority = amdgpu_to_sched_priority(args->in.priority);

	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field */
	if (priority == DRM_SCHED_PRIORITY_INVALID)
		priority = DRM_SCHED_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	default:
		return -EINVAL;
	}

	return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}

void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			  struct drm_sched_entity *entity,
			  struct dma_fence *fence, uint64_t* handle)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	if (other)
		BUG_ON(!dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(other);
	if (handle)
		*handle = seq;
}

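/* Look up the fence for a previously submitted sequence number on an
 * entity. Returns ERR_PTR(-EINVAL) for sequence numbers that have not
 * been submitted yet, NULL if the fence has already been overwritten in
 * the ring buffer, and otherwise a new reference to the fence which the
 * caller must drop with dma_fence_put().
 */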
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  enum drm_sched_priority priority)
{
	unsigned num_entities = amdgput_ctx_total_num_entities();
	enum drm_sched_priority ctx_prio;
	unsigned i;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	for (i = 0; i < num_entities; i++) {
		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;

		drm_sched_entity_set_priority(entity, ctx_prio);
	}
}

int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
	struct dma_fence *other = centity->fences[idx];

	if (other) {
		signed long r;
		r = dma_fence_wait(other, true);
		if (r < 0) {
			if (r != -ERESTARTSYS)
				DRM_ERROR("Error (%ld) waiting for fence!\n", r);

			return r;
		}
	}

	return 0;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
	mutex_init(&mgr->lock);
	idr_init(&mgr->ctx_handles);
}

void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
{
	unsigned num_entities = amdgput_ctx_total_num_entities();
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;
	long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {

		if (!ctx->adev) {
			mutex_unlock(&mgr->lock);
			return;
		}

		for (i = 0; i < num_entities; i++) {
			struct drm_sched_entity *entity;

			entity = &ctx->entities[0][i].entity;
			max_wait = drm_sched_entity_flush(entity, max_wait);
		}
	}
	mutex_unlock(&mgr->lock);
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	unsigned num_entities = amdgput_ctx_total_num_entities();
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {

		if (!ctx->adev)
			return;

		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < num_entities; i++)
			drm_sched_entity_fini(&ctx->entities[0][i].entity);
	}
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}