/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>
#include <linux/slab.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>

#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)		\
        container_of((sched_job), struct drm_sched_job, queue_node)

/**
 * drm_sched_entity_init - Init a context entity used by the scheduler when
 * submitting jobs to the HW ring.
 *
 * @entity: scheduler entity to init
 * @rq_list: the list of run queues on which jobs from this
 *           entity can be submitted
 * @num_rq_list: number of run queues in rq_list
 * @guilty: atomic_t set to 1 when a job on this queue
 *          is found to be guilty causing a timeout
 *
 * Note: the rq_list should have at least one element to schedule
 *       the entity
 *
 * Returns 0 on success or a negative error code on failure.
 */
int drm_sched_entity_init(struct drm_sched_entity *entity,
                          struct drm_sched_rq **rq_list,
                          unsigned int num_rq_list,
                          atomic_t *guilty)
{
        int i;

        if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
                return -EINVAL;

        memset(entity, 0, sizeof(struct drm_sched_entity));
        INIT_LIST_HEAD(&entity->list);
        entity->rq = NULL;
        entity->guilty = guilty;
        entity->num_rq_list = num_rq_list;
        entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
                                  GFP_KERNEL);
        if (!entity->rq_list)
                return -ENOMEM;

        for (i = 0; i < num_rq_list; ++i)
                entity->rq_list[i] = rq_list[i];

        if (num_rq_list)
                entity->rq = rq_list[0];

        entity->last_scheduled = NULL;

        spin_lock_init(&entity->rq_lock);
        spsc_queue_init(&entity->job_queue);

        atomic_set(&entity->fence_seq, 0);
        entity->fence_context = dma_fence_context_alloc(2);

        return 0;
}
EXPORT_SYMBOL(drm_sched_entity_init);
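/*
 * Example (illustrative sketch only, not part of this file): a driver would
 * typically pick one run queue per scheduler instance and hand the array to
 * drm_sched_entity_init().  "sched" and "ctx" are hypothetical driver-side
 * objects.
 *
 *	struct drm_sched_rq *rq_list[1];
 *	int r;
 *
 *	rq_list[0] = &sched->sched_rq[DRM_SCHED_PRIORITY_NORMAL];
 *	r = drm_sched_entity_init(&ctx->entity, rq_list,
 *				  ARRAY_SIZE(rq_list), NULL);
 *	if (r)
 *		return r;
 */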
/**
 * drm_sched_entity_is_idle - Check if entity is idle
 *
 * @entity: scheduler entity
 *
 * Returns true if the entity does not have any unscheduled jobs.
 */
static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
{
        rmb(); /* for list_empty to work without lock */

        if (list_empty(&entity->list) ||
            spsc_queue_count(&entity->job_queue) == 0)
                return true;

        return false;
}

/**
 * drm_sched_entity_is_ready - Check if entity is ready
 *
 * @entity: scheduler entity
 *
 * Return true if the entity could provide a job.
 */
bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
{
        if (spsc_queue_peek(&entity->job_queue) == NULL)
                return false;

        if (READ_ONCE(entity->dependency))
                return false;

        return true;
}

/**
 * drm_sched_entity_get_free_sched - Get the rq from rq_list with least load
 *
 * @entity: scheduler entity
 *
 * Return the pointer to the rq with least load.
 */
static struct drm_sched_rq *
drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
{
        struct drm_sched_rq *rq = NULL;
        unsigned int min_jobs = UINT_MAX, num_jobs;
        int i;

        for (i = 0; i < entity->num_rq_list; ++i) {
                struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;

                if (!entity->rq_list[i]->sched->ready) {
                        DRM_WARN("scheduler %s is not ready, skipping\n",
                                 sched->name);
                        continue;
                }

                num_jobs = atomic_read(&sched->num_jobs);
                if (num_jobs < min_jobs) {
                        min_jobs = num_jobs;
                        rq = entity->rq_list[i];
                }
        }

        return rq;
}

/**
 * drm_sched_entity_flush - Flush a context entity
 *
 * @entity: scheduler entity
 * @timeout: time to wait in jiffies for the job queue to become empty
 *
 * drm_sched_entity_fini() is split into two functions. This is the first one:
 * it does the waiting, removes the entity from the runqueue and returns an
 * error when the process was killed.
 *
 * Returns the remaining time in jiffies left from the input timeout
 */
long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
{
        struct drm_gpu_scheduler *sched;
        struct task_struct *last_user;
        long ret = timeout;

        if (!entity->rq)
                return 0;

        sched = entity->rq->sched;
        /*
         * The client will not queue more IBs during this fini, consume
         * existing queued IBs or discard them on SIGKILL.
         */
        if (current->flags & PF_EXITING) {
                if (timeout)
                        ret = wait_event_timeout(
                                        sched->job_scheduled,
                                        drm_sched_entity_is_idle(entity),
                                        timeout);
        } else {
                wait_event_killable(sched->job_scheduled,
                                    drm_sched_entity_is_idle(entity));
        }

        /* For a killed process, disable any more IB enqueues right now */
        last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
        if ((!last_user || last_user == current->group_leader) &&
            (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) {
                spin_lock(&entity->rq_lock);
                entity->stopped = true;
                drm_sched_rq_remove_entity(entity->rq, entity);
                spin_unlock(&entity->rq_lock);
        }

        return ret;
}
EXPORT_SYMBOL(drm_sched_entity_flush);
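/*
 * Example (illustrative sketch only): drivers usually call
 * drm_sched_entity_flush() from their file/context teardown path before
 * finally freeing the entity.  "ctx" is a hypothetical driver context; a
 * return value of 0 means the timeout expired before the queue drained.
 *
 *	long timeout = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
 *
 *	timeout = drm_sched_entity_flush(&ctx->entity, timeout);
 *	if (timeout == 0)
 *		DRM_DEBUG("entity queue not empty after flush\n");
 */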
/**
 * drm_sched_entity_kill_jobs_cb - helper for drm_sched_entity_kill_jobs
 *
 * @f: signaled fence
 * @cb: our callback structure
 *
 * Signal the scheduler finished fence when the entity in question is killed.
 */
static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
                                          struct dma_fence_cb *cb)
{
        struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
                                                 finish_cb);

        drm_sched_fence_finished(job->s_fence);
        WARN_ON(job->s_fence->parent);
        job->sched->ops->free_job(job);
}

/**
 * drm_sched_entity_kill_jobs - Make sure all remaining jobs are killed
 *
 * @entity: entity which is cleaned up
 *
 * Makes sure that all remaining jobs in an entity are killed before it is
 * destroyed.
 */
static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
{
        struct drm_sched_job *job;
        int r;

        while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
                struct drm_sched_fence *s_fence = job->s_fence;

                drm_sched_fence_scheduled(s_fence);
                dma_fence_set_error(&s_fence->finished, -ESRCH);

                /*
                 * When the pipe is hung by an older entity, the new entity
                 * might not even have had a chance to submit its first job
                 * to the HW, so entity->last_scheduled will remain NULL.
                 */
                if (!entity->last_scheduled) {
                        drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
                        continue;
                }

                r = dma_fence_add_callback(entity->last_scheduled,
                                           &job->finish_cb,
                                           drm_sched_entity_kill_jobs_cb);
                if (r == -ENOENT)
                        drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
                else if (r)
                        DRM_ERROR("fence add callback failed (%d)\n", r);
        }
}
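/*
 * Example (illustrative sketch only): the pattern used above is the usual
 * dma_fence callback idiom.  dma_fence_add_callback() returns -ENOENT when
 * the fence has already signaled, in which case the caller must run the
 * callback directly.  "my_cb" and "obj" are hypothetical.
 *
 *	r = dma_fence_add_callback(fence, &obj->cb, my_cb);
 *	if (r == -ENOENT)
 *		my_cb(fence, &obj->cb);
 *	else if (r)
 *		DRM_ERROR("fence add callback failed (%d)\n", r);
 */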
/**
 * drm_sched_entity_fini - Destroy a context entity
 *
 * @entity: scheduler entity
 *
 * This should be called after drm_sched_entity_flush(). It goes over the
 * entity and signals all jobs with an error code if the process was killed.
 */
void drm_sched_entity_fini(struct drm_sched_entity *entity)
{
        struct drm_gpu_scheduler *sched = NULL;

        if (entity->rq) {
                sched = entity->rq->sched;
                drm_sched_rq_remove_entity(entity->rq, entity);
        }

        /* Consumption of existing IBs wasn't completed. Forcefully
         * remove them here.
         */
        if (spsc_queue_count(&entity->job_queue)) {
                if (sched) {
                        /* Park the kernel thread for a moment to make sure
                         * it isn't processing our entity.
                         */
                        kthread_park(sched->thread);
                        kthread_unpark(sched->thread);
                }
                if (entity->dependency) {
                        dma_fence_remove_callback(entity->dependency,
                                                  &entity->cb);
                        dma_fence_put(entity->dependency);
                        entity->dependency = NULL;
                }

                drm_sched_entity_kill_jobs(entity);
        }

        dma_fence_put(entity->last_scheduled);
        entity->last_scheduled = NULL;
        kfree(entity->rq_list);
}
EXPORT_SYMBOL(drm_sched_entity_fini);

/**
 * drm_sched_entity_destroy - Destroy a context entity
 *
 * @entity: scheduler entity
 *
 * Calls drm_sched_entity_flush() and drm_sched_entity_fini()
 */
void drm_sched_entity_destroy(struct drm_sched_entity *entity)
{
        drm_sched_entity_flush(entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
        drm_sched_entity_fini(entity);
}
EXPORT_SYMBOL(drm_sched_entity_destroy);

/**
 * drm_sched_entity_clear_dep - callback to clear the entity's dependency
 *
 * @f: signaled fence
 * @cb: our callback structure
 */
static void drm_sched_entity_clear_dep(struct dma_fence *f,
                                       struct dma_fence_cb *cb)
{
        struct drm_sched_entity *entity =
                container_of(cb, struct drm_sched_entity, cb);

        entity->dependency = NULL;
        dma_fence_put(f);
}

/**
 * drm_sched_entity_wakeup - callback to clear the entity's dependency and
 * wake up the scheduler
 *
 * @f: signaled fence
 * @cb: our callback structure
 */
static void drm_sched_entity_wakeup(struct dma_fence *f,
                                    struct dma_fence_cb *cb)
{
        struct drm_sched_entity *entity =
                container_of(cb, struct drm_sched_entity, cb);

        drm_sched_entity_clear_dep(f, cb);
        drm_sched_wakeup(entity->rq->sched);
}

/**
 * drm_sched_entity_set_rq_priority - helper for drm_sched_entity_set_priority
 */
static void drm_sched_entity_set_rq_priority(struct drm_sched_rq **rq,
                                             enum drm_sched_priority priority)
{
        *rq = &(*rq)->sched->sched_rq[priority];
}

/**
 * drm_sched_entity_set_priority - Sets priority of the entity
 *
 * @entity: scheduler entity
 * @priority: scheduler priority
 *
 * Update the priority of the runqueues used for the entity.
 */
void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
                                   enum drm_sched_priority priority)
{
        unsigned int i;

        spin_lock(&entity->rq_lock);

        for (i = 0; i < entity->num_rq_list; ++i)
                drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);

        if (entity->rq) {
                drm_sched_rq_remove_entity(entity->rq, entity);
                drm_sched_entity_set_rq_priority(&entity->rq, priority);
                drm_sched_rq_add_entity(entity->rq, entity);
        }

        spin_unlock(&entity->rq_lock);
}
EXPORT_SYMBOL(drm_sched_entity_set_priority);
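/*
 * Example (illustrative sketch only): a driver can re-prioritize a context
 * at runtime by moving its entity to a different priority run queue.  "ctx"
 * is a hypothetical driver context and the enum value is assumed from the
 * scheduler headers of this era.
 *
 *	drm_sched_entity_set_priority(&ctx->entity,
 *				      DRM_SCHED_PRIORITY_HIGH_HW);
 */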
/**
 * drm_sched_entity_add_dependency_cb - add callback for the entity's
 * dependency
 *
 * @entity: entity with dependency
 *
 * Add a callback to the current dependency of the entity to wake up the
 * scheduler when the entity becomes available.
 */
static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
{
        struct drm_gpu_scheduler *sched = entity->rq->sched;
        struct dma_fence *fence = entity->dependency;
        struct drm_sched_fence *s_fence;

        if (fence->context == entity->fence_context ||
            fence->context == entity->fence_context + 1) {
                /*
                 * Fence is a scheduled/finished fence from a job
                 * which belongs to the same entity, we can ignore
                 * fences from ourselves.
                 */
                dma_fence_put(entity->dependency);
                return false;
        }

        s_fence = to_drm_sched_fence(fence);
        if (s_fence && s_fence->sched == sched) {

                /*
                 * Fence is from the same scheduler, only need to wait for
                 * it to be scheduled.
                 */
                fence = dma_fence_get(&s_fence->scheduled);
                dma_fence_put(entity->dependency);
                entity->dependency = fence;
                if (!dma_fence_add_callback(fence, &entity->cb,
                                            drm_sched_entity_clear_dep))
                        return true;

                /* Ignore it when it is already scheduled */
                dma_fence_put(fence);
                return false;
        }

        if (!dma_fence_add_callback(entity->dependency, &entity->cb,
                                    drm_sched_entity_wakeup))
                return true;

        dma_fence_put(entity->dependency);
        return false;
}

/**
 * drm_sched_entity_pop_job - get a ready to be scheduled job from the entity
 *
 * @entity: entity to get the job from
 *
 * Process all dependencies and try to get one job from the entity's queue.
 */
struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
{
        struct drm_gpu_scheduler *sched = entity->rq->sched;
        struct drm_sched_job *sched_job;

        sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
        if (!sched_job)
                return NULL;

        while ((entity->dependency =
                        sched->ops->dependency(sched_job, entity))) {
                trace_drm_sched_job_wait_dep(sched_job, entity->dependency);

                if (drm_sched_entity_add_dependency_cb(entity))
                        return NULL;
        }

        /* skip jobs from an entity that has been marked guilty */
        if (entity->guilty && atomic_read(entity->guilty))
                dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);

        dma_fence_put(entity->last_scheduled);
        entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);

        spsc_queue_pop(&entity->job_queue);
        return sched_job;
}

/**
 * drm_sched_entity_select_rq - select a new rq for the entity
 *
 * @entity: scheduler entity
 *
 * Check all prerequisites and select a new rq for the entity for load
 * balancing.
 */
void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
{
        struct dma_fence *fence;
        struct drm_sched_rq *rq;

        if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list <= 1)
                return;

        fence = READ_ONCE(entity->last_scheduled);
        if (fence && !dma_fence_is_signaled(fence))
                return;

        rq = drm_sched_entity_get_free_sched(entity);
        if (rq == entity->rq)
                return;

        spin_lock(&entity->rq_lock);
        drm_sched_rq_remove_entity(entity->rq, entity);
        entity->rq = rq;
        spin_unlock(&entity->rq_lock);
}
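/*
 * Example (illustrative sketch only): drm_sched_entity_pop_job() above keeps
 * calling the driver's ops->dependency() callback until it returns NULL, and
 * each returned reference is consumed by the scheduler.  A minimal driver
 * implementation might hand back fences from a hypothetical per-job array:
 *
 *	static struct dma_fence *
 *	my_job_dependency(struct drm_sched_job *sched_job,
 *			  struct drm_sched_entity *s_entity)
 *	{
 *		struct my_job *job = to_my_job(sched_job);
 *
 *		if (job->num_deps)
 *			return job->deps[--job->num_deps];
 *
 *		return NULL;
 *	}
 */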
515 * 516 * Returns 0 for success, negative error code otherwise. 517 */ 518 void drm_sched_entity_push_job(struct drm_sched_job *sched_job, 519 struct drm_sched_entity *entity) 520 { 521 bool first; 522 523 trace_drm_sched_job(sched_job, entity); 524 atomic_inc(&entity->rq->sched->num_jobs); 525 WRITE_ONCE(entity->last_user, current->group_leader); 526 first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); 527 528 /* first job wakes up scheduler */ 529 if (first) { 530 /* Add the entity to the run queue */ 531 spin_lock(&entity->rq_lock); 532 if (entity->stopped) { 533 spin_unlock(&entity->rq_lock); 534 535 DRM_ERROR("Trying to push to a killed entity\n"); 536 return; 537 } 538 drm_sched_rq_add_entity(entity->rq, entity); 539 spin_unlock(&entity->rq_lock); 540 drm_sched_wakeup(entity->rq->sched); 541 } 542 } 543 EXPORT_SYMBOL(drm_sched_entity_push_job); 544