/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>
#include <drm/gpu_scheduler.h>

#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)		\
		container_of((sched_job), struct drm_sched_job, queue_node)

/**
 * drm_sched_entity_init - Init a context entity used by the scheduler when
 * submitting to a HW ring.
 *
 * @entity: scheduler entity to init
 * @rq_list: the list of run queues on which jobs from this
 *           entity can be submitted
 * @num_rq_list: number of run queues in rq_list
 * @guilty: atomic_t set to 1 when a job on this queue
 *          is found to be guilty causing a timeout
 *
 * Note: the rq_list should have at least one element to schedule
 *       the entity
 *
 * Returns 0 on success or a negative error code on failure.
 */
int drm_sched_entity_init(struct drm_sched_entity *entity,
			  struct drm_sched_rq **rq_list,
			  unsigned int num_rq_list,
			  atomic_t *guilty)
{
	int i;

	if (!(entity && rq_list && num_rq_list > 0 && rq_list[0]))
		return -EINVAL;

	memset(entity, 0, sizeof(struct drm_sched_entity));
	INIT_LIST_HEAD(&entity->list);
	entity->rq = rq_list[0];
	entity->guilty = guilty;
	entity->num_rq_list = num_rq_list;
	entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
				  GFP_KERNEL);
	if (!entity->rq_list)
		return -ENOMEM;

	for (i = 0; i < num_rq_list; ++i)
		entity->rq_list[i] = rq_list[i];
	entity->last_scheduled = NULL;

	spin_lock_init(&entity->rq_lock);
	spsc_queue_init(&entity->job_queue);

	atomic_set(&entity->fence_seq, 0);
	entity->fence_context = dma_fence_context_alloc(2);

	return 0;
}
EXPORT_SYMBOL(drm_sched_entity_init);
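
/*
 * Illustrative usage sketch (not part of the scheduler itself): a driver
 * that owns several rings of the same type can pass one run queue per
 * scheduler to drm_sched_entity_init() so the entity can later be load
 * balanced between them.  The NUM_RINGS/my_rings/ctx names below are
 * hypothetical driver-side names.
 *
 *	struct drm_sched_rq *rq_list[NUM_RINGS];
 *	unsigned int i;
 *	int r;
 *
 *	for (i = 0; i < NUM_RINGS; ++i)
 *		rq_list[i] = &my_rings[i]->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
 *
 *	r = drm_sched_entity_init(&ctx->entity, rq_list, NUM_RINGS,
 *				  &ctx->guilty);
 *	if (r)
 *		return r;
 */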

/**
 * drm_sched_entity_is_idle - Check if entity is idle
 *
 * @entity: scheduler entity
 *
 * Returns true if the entity does not have any unscheduled jobs.
 */
static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
{
	rmb(); /* for list_empty to work without lock */

	if (list_empty(&entity->list) ||
	    spsc_queue_peek(&entity->job_queue) == NULL)
		return true;

	return false;
}

/**
 * drm_sched_entity_is_ready - Check if entity is ready
 *
 * @entity: scheduler entity
 *
 * Returns true if the entity could provide a job.
 */
bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
{
	if (spsc_queue_peek(&entity->job_queue) == NULL)
		return false;

	if (READ_ONCE(entity->dependency))
		return false;

	return true;
}

/**
 * drm_sched_entity_get_free_sched - Get the rq from rq_list with least load
 *
 * @entity: scheduler entity
 *
 * Return the pointer to the rq with least load.
 */
static struct drm_sched_rq *
drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
{
	struct drm_sched_rq *rq = NULL;
	unsigned int min_jobs = UINT_MAX, num_jobs;
	int i;

	for (i = 0; i < entity->num_rq_list; ++i) {
		num_jobs = atomic_read(&entity->rq_list[i]->sched->num_jobs);
		if (num_jobs < min_jobs) {
			min_jobs = num_jobs;
			rq = entity->rq_list[i];
		}
	}

	return rq;
}

/**
 * drm_sched_entity_flush - Flush a context entity
 *
 * @entity: scheduler entity
 * @timeout: time to wait in jiffies for the queue to become empty
 *
 * This is the first part of the entity teardown split out of
 * drm_sched_entity_fini(): it does the waiting, removes the entity from the
 * runqueue and returns an error when the process was killed.
 *
 * Returns the remaining time in jiffies left from the input timeout
 */
long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
{
	struct drm_gpu_scheduler *sched;
	struct task_struct *last_user;
	long ret = timeout;

	sched = entity->rq->sched;
	/*
	 * The client will not queue more IBs during this fini; consume
	 * existing queued IBs or discard them on SIGKILL.
	 */
	if (current->flags & PF_EXITING) {
		if (timeout)
			ret = wait_event_timeout(
					sched->job_scheduled,
					drm_sched_entity_is_idle(entity),
					timeout);
	} else {
		wait_event_killable(sched->job_scheduled,
				    drm_sched_entity_is_idle(entity));
	}

	/* For a killed process, disable any further IB enqueue right now */
	last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
	if ((!last_user || last_user == current->group_leader) &&
	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) {
		spin_lock(&entity->rq_lock);
		entity->stopped = true;
		drm_sched_rq_remove_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
	}

	return ret;
}
EXPORT_SYMBOL(drm_sched_entity_flush);

/**
 * drm_sched_entity_kill_jobs_cb - helper for drm_sched_entity_kill_jobs
 *
 * @f: signaled fence
 * @cb: our callback structure
 *
 * Signal the scheduler finished fence when the entity in question is killed.
 */
static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
					  struct dma_fence_cb *cb)
{
	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
						 finish_cb);

	drm_sched_fence_finished(job->s_fence);
	WARN_ON(job->s_fence->parent);
	dma_fence_put(&job->s_fence->finished);
	job->sched->ops->free_job(job);
}

/**
 * drm_sched_entity_kill_jobs - Make sure all remaining jobs are killed
 *
 * @entity: entity which is cleaned up
 *
 * Makes sure that all remaining jobs in an entity are killed before it is
 * destroyed.
 */
static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
{
	struct drm_sched_job *job;
	int r;

	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
		struct drm_sched_fence *s_fence = job->s_fence;

		drm_sched_fence_scheduled(s_fence);
		dma_fence_set_error(&s_fence->finished, -ESRCH);

		/*
		 * When the pipe is hung by an older entity, the new entity
		 * might not even have had a chance to submit its first job
		 * to HW, so entity->last_scheduled will remain NULL.
		 */
		if (!entity->last_scheduled) {
			drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
			continue;
		}

		r = dma_fence_add_callback(entity->last_scheduled,
					   &job->finish_cb,
					   drm_sched_entity_kill_jobs_cb);
		if (r == -ENOENT)
			drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
		else if (r)
			DRM_ERROR("fence add callback failed (%d)\n", r);
	}
}

/**
 * drm_sched_entity_fini - Destroy a context entity
 *
 * @entity: scheduler entity
 *
 * This should be called after drm_sched_entity_flush(). It goes over the
 * entity and signals all jobs with an error code if the process was killed.
 */
void drm_sched_entity_fini(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched;

	sched = entity->rq->sched;
	drm_sched_rq_remove_entity(entity->rq, entity);

	/* Consumption of existing IBs wasn't completed. Forcefully
	 * remove them here.
	 */
	if (spsc_queue_peek(&entity->job_queue)) {
		/* Park the scheduler thread for a moment to make sure it
		 * isn't processing our entity.
		 */
		kthread_park(sched->thread);
		kthread_unpark(sched->thread);
		if (entity->dependency) {
			dma_fence_remove_callback(entity->dependency,
						  &entity->cb);
			dma_fence_put(entity->dependency);
			entity->dependency = NULL;
		}

		drm_sched_entity_kill_jobs(entity);
	}

	dma_fence_put(entity->last_scheduled);
	entity->last_scheduled = NULL;
	kfree(entity->rq_list);
}
EXPORT_SYMBOL(drm_sched_entity_fini);

/**
 * drm_sched_entity_destroy - Destroy a context entity
 *
 * @entity: scheduler entity
 *
 * Calls drm_sched_entity_flush() and drm_sched_entity_fini()
 */
void drm_sched_entity_destroy(struct drm_sched_entity *entity)
{
	drm_sched_entity_flush(entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
	drm_sched_entity_fini(entity);
}
EXPORT_SYMBOL(drm_sched_entity_destroy);
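
/*
 * Teardown sketch (illustrative only): drivers that are fine with the
 * default MAX_WAIT_SCHED_ENTITY_Q_EMPTY timeout can simply call
 * drm_sched_entity_destroy().  A driver that wants its own timeout can do
 * the two steps separately; my_timeout below is a hypothetical value.
 *
 *	drm_sched_entity_flush(&ctx->entity, my_timeout);
 *	drm_sched_entity_fini(&ctx->entity);
 */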

/**
 * drm_sched_entity_clear_dep - callback to clear the entity's dependency
 */
static void drm_sched_entity_clear_dep(struct dma_fence *f,
				       struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);

	entity->dependency = NULL;
	dma_fence_put(f);
}

/**
 * drm_sched_entity_wakeup - callback to clear the entity's dependency and
 * wake up the scheduler
 */
static void drm_sched_entity_wakeup(struct dma_fence *f,
				    struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);

	drm_sched_entity_clear_dep(f, cb);
	drm_sched_wakeup(entity->rq->sched);
}

/**
 * drm_sched_entity_set_rq_priority - helper for drm_sched_entity_set_priority
 */
static void drm_sched_entity_set_rq_priority(struct drm_sched_rq **rq,
					     enum drm_sched_priority priority)
{
	*rq = &(*rq)->sched->sched_rq[priority];
}

/**
 * drm_sched_entity_set_priority - Sets priority of the entity
 *
 * @entity: scheduler entity
 * @priority: scheduler priority
 *
 * Update the priority of the runqueues used for the entity.
 */
void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
				   enum drm_sched_priority priority)
{
	unsigned int i;

	spin_lock(&entity->rq_lock);

	for (i = 0; i < entity->num_rq_list; ++i)
		drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);

	drm_sched_rq_remove_entity(entity->rq, entity);
	drm_sched_entity_set_rq_priority(&entity->rq, priority);
	drm_sched_rq_add_entity(entity->rq, entity);

	spin_unlock(&entity->rq_lock);
}
EXPORT_SYMBOL(drm_sched_entity_set_priority);
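
/*
 * Illustrative sketch: a driver can change the priority of an already
 * initialized entity at any time, for example from a context-priority
 * ioctl.  DRM_SCHED_PRIORITY_HIGH_SW is just one example level here.
 *
 *	drm_sched_entity_set_priority(&ctx->entity, DRM_SCHED_PRIORITY_HIGH_SW);
 */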

/**
 * drm_sched_entity_add_dependency_cb - add callback for the entity's dependency
 *
 * @entity: entity with dependency
 *
 * Add a callback to the current dependency of the entity to wake up the
 * scheduler when the entity becomes available.
 */
static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->rq->sched;
	struct dma_fence *fence = entity->dependency;
	struct drm_sched_fence *s_fence;

	if (fence->context == entity->fence_context ||
	    fence->context == entity->fence_context + 1) {
		/*
		 * Fence is a scheduled/finished fence from a job
		 * which belongs to the same entity, we can ignore
		 * fences from ourselves
		 */
		dma_fence_put(entity->dependency);
		return false;
	}

	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched) {

		/*
		 * Fence is from the same scheduler, only need to wait for
		 * it to be scheduled
		 */
		fence = dma_fence_get(&s_fence->scheduled);
		dma_fence_put(entity->dependency);
		entity->dependency = fence;
		if (!dma_fence_add_callback(fence, &entity->cb,
					    drm_sched_entity_clear_dep))
			return true;

		/* Ignore it when it is already scheduled */
		dma_fence_put(fence);
		return false;
	}

	if (!dma_fence_add_callback(entity->dependency, &entity->cb,
				    drm_sched_entity_wakeup))
		return true;

	dma_fence_put(entity->dependency);
	return false;
}

/**
 * drm_sched_entity_pop_job - get a ready to be scheduled job from the entity
 *
 * @entity: entity to get the job from
 *
 * Process all dependencies and try to get one job from the entity's queue.
 */
struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->rq->sched;
	struct drm_sched_job *sched_job;

	sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
	if (!sched_job)
		return NULL;

	while ((entity->dependency =
			sched->ops->dependency(sched_job, entity))) {

		if (drm_sched_entity_add_dependency_cb(entity)) {

			trace_drm_sched_job_wait_dep(sched_job,
						     entity->dependency);
			return NULL;
		}
	}

	/* skip jobs from an entity that has been marked guilty */
	if (entity->guilty && atomic_read(entity->guilty))
		dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);

	dma_fence_put(entity->last_scheduled);
	entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);

	spsc_queue_pop(&entity->job_queue);
	return sched_job;
}
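
/*
 * Sketch of a backend ->dependency callback as consumed by the loop in
 * drm_sched_entity_pop_job() above (illustrative only; the my_job/my_sync
 * names are hypothetical driver-side helpers).  The callback returns the
 * next fence the job still has to wait for, or NULL once the job is ready:
 *
 *	static struct dma_fence *
 *	my_sched_dependency(struct drm_sched_job *sched_job,
 *			    struct drm_sched_entity *s_entity)
 *	{
 *		struct my_job *job = to_my_job(sched_job);
 *
 *		return my_sync_get_next_fence(&job->sync);
 *	}
 */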

/**
 * drm_sched_entity_select_rq - select a new rq for the entity
 *
 * @entity: scheduler entity
 *
 * Check all prerequisites and select a new rq for the entity for load
 * balancing.
 */
void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
{
	struct dma_fence *fence;
	struct drm_sched_rq *rq;

	if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list <= 1)
		return;

	fence = READ_ONCE(entity->last_scheduled);
	if (fence && !dma_fence_is_signaled(fence))
		return;

	rq = drm_sched_entity_get_free_sched(entity);
	if (rq == entity->rq)
		return;

	spin_lock(&entity->rq_lock);
	drm_sched_rq_remove_entity(entity->rq, entity);
	entity->rq = rq;
	spin_unlock(&entity->rq_lock);
}

/**
 * drm_sched_entity_push_job - Submit a job to the entity's job queue
 *
 * @sched_job: job to submit
 * @entity: scheduler entity
 *
 * Note: To guarantee that the order of insertion into the queue matches
 * the job's fence sequence number, this function should be called with
 * drm_sched_job_init() under a common lock.
 */
void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
			       struct drm_sched_entity *entity)
{
	bool first;

	trace_drm_sched_job(sched_job, entity);
	atomic_inc(&entity->rq->sched->num_jobs);
	WRITE_ONCE(entity->last_user, current->group_leader);
	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);

	/* first job wakes up scheduler */
	if (first) {
		/* Add the entity to the run queue */
		spin_lock(&entity->rq_lock);
		if (entity->stopped) {
			spin_unlock(&entity->rq_lock);

			DRM_ERROR("Trying to push to a killed entity\n");
			return;
		}
		drm_sched_rq_add_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
		drm_sched_wakeup(entity->rq->sched);
	}
}
EXPORT_SYMBOL(drm_sched_entity_push_job);
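
/*
 * Submission sketch (illustrative only): as the kernel-doc above notes,
 * drm_sched_job_init() and drm_sched_entity_push_job() should be called
 * under a common lock so that queue order matches fence sequence numbers.
 * The my_job/submit_lock/owner names below are hypothetical.
 *
 *	mutex_lock(&ctx->submit_lock);
 *	r = drm_sched_job_init(&job->base, &ctx->entity, owner);
 *	if (r) {
 *		mutex_unlock(&ctx->submit_lock);
 *		return r;
 *	}
 *	drm_sched_entity_push_job(&job->base, &ctx->entity);
 *	mutex_unlock(&ctx->submit_lock);
 */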