/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_unreference_unlocked(state->bo[i]);

	kfree(state);
}

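/* Returns the GPU state captured at the most recent hang to userspace
 * via DRM_IOCTL_VC4_GET_HANG_STATE, presumably for a debugging/dump
 * tool.
 *
 * The expected usage (an assumption, not enforced here) is to call the
 * ioctl twice: once with a too-small bo_count to learn how many BOs
 * were captured, then again with an array large enough to receive
 * them. The second call consumes vc4->hang_state and creates a GEM
 * handle for each captured BO.
 */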
int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			state->bo_count = i - 1;
			goto err;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

	kfree(bo_state);

err_free:

	vc4_free_hang_state(dev, kernel_state);

err:
	return ret;
}

static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, unref_list_count, prev_idx;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	/* Get the bos from both binner and renderer into hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	prev_idx = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			drm_gem_object_reference(&exec[i]->bo[j]->base);
			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			drm_gem_object_reference(&bo->base.base);
			kernel_state->bo[j + prev_idx] = &bo->base.base;
			j++;
		}
		prev_idx = j + 1;
	}

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

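/* Performs the actual GPU reset once a hang has been detected.
 *
 * The V3D block is power-cycled through runtime PM (if anything holds
 * a power reference) and its interrupt state is reinitialized. This
 * can sleep, so it is only ever called from the hangcheck work item,
 * never from the timer callback itself.
 */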
static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping the
		 * reference on runtime PM.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

static void
vc4_hangcheck_elapsed(unsigned long data)
{
	struct drm_device *dev = (struct drm_device *)data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset. This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

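/* Blocks until vc4->finished_seqno reaches the given seqno, or until
 * timeout_ns has elapsed. A timeout_ns of ~0ull means wait forever;
 * a timeout of 0 returns -ETIME immediately unless the seqno has
 * already passed.
 */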
int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}

static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 caches. These caches sit on top of system
	 * L3 (the 128kb or so shared with the CPU), and are
	 * non-allocating in the L3.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

/* Sets the registers for the next job to actually be executed in the
 * hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Disable the binner's pre-loaded overflow memory address */
	V3D_WRITE(V3D_BPOA, 0);
	V3D_WRITE(V3D_BPOS, 0);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		vc4_move_job_to_render(dev, exec);
		goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}

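/* Tags every BO referenced by a job (including the temporary BOs on
 * its unref_list) with the job's seqno, so that vc4_wait_bo_ioctl()
 * can wait for the last job that used a given buffer.
 */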
static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}
}

/* Queues a struct vc4_exec_info for execution. If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time. To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address. That's a change for a later date,
 * though.
 */
static void
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint64_t seqno;
	unsigned long irqflags;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;
	vc4_update_bo_seqnos(exec, seqno);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no job was executing, kick ours off. Otherwise, it'll
	 * get started when the previous job's flush done interrupt
	 * occurs.
	 */
	if (vc4_first_bin_job(vc4) == exec) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

/**
 * Looks up a bunch of GEM handles for BOs and stores the array for
 * use in the command validator that actually writes relocated
 * addresses pointing to them.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_ERROR("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = drm_calloc_large(exec->bo_count,
				    sizeof(struct drm_gem_cma_object *));
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)args->bo_handles,
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_reference(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

fail:
	drm_free_large(handles);
	return ret;
}

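/* Copies in the user's binner command list, shader records and
 * uniforms, validates them, and stores the validated copies in a
 * kernel-owned exec BO.
 *
 * Both the temporary staging buffer and the exec BO are laid out as:
 *
 *   [bin CL] [shader records (16-byte aligned)] [uniforms]
 *
 * with the staging buffer additionally holding the array of
 * struct vc4_shader_state used during validation.
 */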
static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
				       sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_ERROR("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = drm_malloc_ab(temp_size, 1);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   (void __user *)(uintptr_t)args->bin_cl,
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   (void __user *)(uintptr_t)args->shader_rec,
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   (void __user *)(uintptr_t)args->uniforms,
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);

fail:
	drm_free_large(temp);
	return ret;
}

static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned i;

	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++)
			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
		drm_free_large(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_unreference_unlocked(&bo->base.base);
	}

	mutex_lock(&vc4->power_lock);
	if (--vc4->power_refcount == 0)
		pm_runtime_put(&vc4->v3d->pdev->dev);
	mutex_unlock(&vc4->power_lock);

	kfree(exec);
}

void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}

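/* Registers a callback to be run (from a workqueue) once the given
 * seqno has been reached. If the seqno has already passed, the work
 * is scheduled immediately instead of being added to the pending
 * list.
 */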
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

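/* Waits for the last job that referenced the given BO (as recorded in
 * bo->seqno by vc4_update_bo_seqnos()) to complete, with the same
 * timeout handling as the seqno wait ioctl.
 */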
int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_unreference_unlocked(gem_obj);
	return ret;
}

/**
 * Submits a command list to the VC4.
 *
 * This is what is called batchbuffer emitting on other hardware.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_submit_cl *args = data;
	struct vc4_exec_info *exec;
	int ret = 0;

	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount++ == 0)
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	mutex_unlock(&vc4->power_lock);
	if (ret < 0) {
		kfree(exec);
		return ret;
	}

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	vc4_queue_submit(dev, exec);

	/* Return the seqno for our job. */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}

void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	setup_timer(&vc4->hangcheck.timer,
		    vc4_hangcheck_elapsed,
		    (unsigned long)dev);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers. Now free the object.
	 */
	if (vc4->overflow_mem) {
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
		vc4->overflow_mem = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);

	vc4_bo_cache_destroy(dev);
}