/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

/* Rearm the hang-detection timer to check for progress again in ~100ms. */
static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_unreference_unlocked(state->bo[i]);

	kfree(state);
}

int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy.
	 */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			state->bo_count = i;
			goto err;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

	kfree(bo_state);

err_free:

	vc4_free_hang_state(dev, kernel_state);

err:
	return ret;
}

/* Snapshot the state of the hung bin/render jobs and the V3D registers
 * so that userspace can retrieve it through the GET_HANG_STATE ioctl.
 */
static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, unref_list_count, prev_idx;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	/* Get the bos from both binner and renderer into hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	prev_idx = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			drm_gem_object_reference(&exec[i]->bo[j]->base);
			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			drm_gem_object_reference(&bo->base.base);
			kernel_state->bo[j + prev_idx] = &bo->base.base;
			j++;
		}
		prev_idx = j;
	}

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping and
		 * re-taking our reference on runtime PM.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

/* Timer callback that checks whether the current jobs have made any
 * progress; if not, it schedules a GPU reset from process context.
 */
static void
vc4_hangcheck_elapsed(unsigned long data)
{
	struct drm_device *dev = (struct drm_device *)data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset. This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}

static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 caches. These caches sit on top of system
	 * L3 (the 128kb or so shared with the CPU), and are
	 * non-allocating in the L3.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

/* Sets the registers for the next job to actually be executed in the
 * hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		vc4_move_job_to_render(dev, exec);
		goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}

static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}

	for (i = 0; i < exec->rcl_write_bo_count; i++) {
		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
		bo->write_seqno = seqno;
	}
}

/* Queues a struct vc4_exec_info for execution. If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time. To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address. That's a change for a later date,
 * though.
 */
static void
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint64_t seqno;
	unsigned long irqflags;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;
	vc4_update_bo_seqnos(exec, seqno);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no job was executing, kick ours off. Otherwise, it'll
	 * get started when the previous job's flush done interrupt
	 * occurs.
	 */
	if (vc4_first_bin_job(vc4) == exec) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

/**
 * Looks up a bunch of GEM handles for BOs and stores the array for
 * use in the command validator that actually writes relocated
 * addresses pointing to them.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_ERROR("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = drm_calloc_large(exec->bo_count,
				    sizeof(struct drm_gem_cma_object *));
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)args->bo_handles,
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_reference(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

fail:
	drm_free_large(handles);
	return ret;
}

static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
				       sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_ERROR("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = drm_malloc_ab(temp_size, 1);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   (void __user *)(uintptr_t)args->bin_cl,
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   (void __user *)(uintptr_t)args->shader_rec,
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   (void __user *)(uintptr_t)args->uniforms,
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);
	if (ret)
		goto fail;

	/* Block waiting on any previous rendering into the CS's VBO,
	 * IB, or textures, so that pixels are actually written by the
	 * time we try to read them.
	 */
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	drm_free_large(temp);
	return ret;
}

static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned i;

	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++)
			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
		drm_free_large(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_unreference_unlocked(&bo->base.base);
	}

	mutex_lock(&vc4->power_lock);
	if (--vc4->power_refcount == 0) {
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	kfree(exec);
}

void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}

int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_unreference_unlocked(gem_obj);
	return ret;
}

/**
 * Submits a command list to the VC4.
 *
 * This is what is called batchbuffer emitting on other hardware.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_submit_cl *args = data;
	struct vc4_exec_info *exec;
	int ret = 0;

	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount++ == 0)
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	mutex_unlock(&vc4->power_lock);
	if (ret < 0) {
		kfree(exec);
		return ret;
	}

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	vc4_queue_submit(dev, exec);

	/* Return the seqno for our job.
	 */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}

/* Set up the job lists, seqno callback list, hangcheck timer, and
 * workers used by the GEM execution paths.
 */
void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	setup_timer(&vc4->hangcheck.timer,
		    vc4_hangcheck_elapsed,
		    (unsigned long)dev);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers. Now free the object.
	 */
	if (vc4->overflow_mem) {
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
		vc4->overflow_mem = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);

	vc4_bo_cache_destroy(dev);
}