/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_unreference_unlocked(state->bo[i]);

	kfree(state);
}

int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			state->bo_count = i - 1;
			goto err;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

	kfree(bo_state);

err_free:

	vc4_free_hang_state(dev, kernel_state);

err:
	return ret;
}

static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, unref_list_count, prev_idx;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	/* Get the BOs from both binner and renderer into hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	prev_idx = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			drm_gem_object_reference(&exec[i]->bo[j]->base);
			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			drm_gem_object_reference(&bo->base.base);
			kernel_state->bo[j + prev_idx] = &bo->base.base;
			j++;
		}
		/* Continue filling the array right after the last entry
		 * written for this job; "j + 1" would leave a gap and run
		 * one element past the allocation.
		 */
		prev_idx = j;
	}

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping the
		 * reference on runtime PM.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

static void
vc4_hangcheck_elapsed(unsigned long data)
{
	struct drm_device *dev = (struct drm_device *)data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset. This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
332 */ 333 V3D_WRITE(V3D_CTNCA(thread), start); 334 V3D_WRITE(V3D_CTNEA(thread), end); 335 } 336 337 int 338 vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, 339 bool interruptible) 340 { 341 struct vc4_dev *vc4 = to_vc4_dev(dev); 342 int ret = 0; 343 unsigned long timeout_expire; 344 DEFINE_WAIT(wait); 345 346 if (vc4->finished_seqno >= seqno) 347 return 0; 348 349 if (timeout_ns == 0) 350 return -ETIME; 351 352 timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns); 353 354 trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns); 355 for (;;) { 356 prepare_to_wait(&vc4->job_wait_queue, &wait, 357 interruptible ? TASK_INTERRUPTIBLE : 358 TASK_UNINTERRUPTIBLE); 359 360 if (interruptible && signal_pending(current)) { 361 ret = -ERESTARTSYS; 362 break; 363 } 364 365 if (vc4->finished_seqno >= seqno) 366 break; 367 368 if (timeout_ns != ~0ull) { 369 if (time_after_eq(jiffies, timeout_expire)) { 370 ret = -ETIME; 371 break; 372 } 373 schedule_timeout(timeout_expire - jiffies); 374 } else { 375 schedule(); 376 } 377 } 378 379 finish_wait(&vc4->job_wait_queue, &wait); 380 trace_vc4_wait_for_seqno_end(dev, seqno); 381 382 return ret; 383 } 384 385 static void 386 vc4_flush_caches(struct drm_device *dev) 387 { 388 struct vc4_dev *vc4 = to_vc4_dev(dev); 389 390 /* Flush the GPU L2 caches. These caches sit on top of system 391 * L3 (the 128kb or so shared with the CPU), and are 392 * non-allocating in the L3. 393 */ 394 V3D_WRITE(V3D_L2CACTL, 395 V3D_L2CACTL_L2CCLR); 396 397 V3D_WRITE(V3D_SLCACTL, 398 VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | 399 VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) | 400 VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) | 401 VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC)); 402 } 403 404 /* Sets the registers for the next job to be actually be executed in 405 * the hardware. 406 * 407 * The job_lock should be held during this. 408 */ 409 void 410 vc4_submit_next_bin_job(struct drm_device *dev) 411 { 412 struct vc4_dev *vc4 = to_vc4_dev(dev); 413 struct vc4_exec_info *exec; 414 415 again: 416 exec = vc4_first_bin_job(vc4); 417 if (!exec) 418 return; 419 420 vc4_flush_caches(dev); 421 422 /* Either put the job in the binner if it uses the binner, or 423 * immediately move it to the to-be-rendered queue. 
424 */ 425 if (exec->ct0ca != exec->ct0ea) { 426 submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); 427 } else { 428 vc4_move_job_to_render(dev, exec); 429 goto again; 430 } 431 } 432 433 void 434 vc4_submit_next_render_job(struct drm_device *dev) 435 { 436 struct vc4_dev *vc4 = to_vc4_dev(dev); 437 struct vc4_exec_info *exec = vc4_first_render_job(vc4); 438 439 if (!exec) 440 return; 441 442 submit_cl(dev, 1, exec->ct1ca, exec->ct1ea); 443 } 444 445 void 446 vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec) 447 { 448 struct vc4_dev *vc4 = to_vc4_dev(dev); 449 bool was_empty = list_empty(&vc4->render_job_list); 450 451 list_move_tail(&exec->head, &vc4->render_job_list); 452 if (was_empty) 453 vc4_submit_next_render_job(dev); 454 } 455 456 static void 457 vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) 458 { 459 struct vc4_bo *bo; 460 unsigned i; 461 462 for (i = 0; i < exec->bo_count; i++) { 463 bo = to_vc4_bo(&exec->bo[i]->base); 464 bo->seqno = seqno; 465 } 466 467 list_for_each_entry(bo, &exec->unref_list, unref_head) { 468 bo->seqno = seqno; 469 } 470 471 for (i = 0; i < exec->rcl_write_bo_count; i++) { 472 bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); 473 bo->write_seqno = seqno; 474 } 475 } 476 477 /* Queues a struct vc4_exec_info for execution. If no job is 478 * currently executing, then submits it. 479 * 480 * Unlike most GPUs, our hardware only handles one command list at a 481 * time. To queue multiple jobs at once, we'd need to edit the 482 * previous command list to have a jump to the new one at the end, and 483 * then bump the end address. That's a change for a later date, 484 * though. 485 */ 486 static void 487 vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) 488 { 489 struct vc4_dev *vc4 = to_vc4_dev(dev); 490 uint64_t seqno; 491 unsigned long irqflags; 492 493 spin_lock_irqsave(&vc4->job_lock, irqflags); 494 495 seqno = ++vc4->emit_seqno; 496 exec->seqno = seqno; 497 vc4_update_bo_seqnos(exec, seqno); 498 499 list_add_tail(&exec->head, &vc4->bin_job_list); 500 501 /* If no job was executing, kick ours off. Otherwise, it'll 502 * get started when the previous job's flush done interrupt 503 * occurs. 504 */ 505 if (vc4_first_bin_job(vc4) == exec) { 506 vc4_submit_next_bin_job(dev); 507 vc4_queue_hangcheck(dev); 508 } 509 510 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 511 } 512 513 /** 514 * Looks up a bunch of GEM handles for BOs and stores the array for 515 * use in the command validator that actually writes relocated 516 * addresses pointing to them. 517 */ 518 static int 519 vc4_cl_lookup_bos(struct drm_device *dev, 520 struct drm_file *file_priv, 521 struct vc4_exec_info *exec) 522 { 523 struct drm_vc4_submit_cl *args = exec->args; 524 uint32_t *handles; 525 int ret = 0; 526 int i; 527 528 exec->bo_count = args->bo_handle_count; 529 530 if (!exec->bo_count) { 531 /* See comment on bo_index for why we have to check 532 * this. 
533 */ 534 DRM_ERROR("Rendering requires BOs to validate\n"); 535 return -EINVAL; 536 } 537 538 exec->bo = drm_calloc_large(exec->bo_count, 539 sizeof(struct drm_gem_cma_object *)); 540 if (!exec->bo) { 541 DRM_ERROR("Failed to allocate validated BO pointers\n"); 542 return -ENOMEM; 543 } 544 545 handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t)); 546 if (!handles) { 547 DRM_ERROR("Failed to allocate incoming GEM handles\n"); 548 goto fail; 549 } 550 551 ret = copy_from_user(handles, 552 (void __user *)(uintptr_t)args->bo_handles, 553 exec->bo_count * sizeof(uint32_t)); 554 if (ret) { 555 DRM_ERROR("Failed to copy in GEM handles\n"); 556 goto fail; 557 } 558 559 spin_lock(&file_priv->table_lock); 560 for (i = 0; i < exec->bo_count; i++) { 561 struct drm_gem_object *bo = idr_find(&file_priv->object_idr, 562 handles[i]); 563 if (!bo) { 564 DRM_ERROR("Failed to look up GEM BO %d: %d\n", 565 i, handles[i]); 566 ret = -EINVAL; 567 spin_unlock(&file_priv->table_lock); 568 goto fail; 569 } 570 drm_gem_object_reference(bo); 571 exec->bo[i] = (struct drm_gem_cma_object *)bo; 572 } 573 spin_unlock(&file_priv->table_lock); 574 575 fail: 576 drm_free_large(handles); 577 return ret; 578 } 579 580 static int 581 vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) 582 { 583 struct drm_vc4_submit_cl *args = exec->args; 584 void *temp = NULL; 585 void *bin; 586 int ret = 0; 587 uint32_t bin_offset = 0; 588 uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size, 589 16); 590 uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size; 591 uint32_t exec_size = uniforms_offset + args->uniforms_size; 592 uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) * 593 args->shader_rec_count); 594 struct vc4_bo *bo; 595 596 if (uniforms_offset < shader_rec_offset || 597 exec_size < uniforms_offset || 598 args->shader_rec_count >= (UINT_MAX / 599 sizeof(struct vc4_shader_state)) || 600 temp_size < exec_size) { 601 DRM_ERROR("overflow in exec arguments\n"); 602 goto fail; 603 } 604 605 /* Allocate space where we'll store the copied in user command lists 606 * and shader records. 607 * 608 * We don't just copy directly into the BOs because we need to 609 * read the contents back for validation, and I think the 610 * bo->vaddr is uncached access. 
611 */ 612 temp = drm_malloc_ab(temp_size, 1); 613 if (!temp) { 614 DRM_ERROR("Failed to allocate storage for copying " 615 "in bin/render CLs.\n"); 616 ret = -ENOMEM; 617 goto fail; 618 } 619 bin = temp + bin_offset; 620 exec->shader_rec_u = temp + shader_rec_offset; 621 exec->uniforms_u = temp + uniforms_offset; 622 exec->shader_state = temp + exec_size; 623 exec->shader_state_size = args->shader_rec_count; 624 625 if (copy_from_user(bin, 626 (void __user *)(uintptr_t)args->bin_cl, 627 args->bin_cl_size)) { 628 ret = -EFAULT; 629 goto fail; 630 } 631 632 if (copy_from_user(exec->shader_rec_u, 633 (void __user *)(uintptr_t)args->shader_rec, 634 args->shader_rec_size)) { 635 ret = -EFAULT; 636 goto fail; 637 } 638 639 if (copy_from_user(exec->uniforms_u, 640 (void __user *)(uintptr_t)args->uniforms, 641 args->uniforms_size)) { 642 ret = -EFAULT; 643 goto fail; 644 } 645 646 bo = vc4_bo_create(dev, exec_size, true); 647 if (IS_ERR(bo)) { 648 DRM_ERROR("Couldn't allocate BO for binning\n"); 649 ret = PTR_ERR(bo); 650 goto fail; 651 } 652 exec->exec_bo = &bo->base; 653 654 list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head, 655 &exec->unref_list); 656 657 exec->ct0ca = exec->exec_bo->paddr + bin_offset; 658 659 exec->bin_u = bin; 660 661 exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset; 662 exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset; 663 exec->shader_rec_size = args->shader_rec_size; 664 665 exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset; 666 exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset; 667 exec->uniforms_size = args->uniforms_size; 668 669 ret = vc4_validate_bin_cl(dev, 670 exec->exec_bo->vaddr + bin_offset, 671 bin, 672 exec); 673 if (ret) 674 goto fail; 675 676 ret = vc4_validate_shader_recs(dev, exec); 677 if (ret) 678 goto fail; 679 680 /* Block waiting on any previous rendering into the CS's VBO, 681 * IB, or textures, so that pixels are actually written by the 682 * time we try to read them. 
683 */ 684 ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true); 685 686 fail: 687 drm_free_large(temp); 688 return ret; 689 } 690 691 static void 692 vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) 693 { 694 struct vc4_dev *vc4 = to_vc4_dev(dev); 695 unsigned i; 696 697 if (exec->bo) { 698 for (i = 0; i < exec->bo_count; i++) 699 drm_gem_object_unreference_unlocked(&exec->bo[i]->base); 700 drm_free_large(exec->bo); 701 } 702 703 while (!list_empty(&exec->unref_list)) { 704 struct vc4_bo *bo = list_first_entry(&exec->unref_list, 705 struct vc4_bo, unref_head); 706 list_del(&bo->unref_head); 707 drm_gem_object_unreference_unlocked(&bo->base.base); 708 } 709 710 mutex_lock(&vc4->power_lock); 711 if (--vc4->power_refcount == 0) 712 pm_runtime_put(&vc4->v3d->pdev->dev); 713 mutex_unlock(&vc4->power_lock); 714 715 kfree(exec); 716 } 717 718 void 719 vc4_job_handle_completed(struct vc4_dev *vc4) 720 { 721 unsigned long irqflags; 722 struct vc4_seqno_cb *cb, *cb_temp; 723 724 spin_lock_irqsave(&vc4->job_lock, irqflags); 725 while (!list_empty(&vc4->job_done_list)) { 726 struct vc4_exec_info *exec = 727 list_first_entry(&vc4->job_done_list, 728 struct vc4_exec_info, head); 729 list_del(&exec->head); 730 731 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 732 vc4_complete_exec(vc4->dev, exec); 733 spin_lock_irqsave(&vc4->job_lock, irqflags); 734 } 735 736 list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) { 737 if (cb->seqno <= vc4->finished_seqno) { 738 list_del_init(&cb->work.entry); 739 schedule_work(&cb->work); 740 } 741 } 742 743 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 744 } 745 746 static void vc4_seqno_cb_work(struct work_struct *work) 747 { 748 struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work); 749 750 cb->func(cb); 751 } 752 753 int vc4_queue_seqno_cb(struct drm_device *dev, 754 struct vc4_seqno_cb *cb, uint64_t seqno, 755 void (*func)(struct vc4_seqno_cb *cb)) 756 { 757 struct vc4_dev *vc4 = to_vc4_dev(dev); 758 int ret = 0; 759 unsigned long irqflags; 760 761 cb->func = func; 762 INIT_WORK(&cb->work, vc4_seqno_cb_work); 763 764 spin_lock_irqsave(&vc4->job_lock, irqflags); 765 if (seqno > vc4->finished_seqno) { 766 cb->seqno = seqno; 767 list_add_tail(&cb->work.entry, &vc4->seqno_cb_list); 768 } else { 769 schedule_work(&cb->work); 770 } 771 spin_unlock_irqrestore(&vc4->job_lock, irqflags); 772 773 return ret; 774 } 775 776 /* Scheduled when any job has been completed, this walks the list of 777 * jobs that had completed and unrefs their BOs and frees their exec 778 * structs. 
779 */ 780 static void 781 vc4_job_done_work(struct work_struct *work) 782 { 783 struct vc4_dev *vc4 = 784 container_of(work, struct vc4_dev, job_done_work); 785 786 vc4_job_handle_completed(vc4); 787 } 788 789 static int 790 vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev, 791 uint64_t seqno, 792 uint64_t *timeout_ns) 793 { 794 unsigned long start = jiffies; 795 int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true); 796 797 if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) { 798 uint64_t delta = jiffies_to_nsecs(jiffies - start); 799 800 if (*timeout_ns >= delta) 801 *timeout_ns -= delta; 802 } 803 804 return ret; 805 } 806 807 int 808 vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, 809 struct drm_file *file_priv) 810 { 811 struct drm_vc4_wait_seqno *args = data; 812 813 return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno, 814 &args->timeout_ns); 815 } 816 817 int 818 vc4_wait_bo_ioctl(struct drm_device *dev, void *data, 819 struct drm_file *file_priv) 820 { 821 int ret; 822 struct drm_vc4_wait_bo *args = data; 823 struct drm_gem_object *gem_obj; 824 struct vc4_bo *bo; 825 826 if (args->pad != 0) 827 return -EINVAL; 828 829 gem_obj = drm_gem_object_lookup(file_priv, args->handle); 830 if (!gem_obj) { 831 DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); 832 return -EINVAL; 833 } 834 bo = to_vc4_bo(gem_obj); 835 836 ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, 837 &args->timeout_ns); 838 839 drm_gem_object_unreference_unlocked(gem_obj); 840 return ret; 841 } 842 843 /** 844 * Submits a command list to the VC4. 845 * 846 * This is what is called batchbuffer emitting on other hardware. 847 */ 848 int 849 vc4_submit_cl_ioctl(struct drm_device *dev, void *data, 850 struct drm_file *file_priv) 851 { 852 struct vc4_dev *vc4 = to_vc4_dev(dev); 853 struct drm_vc4_submit_cl *args = data; 854 struct vc4_exec_info *exec; 855 int ret = 0; 856 857 if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { 858 DRM_ERROR("Unknown flags: 0x%02x\n", args->flags); 859 return -EINVAL; 860 } 861 862 exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); 863 if (!exec) { 864 DRM_ERROR("malloc failure on exec struct\n"); 865 return -ENOMEM; 866 } 867 868 mutex_lock(&vc4->power_lock); 869 if (vc4->power_refcount++ == 0) 870 ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); 871 mutex_unlock(&vc4->power_lock); 872 if (ret < 0) { 873 kfree(exec); 874 return ret; 875 } 876 877 exec->args = args; 878 INIT_LIST_HEAD(&exec->unref_list); 879 880 ret = vc4_cl_lookup_bos(dev, file_priv, exec); 881 if (ret) 882 goto fail; 883 884 if (exec->args->bin_cl_size != 0) { 885 ret = vc4_get_bcl(dev, exec); 886 if (ret) 887 goto fail; 888 } else { 889 exec->ct0ca = 0; 890 exec->ct0ea = 0; 891 } 892 893 ret = vc4_get_rcl(dev, exec); 894 if (ret) 895 goto fail; 896 897 /* Clear this out of the struct we'll be putting in the queue, 898 * since it's part of our stack. 899 */ 900 exec->args = NULL; 901 902 vc4_queue_submit(dev, exec); 903 904 /* Return the seqno for our job. 
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}

void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	setup_timer(&vc4->hangcheck.timer,
		    vc4_hangcheck_elapsed,
		    (unsigned long)dev);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers. Now free the object.
	 */
	if (vc4->overflow_mem) {
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
		vc4->overflow_mem = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);

	vc4_bo_cache_destroy(dev);
}