/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

static void
vc4_queue_hangcheck(struct drm_device *dev)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);

        mod_timer(&vc4->hangcheck.timer,
                  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

struct vc4_hang_state {
        struct drm_vc4_get_hang_state user_state;

        u32 bo_count;
        struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
        unsigned int i;

        mutex_lock(&dev->struct_mutex);
        for (i = 0; i < state->user_state.bo_count; i++)
                drm_gem_object_unreference(state->bo[i]);
        mutex_unlock(&dev->struct_mutex);

        kfree(state);
}

int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *file_priv)
{
        struct drm_vc4_get_hang_state *get_state = data;
        struct drm_vc4_get_hang_state_bo *bo_state;
        struct vc4_hang_state *kernel_state;
        struct drm_vc4_get_hang_state *state;
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        unsigned long irqflags;
        u32 i;
        int ret = 0;

        spin_lock_irqsave(&vc4->job_lock, irqflags);
        kernel_state = vc4->hang_state;
        if (!kernel_state) {
                spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                return -ENOENT;
        }
        state = &kernel_state->user_state;

        /* If the user's array isn't big enough, just return the
         * required array size.
         */
        if (get_state->bo_count < state->bo_count) {
                get_state->bo_count = state->bo_count;
                spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                return 0;
        }

        vc4->hang_state = NULL;
        spin_unlock_irqrestore(&vc4->job_lock, irqflags);

        /* Save the user's BO pointer, so we don't stomp it with the
         * memcpy.
         */
        state->bo = get_state->bo;
        memcpy(get_state, state, sizeof(*state));

        bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
        if (!bo_state) {
                ret = -ENOMEM;
                goto err_free;
        }

        for (i = 0; i < state->bo_count; i++) {
                struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
                u32 handle;

                ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
                                            &handle);

                if (ret) {
                        state->bo_count = i;
                        goto err_delete_handle;
                }
                bo_state[i].handle = handle;
                bo_state[i].paddr = vc4_bo->base.paddr;
                bo_state[i].size = vc4_bo->base.base.size;
        }

        if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
                         bo_state,
                         state->bo_count * sizeof(*bo_state)))
                ret = -EFAULT;

err_delete_handle:
        if (ret) {
                for (i = 0; i < state->bo_count; i++)
                        drm_gem_handle_delete(file_priv, bo_state[i].handle);
        }
        kfree(bo_state);

err_free:
        vc4_free_hang_state(dev, kernel_state);

        return ret;
}

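/* Takes a snapshot of the V3D registers and a reference on each BO of
 * the currently executing job, and stashes it in vc4->hang_state so
 * that userspace can fetch it later through vc4_get_hang_state_ioctl()
 * for debugging.
 */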
static void
vc4_save_hang_state(struct drm_device *dev)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        struct drm_vc4_get_hang_state *state;
        struct vc4_hang_state *kernel_state;
        struct vc4_exec_info *exec;
        struct vc4_bo *bo;
        unsigned long irqflags;
        unsigned int i, unref_list_count;

        kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
        if (!kernel_state)
                return;

        state = &kernel_state->user_state;

        spin_lock_irqsave(&vc4->job_lock, irqflags);
        exec = vc4_first_job(vc4);
        if (!exec) {
                spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                kfree(kernel_state);
                return;
        }

        unref_list_count = 0;
        list_for_each_entry(bo, &exec->unref_list, unref_head)
                unref_list_count++;

        state->bo_count = exec->bo_count + unref_list_count;
        kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
                                   GFP_ATOMIC);
        if (!kernel_state->bo) {
                spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                kfree(kernel_state);
                return;
        }

        for (i = 0; i < exec->bo_count; i++) {
                drm_gem_object_reference(&exec->bo[i]->base);
                kernel_state->bo[i] = &exec->bo[i]->base;
        }

        list_for_each_entry(bo, &exec->unref_list, unref_head) {
                drm_gem_object_reference(&bo->base.base);
                kernel_state->bo[i] = &bo->base.base;
                i++;
        }

        state->start_bin = exec->ct0ca;
        state->start_render = exec->ct1ca;

        spin_unlock_irqrestore(&vc4->job_lock, irqflags);

        state->ct0ca = V3D_READ(V3D_CTNCA(0));
        state->ct0ea = V3D_READ(V3D_CTNEA(0));

        state->ct1ca = V3D_READ(V3D_CTNCA(1));
        state->ct1ea = V3D_READ(V3D_CTNEA(1));

        state->ct0cs = V3D_READ(V3D_CTNCS(0));
        state->ct1cs = V3D_READ(V3D_CTNCS(1));

        state->ct0ra0 = V3D_READ(V3D_CT00RA0);
        state->ct1ra0 = V3D_READ(V3D_CT01RA0);

        state->bpca = V3D_READ(V3D_BPCA);
        state->bpcs = V3D_READ(V3D_BPCS);
        state->bpoa = V3D_READ(V3D_BPOA);
        state->bpos = V3D_READ(V3D_BPOS);

        state->vpmbase = V3D_READ(V3D_VPMBASE);

        state->dbge = V3D_READ(V3D_DBGE);
        state->fdbgo = V3D_READ(V3D_FDBGO);
        state->fdbgb = V3D_READ(V3D_FDBGB);
        state->fdbgr = V3D_READ(V3D_FDBGR);
        state->fdbgs = V3D_READ(V3D_FDBGS);
        state->errstat = V3D_READ(V3D_ERRSTAT);

        spin_lock_irqsave(&vc4->job_lock, irqflags);
        if (vc4->hang_state) {
                spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                vc4_free_hang_state(dev, kernel_state);
        } else {
                vc4->hang_state = kernel_state;
                spin_unlock_irqrestore(&vc4->job_lock, irqflags);
        }
}

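/* Called from the hangcheck reset work to recover the GPU: power
 * cycles the V3D block through runtime PM (if it is currently in use)
 * and then lets vc4_irq_reset() restart whatever work is still queued.
 */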
static void
vc4_reset(struct drm_device *dev)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);

        DRM_INFO("Resetting GPU.\n");

        mutex_lock(&vc4->power_lock);
        if (vc4->power_refcount) {
                /* Power the device off and back on by dropping the
                 * reference on runtime PM.
                 */
                pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
                pm_runtime_get_sync(&vc4->v3d->pdev->dev);
        }
        mutex_unlock(&vc4->power_lock);

        vc4_irq_reset(dev);

        /* Rearm the hangcheck -- another job might have been waiting
         * for our hung one to get kicked off, and vc4_irq_reset()
         * would have started it.
         */
        vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
        struct vc4_dev *vc4 =
                container_of(work, struct vc4_dev, hangcheck.reset_work);

        vc4_save_hang_state(vc4->dev);

        vc4_reset(vc4->dev);
}

static void
vc4_hangcheck_elapsed(unsigned long data)
{
        struct drm_device *dev = (struct drm_device *)data;
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        uint32_t ct0ca, ct1ca;
        unsigned long irqflags;
        struct vc4_exec_info *exec;

        spin_lock_irqsave(&vc4->job_lock, irqflags);
        exec = vc4_first_job(vc4);

        /* If idle, we can stop watching for hangs. */
        if (!exec) {
                spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                return;
        }

        ct0ca = V3D_READ(V3D_CTNCA(0));
        ct1ca = V3D_READ(V3D_CTNCA(1));

        /* If we've made any progress in execution, rearm the timer
         * and wait.
         */
        if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) {
                exec->last_ct0ca = ct0ca;
                exec->last_ct1ca = ct1ca;
                spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                vc4_queue_hangcheck(dev);
                return;
        }

        spin_unlock_irqrestore(&vc4->job_lock, irqflags);

        /* We've gone too long with no progress, reset. This has to
         * be done from a work struct, since resetting can sleep and
         * this timer hook isn't allowed to.
         */
        schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);

        /* Set the current and end address of the control list.
         * Writing the end register is what starts the job.
         */
        V3D_WRITE(V3D_CTNCA(thread), start);
        V3D_WRITE(V3D_CTNEA(thread), end);
}

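/* Blocks until the given seqno has been signalled by the hardware or
 * the timeout (in nanoseconds) expires. A timeout of ~0ull waits
 * forever; returns -ETIME on timeout and -ERESTARTSYS if an
 * interruptible wait was broken by a signal.
 */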
int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
                   bool interruptible)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        int ret = 0;
        unsigned long timeout_expire;
        DEFINE_WAIT(wait);

        if (vc4->finished_seqno >= seqno)
                return 0;

        if (timeout_ns == 0)
                return -ETIME;

        timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

        trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
        for (;;) {
                prepare_to_wait(&vc4->job_wait_queue, &wait,
                                interruptible ? TASK_INTERRUPTIBLE :
                                TASK_UNINTERRUPTIBLE);

                if (interruptible && signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }

                if (vc4->finished_seqno >= seqno)
                        break;

                if (timeout_ns != ~0ull) {
                        if (time_after_eq(jiffies, timeout_expire)) {
                                ret = -ETIME;
                                break;
                        }
                        schedule_timeout(timeout_expire - jiffies);
                } else {
                        schedule();
                }
        }

        finish_wait(&vc4->job_wait_queue, &wait);
        trace_vc4_wait_for_seqno_end(dev, seqno);

        return ret;
}

static void
vc4_flush_caches(struct drm_device *dev)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);

        /* Flush the GPU L2 caches. These caches sit on top of system
         * L3 (the 128kb or so shared with the CPU), and are
         * non-allocating in the L3.
         */
        V3D_WRITE(V3D_L2CACTL,
                  V3D_L2CACTL_L2CCLR);

        V3D_WRITE(V3D_SLCACTL,
                  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
                  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
                  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
                  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

/* Sets the registers for the next job to actually be executed in the
 * hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_job(struct drm_device *dev)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        struct vc4_exec_info *exec = vc4_first_job(vc4);

        if (!exec)
                return;

        vc4_flush_caches(dev);

        /* Disable the binner's pre-loaded overflow memory address */
        V3D_WRITE(V3D_BPOA, 0);
        V3D_WRITE(V3D_BPOS, 0);

        if (exec->ct0ca != exec->ct0ea)
                submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
        submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
        struct vc4_bo *bo;
        unsigned i;

        for (i = 0; i < exec->bo_count; i++) {
                bo = to_vc4_bo(&exec->bo[i]->base);
                bo->seqno = seqno;
        }

        list_for_each_entry(bo, &exec->unref_list, unref_head) {
                bo->seqno = seqno;
        }
}

/* Queues a struct vc4_exec_info for execution. If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time. To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address. That's a change for a later date,
 * though.
 */
static void
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        uint64_t seqno;
        unsigned long irqflags;

        spin_lock_irqsave(&vc4->job_lock, irqflags);

        seqno = ++vc4->emit_seqno;
        exec->seqno = seqno;
        vc4_update_bo_seqnos(exec, seqno);

        list_add_tail(&exec->head, &vc4->job_list);

        /* If no job was executing, kick ours off. Otherwise, it'll
         * get started when the previous job's frame done interrupt
         * occurs.
         */
        if (vc4_first_job(vc4) == exec) {
                vc4_submit_next_job(dev);
                vc4_queue_hangcheck(dev);
        }

        spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

/**
 * Looks up a bunch of GEM handles for BOs and stores the array for
 * use in the command validator that actually writes relocated
 * addresses pointing to them.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
                  struct drm_file *file_priv,
                  struct vc4_exec_info *exec)
{
        struct drm_vc4_submit_cl *args = exec->args;
        uint32_t *handles;
        int ret = 0;
        int i;

        exec->bo_count = args->bo_handle_count;

        if (!exec->bo_count) {
                /* See comment on bo_index for why we have to check
                 * this.
                 */
                DRM_ERROR("Rendering requires BOs to validate\n");
                return -EINVAL;
        }

        exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
                           GFP_KERNEL);
        if (!exec->bo) {
                DRM_ERROR("Failed to allocate validated BO pointers\n");
                return -ENOMEM;
        }

        handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
        if (!handles) {
                ret = -ENOMEM;
                DRM_ERROR("Failed to allocate incoming GEM handles\n");
                goto fail;
        }

        if (copy_from_user(handles,
                           (void __user *)(uintptr_t)args->bo_handles,
                           exec->bo_count * sizeof(uint32_t))) {
                ret = -EFAULT;
                DRM_ERROR("Failed to copy in GEM handles\n");
                goto fail;
        }

        spin_lock(&file_priv->table_lock);
        for (i = 0; i < exec->bo_count; i++) {
                struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
                                                     handles[i]);
                if (!bo) {
                        DRM_ERROR("Failed to look up GEM BO %d: %d\n",
                                  i, handles[i]);
                        ret = -EINVAL;
                        spin_unlock(&file_priv->table_lock);
                        goto fail;
                }
                drm_gem_object_reference(bo);
                exec->bo[i] = (struct drm_gem_cma_object *)bo;
        }
        spin_unlock(&file_priv->table_lock);

fail:
        drm_free_large(handles);
        return ret;
}

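/* Copies the user's binner command list, shader records, and uniforms
 * into a temporary buffer, allocates the BO that the hardware will
 * execute from, and runs the bin CL and shader record validators over
 * the copied data before it is handed to the GPU.
 */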
static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
        struct drm_vc4_submit_cl *args = exec->args;
        void *temp = NULL;
        void *bin;
        int ret = 0;
        uint32_t bin_offset = 0;
        uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
                                             16);
        uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
        uint32_t exec_size = uniforms_offset + args->uniforms_size;
        uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
                                          args->shader_rec_count);
        struct vc4_bo *bo;

        if (uniforms_offset < shader_rec_offset ||
            exec_size < uniforms_offset ||
            args->shader_rec_count >= (UINT_MAX /
                                       sizeof(struct vc4_shader_state)) ||
            temp_size < exec_size) {
                DRM_ERROR("overflow in exec arguments\n");
                ret = -EINVAL;
                goto fail;
        }

        /* Allocate space where we'll store the copied-in user command lists
         * and shader records.
         *
         * We don't just copy directly into the BOs because we need to
         * read the contents back for validation, and I think the
         * bo->vaddr is uncached access.
         */
        temp = kmalloc(temp_size, GFP_KERNEL);
        if (!temp) {
                DRM_ERROR("Failed to allocate storage for copying "
                          "in bin/render CLs.\n");
                ret = -ENOMEM;
                goto fail;
        }
        bin = temp + bin_offset;
        exec->shader_rec_u = temp + shader_rec_offset;
        exec->uniforms_u = temp + uniforms_offset;
        exec->shader_state = temp + exec_size;
        exec->shader_state_size = args->shader_rec_count;

        if (copy_from_user(bin,
                           (void __user *)(uintptr_t)args->bin_cl,
                           args->bin_cl_size)) {
                ret = -EFAULT;
                goto fail;
        }

        if (copy_from_user(exec->shader_rec_u,
                           (void __user *)(uintptr_t)args->shader_rec,
                           args->shader_rec_size)) {
                ret = -EFAULT;
                goto fail;
        }

        if (copy_from_user(exec->uniforms_u,
                           (void __user *)(uintptr_t)args->uniforms,
                           args->uniforms_size)) {
                ret = -EFAULT;
                goto fail;
        }

        bo = vc4_bo_create(dev, exec_size, true);
        if (IS_ERR(bo)) {
                DRM_ERROR("Couldn't allocate BO for binning\n");
                ret = PTR_ERR(bo);
                goto fail;
        }
        exec->exec_bo = &bo->base;

        list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
                      &exec->unref_list);

        exec->ct0ca = exec->exec_bo->paddr + bin_offset;

        exec->bin_u = bin;

        exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
        exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
        exec->shader_rec_size = args->shader_rec_size;

        exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
        exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
        exec->uniforms_size = args->uniforms_size;

        ret = vc4_validate_bin_cl(dev,
                                  exec->exec_bo->vaddr + bin_offset,
                                  bin,
                                  exec);
        if (ret)
                goto fail;

        ret = vc4_validate_shader_recs(dev, exec);

fail:
        kfree(temp);
        return ret;
}

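/* Frees a finished (or failed) exec: drops the references on its BOs,
 * releases the power reference taken at submit time, and frees the
 * exec struct itself.
 */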
static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        unsigned i;

        /* Need the struct lock for drm_gem_object_unreference(). */
        mutex_lock(&dev->struct_mutex);
        if (exec->bo) {
                for (i = 0; i < exec->bo_count; i++)
                        drm_gem_object_unreference(&exec->bo[i]->base);
                kfree(exec->bo);
        }

        while (!list_empty(&exec->unref_list)) {
                struct vc4_bo *bo = list_first_entry(&exec->unref_list,
                                                     struct vc4_bo, unref_head);
                list_del(&bo->unref_head);
                drm_gem_object_unreference(&bo->base.base);
        }
        mutex_unlock(&dev->struct_mutex);

        mutex_lock(&vc4->power_lock);
        if (--vc4->power_refcount == 0)
                pm_runtime_put(&vc4->v3d->pdev->dev);
        mutex_unlock(&vc4->power_lock);

        kfree(exec);
}

void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
        unsigned long irqflags;
        struct vc4_seqno_cb *cb, *cb_temp;

        spin_lock_irqsave(&vc4->job_lock, irqflags);
        while (!list_empty(&vc4->job_done_list)) {
                struct vc4_exec_info *exec =
                        list_first_entry(&vc4->job_done_list,
                                         struct vc4_exec_info, head);
                list_del(&exec->head);

                spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                vc4_complete_exec(vc4->dev, exec);
                spin_lock_irqsave(&vc4->job_lock, irqflags);
        }

        list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
                if (cb->seqno <= vc4->finished_seqno) {
                        list_del_init(&cb->work.entry);
                        schedule_work(&cb->work);
                }
        }

        spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
        struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

        cb->func(cb);
}

int vc4_queue_seqno_cb(struct drm_device *dev,
                       struct vc4_seqno_cb *cb, uint64_t seqno,
                       void (*func)(struct vc4_seqno_cb *cb))
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        int ret = 0;
        unsigned long irqflags;

        cb->func = func;
        INIT_WORK(&cb->work, vc4_seqno_cb_work);

        spin_lock_irqsave(&vc4->job_lock, irqflags);
        if (seqno > vc4->finished_seqno) {
                cb->seqno = seqno;
                list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
        } else {
                schedule_work(&cb->work);
        }
        spin_unlock_irqrestore(&vc4->job_lock, irqflags);

        return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
        struct vc4_dev *vc4 =
                container_of(work, struct vc4_dev, job_done_work);

        vc4_job_handle_completed(vc4);
}

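/* Shared helper for the wait_seqno and wait_bo ioctls: performs an
 * interruptible wait and, if the wait was interrupted by a signal,
 * updates *timeout_ns so that a restarted ioctl doesn't wait longer
 * than originally requested.
 */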
static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
                                uint64_t seqno,
                                uint64_t *timeout_ns)
{
        unsigned long start = jiffies;
        int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

        if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
                uint64_t delta = jiffies_to_nsecs(jiffies - start);

                if (*timeout_ns >= delta)
                        *timeout_ns -= delta;
        }

        return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
                     struct drm_file *file_priv)
{
        struct drm_vc4_wait_seqno *args = data;

        return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
                                               &args->timeout_ns);
}

int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
                  struct drm_file *file_priv)
{
        int ret;
        struct drm_vc4_wait_bo *args = data;
        struct drm_gem_object *gem_obj;
        struct vc4_bo *bo;

        if (args->pad != 0)
                return -EINVAL;

        gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
        if (!gem_obj) {
                DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
                return -EINVAL;
        }
        bo = to_vc4_bo(gem_obj);

        ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
                                              &args->timeout_ns);

        drm_gem_object_unreference_unlocked(gem_obj);
        return ret;
}

/**
 * Submits a command list to the VC4.
 *
 * This is what is called batchbuffer emitting on other hardware.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
                    struct drm_file *file_priv)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        struct drm_vc4_submit_cl *args = data;
        struct vc4_exec_info *exec;
        int ret = 0;

        if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
                DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
                return -EINVAL;
        }

        exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
        if (!exec) {
                DRM_ERROR("malloc failure on exec struct\n");
                return -ENOMEM;
        }

        mutex_lock(&vc4->power_lock);
        if (vc4->power_refcount++ == 0)
                ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
        mutex_unlock(&vc4->power_lock);
        if (ret < 0) {
                kfree(exec);
                return ret;
        }

        exec->args = args;
        INIT_LIST_HEAD(&exec->unref_list);

        ret = vc4_cl_lookup_bos(dev, file_priv, exec);
        if (ret)
                goto fail;

        if (exec->args->bin_cl_size != 0) {
                ret = vc4_get_bcl(dev, exec);
                if (ret)
                        goto fail;
        } else {
                exec->ct0ca = 0;
                exec->ct0ea = 0;
        }

        ret = vc4_get_rcl(dev, exec);
        if (ret)
                goto fail;

        /* Clear this out of the struct we'll be putting in the queue,
         * since it's part of our stack.
         */
        exec->args = NULL;

        vc4_queue_submit(dev, exec);

        /* Return the seqno for our job. */
        args->seqno = vc4->emit_seqno;

        return 0;

fail:
        vc4_complete_exec(vc4->dev, exec);

        return ret;
}

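/* Called at driver load to set up the GEM/job state: the job lists,
 * the seqno callback list, the job lock, the hangcheck timer and its
 * reset work, the job-done work, and the power lock.
 */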
void
vc4_gem_init(struct drm_device *dev)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);

        INIT_LIST_HEAD(&vc4->job_list);
        INIT_LIST_HEAD(&vc4->job_done_list);
        INIT_LIST_HEAD(&vc4->seqno_cb_list);
        spin_lock_init(&vc4->job_lock);

        INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
        setup_timer(&vc4->hangcheck.timer,
                    vc4_hangcheck_elapsed,
                    (unsigned long)dev);

        INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

        mutex_init(&vc4->power_lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
        struct vc4_dev *vc4 = to_vc4_dev(dev);

        /* Waiting for exec to finish would need to be done before
         * unregistering V3D.
         */
        WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

        /* V3D should already have disabled its interrupt and cleared
         * the overflow allocation registers. Now free the object.
         */
        if (vc4->overflow_mem) {
                drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
                vc4->overflow_mem = NULL;
        }

        vc4_bo_cache_destroy(dev);

        if (vc4->hang_state)
                vc4_free_hang_state(dev, vc4->hang_state);
}