1d5b1a78aSEric Anholt /* 2d5b1a78aSEric Anholt * Copyright © 2014 Broadcom 3d5b1a78aSEric Anholt * 4d5b1a78aSEric Anholt * Permission is hereby granted, free of charge, to any person obtaining a 5d5b1a78aSEric Anholt * copy of this software and associated documentation files (the "Software"), 6d5b1a78aSEric Anholt * to deal in the Software without restriction, including without limitation 7d5b1a78aSEric Anholt * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d5b1a78aSEric Anholt * and/or sell copies of the Software, and to permit persons to whom the 9d5b1a78aSEric Anholt * Software is furnished to do so, subject to the following conditions: 10d5b1a78aSEric Anholt * 11d5b1a78aSEric Anholt * The above copyright notice and this permission notice (including the next 12d5b1a78aSEric Anholt * paragraph) shall be included in all copies or substantial portions of the 13d5b1a78aSEric Anholt * Software. 14d5b1a78aSEric Anholt * 15d5b1a78aSEric Anholt * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16d5b1a78aSEric Anholt * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17d5b1a78aSEric Anholt * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18d5b1a78aSEric Anholt * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19d5b1a78aSEric Anholt * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20d5b1a78aSEric Anholt * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21d5b1a78aSEric Anholt * IN THE SOFTWARE. 22d5b1a78aSEric Anholt */ 23d5b1a78aSEric Anholt 24d5b1a78aSEric Anholt #include <linux/module.h> 25d5b1a78aSEric Anholt #include <linux/platform_device.h> 26001bdb55SEric Anholt #include <linux/pm_runtime.h> 27d5b1a78aSEric Anholt #include <linux/device.h> 28d5b1a78aSEric Anholt #include <linux/io.h> 29174cd4b1SIngo Molnar #include <linux/sched/signal.h> 30818f5c8fSStefan Schake #include <linux/dma-fence-array.h> 31d5b1a78aSEric Anholt 32fd6d6d80SSam Ravnborg #include <drm/drm_syncobj.h> 33fd6d6d80SSam Ravnborg 34d5b1a78aSEric Anholt #include "uapi/drm/vc4_drm.h" 35d5b1a78aSEric Anholt #include "vc4_drv.h" 36d5b1a78aSEric Anholt #include "vc4_regs.h" 37d5b1a78aSEric Anholt #include "vc4_trace.h" 38d5b1a78aSEric Anholt 39d5b1a78aSEric Anholt static void 40d5b1a78aSEric Anholt vc4_queue_hangcheck(struct drm_device *dev) 41d5b1a78aSEric Anholt { 42d5b1a78aSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 43d5b1a78aSEric Anholt 44d5b1a78aSEric Anholt mod_timer(&vc4->hangcheck.timer, 45d5b1a78aSEric Anholt round_jiffies_up(jiffies + msecs_to_jiffies(100))); 46d5b1a78aSEric Anholt } 47d5b1a78aSEric Anholt 4821461365SEric Anholt struct vc4_hang_state { 4921461365SEric Anholt struct drm_vc4_get_hang_state user_state; 5021461365SEric Anholt 5121461365SEric Anholt u32 bo_count; 5221461365SEric Anholt struct drm_gem_object **bo; 5321461365SEric Anholt }; 5421461365SEric Anholt 5521461365SEric Anholt static void 5621461365SEric Anholt vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state) 5721461365SEric Anholt { 5821461365SEric Anholt unsigned int i; 5921461365SEric Anholt 6021461365SEric Anholt for (i = 0; i < state->user_state.bo_count; i++) 61f7a8cd30SEmil Velikov drm_gem_object_put(state->bo[i]); 6221461365SEric Anholt 6321461365SEric Anholt kfree(state); 6421461365SEric Anholt } 6521461365SEric Anholt 6621461365SEric Anholt int 6721461365SEric Anholt vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, 6821461365SEric Anholt struct drm_file *file_priv) 6921461365SEric Anholt { 7021461365SEric Anholt struct drm_vc4_get_hang_state *get_state = data; 7121461365SEric Anholt struct drm_vc4_get_hang_state_bo *bo_state; 7221461365SEric Anholt struct vc4_hang_state *kernel_state; 7321461365SEric Anholt struct drm_vc4_get_hang_state *state; 7421461365SEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 7521461365SEric Anholt unsigned long irqflags; 7621461365SEric Anholt u32 i; 7765c4777dSDan Carpenter int ret = 0; 7821461365SEric Anholt 79ffc26740SEric Anholt if (!vc4->v3d) { 80ffc26740SEric Anholt DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n"); 81ffc26740SEric Anholt return -ENODEV; 82ffc26740SEric Anholt } 83ffc26740SEric Anholt 8421461365SEric Anholt spin_lock_irqsave(&vc4->job_lock, irqflags); 8521461365SEric Anholt kernel_state = vc4->hang_state; 8621461365SEric Anholt if (!kernel_state) { 8721461365SEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 8821461365SEric Anholt return -ENOENT; 8921461365SEric Anholt } 9021461365SEric Anholt state = &kernel_state->user_state; 9121461365SEric Anholt 9221461365SEric Anholt /* If the user's array isn't big enough, just return the 9321461365SEric Anholt * required array size. 9421461365SEric Anholt */ 9521461365SEric Anholt if (get_state->bo_count < state->bo_count) { 9621461365SEric Anholt get_state->bo_count = state->bo_count; 9721461365SEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 9821461365SEric Anholt return 0; 9921461365SEric Anholt } 10021461365SEric Anholt 10121461365SEric Anholt vc4->hang_state = NULL; 10221461365SEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 10321461365SEric Anholt 10421461365SEric Anholt /* Save the user's BO pointer, so we don't stomp it with the memcpy. */ 10521461365SEric Anholt state->bo = get_state->bo; 10621461365SEric Anholt memcpy(get_state, state, sizeof(*state)); 10721461365SEric Anholt 10821461365SEric Anholt bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL); 10921461365SEric Anholt if (!bo_state) { 11021461365SEric Anholt ret = -ENOMEM; 11121461365SEric Anholt goto err_free; 11221461365SEric Anholt } 11321461365SEric Anholt 11421461365SEric Anholt for (i = 0; i < state->bo_count; i++) { 11521461365SEric Anholt struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]); 11621461365SEric Anholt u32 handle; 11721461365SEric Anholt 11821461365SEric Anholt ret = drm_gem_handle_create(file_priv, kernel_state->bo[i], 11921461365SEric Anholt &handle); 12021461365SEric Anholt 12121461365SEric Anholt if (ret) { 122d0b1d259SChristophe JAILLET state->bo_count = i; 123d0b1d259SChristophe JAILLET goto err_delete_handle; 12421461365SEric Anholt } 12521461365SEric Anholt bo_state[i].handle = handle; 12621461365SEric Anholt bo_state[i].paddr = vc4_bo->base.paddr; 12721461365SEric Anholt bo_state[i].size = vc4_bo->base.base.size; 12821461365SEric Anholt } 12921461365SEric Anholt 13095d7cbcbSEric Anholt if (copy_to_user(u64_to_user_ptr(get_state->bo), 13121461365SEric Anholt bo_state, 13265c4777dSDan Carpenter state->bo_count * sizeof(*bo_state))) 13365c4777dSDan Carpenter ret = -EFAULT; 13465c4777dSDan Carpenter 135d0b1d259SChristophe JAILLET err_delete_handle: 136d0b1d259SChristophe JAILLET if (ret) { 137d0b1d259SChristophe JAILLET for (i = 0; i < state->bo_count; i++) 138d0b1d259SChristophe JAILLET drm_gem_handle_delete(file_priv, bo_state[i].handle); 139d0b1d259SChristophe JAILLET } 14021461365SEric Anholt 14121461365SEric Anholt err_free: 14221461365SEric Anholt vc4_free_hang_state(dev, kernel_state); 143d0b1d259SChristophe JAILLET kfree(bo_state); 14421461365SEric Anholt 14521461365SEric Anholt return ret; 14621461365SEric Anholt } 14721461365SEric Anholt 14821461365SEric Anholt static void 14921461365SEric Anholt vc4_save_hang_state(struct drm_device *dev) 15021461365SEric Anholt { 15121461365SEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 15221461365SEric Anholt struct drm_vc4_get_hang_state *state; 15321461365SEric Anholt struct vc4_hang_state *kernel_state; 154ca26d28bSVarad Gautam struct vc4_exec_info *exec[2]; 15521461365SEric Anholt struct vc4_bo *bo; 15621461365SEric Anholt unsigned long irqflags; 15717b11b76SBoris Brezillon unsigned int i, j, k, unref_list_count; 15821461365SEric Anholt 1597e5082fbSDan Carpenter kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL); 16021461365SEric Anholt if (!kernel_state) 16121461365SEric Anholt return; 16221461365SEric Anholt 16321461365SEric Anholt state = &kernel_state->user_state; 16421461365SEric Anholt 16521461365SEric Anholt spin_lock_irqsave(&vc4->job_lock, irqflags); 166ca26d28bSVarad Gautam exec[0] = vc4_first_bin_job(vc4); 167ca26d28bSVarad Gautam exec[1] = vc4_first_render_job(vc4); 168ca26d28bSVarad Gautam if (!exec[0] && !exec[1]) { 16921461365SEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 17021461365SEric Anholt return; 17121461365SEric Anholt } 17221461365SEric Anholt 173ca26d28bSVarad Gautam /* Get the bos from both binner and renderer into hang state. */ 174ca26d28bSVarad Gautam state->bo_count = 0; 175ca26d28bSVarad Gautam for (i = 0; i < 2; i++) { 176ca26d28bSVarad Gautam if (!exec[i]) 177ca26d28bSVarad Gautam continue; 17821461365SEric Anholt 179ca26d28bSVarad Gautam unref_list_count = 0; 180ca26d28bSVarad Gautam list_for_each_entry(bo, &exec[i]->unref_list, unref_head) 181ca26d28bSVarad Gautam unref_list_count++; 182ca26d28bSVarad Gautam state->bo_count += exec[i]->bo_count + unref_list_count; 183ca26d28bSVarad Gautam } 184ca26d28bSVarad Gautam 185ca26d28bSVarad Gautam kernel_state->bo = kcalloc(state->bo_count, 186ca26d28bSVarad Gautam sizeof(*kernel_state->bo), GFP_ATOMIC); 187ca26d28bSVarad Gautam 18821461365SEric Anholt if (!kernel_state->bo) { 18921461365SEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 19021461365SEric Anholt return; 19121461365SEric Anholt } 19221461365SEric Anholt 19317b11b76SBoris Brezillon k = 0; 194ca26d28bSVarad Gautam for (i = 0; i < 2; i++) { 195ca26d28bSVarad Gautam if (!exec[i]) 196ca26d28bSVarad Gautam continue; 197ca26d28bSVarad Gautam 198ca26d28bSVarad Gautam for (j = 0; j < exec[i]->bo_count; j++) { 199b9f19259SBoris Brezillon bo = to_vc4_bo(&exec[i]->bo[j]->base); 200b9f19259SBoris Brezillon 201b9f19259SBoris Brezillon /* Retain BOs just in case they were marked purgeable. 202b9f19259SBoris Brezillon * This prevents the BO from being purged before 203b9f19259SBoris Brezillon * someone had a chance to dump the hang state. 204b9f19259SBoris Brezillon */ 205b9f19259SBoris Brezillon WARN_ON(!refcount_read(&bo->usecnt)); 206b9f19259SBoris Brezillon refcount_inc(&bo->usecnt); 2071d5494e9SCihangir Akturk drm_gem_object_get(&exec[i]->bo[j]->base); 20817b11b76SBoris Brezillon kernel_state->bo[k++] = &exec[i]->bo[j]->base; 20921461365SEric Anholt } 21021461365SEric Anholt 211ca26d28bSVarad Gautam list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { 212b9f19259SBoris Brezillon /* No need to retain BOs coming from the ->unref_list 213b9f19259SBoris Brezillon * because they are naturally unpurgeable. 214b9f19259SBoris Brezillon */ 2151d5494e9SCihangir Akturk drm_gem_object_get(&bo->base.base); 21617b11b76SBoris Brezillon kernel_state->bo[k++] = &bo->base.base; 217ca26d28bSVarad Gautam } 21821461365SEric Anholt } 21921461365SEric Anholt 22017b11b76SBoris Brezillon WARN_ON_ONCE(k != state->bo_count); 22117b11b76SBoris Brezillon 222ca26d28bSVarad Gautam if (exec[0]) 223ca26d28bSVarad Gautam state->start_bin = exec[0]->ct0ca; 224ca26d28bSVarad Gautam if (exec[1]) 225ca26d28bSVarad Gautam state->start_render = exec[1]->ct1ca; 22621461365SEric Anholt 22721461365SEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 22821461365SEric Anholt 22921461365SEric Anholt state->ct0ca = V3D_READ(V3D_CTNCA(0)); 23021461365SEric Anholt state->ct0ea = V3D_READ(V3D_CTNEA(0)); 23121461365SEric Anholt 23221461365SEric Anholt state->ct1ca = V3D_READ(V3D_CTNCA(1)); 23321461365SEric Anholt state->ct1ea = V3D_READ(V3D_CTNEA(1)); 23421461365SEric Anholt 23521461365SEric Anholt state->ct0cs = V3D_READ(V3D_CTNCS(0)); 23621461365SEric Anholt state->ct1cs = V3D_READ(V3D_CTNCS(1)); 23721461365SEric Anholt 23821461365SEric Anholt state->ct0ra0 = V3D_READ(V3D_CT00RA0); 23921461365SEric Anholt state->ct1ra0 = V3D_READ(V3D_CT01RA0); 24021461365SEric Anholt 24121461365SEric Anholt state->bpca = V3D_READ(V3D_BPCA); 24221461365SEric Anholt state->bpcs = V3D_READ(V3D_BPCS); 24321461365SEric Anholt state->bpoa = V3D_READ(V3D_BPOA); 24421461365SEric Anholt state->bpos = V3D_READ(V3D_BPOS); 24521461365SEric Anholt 24621461365SEric Anholt state->vpmbase = V3D_READ(V3D_VPMBASE); 24721461365SEric Anholt 24821461365SEric Anholt state->dbge = V3D_READ(V3D_DBGE); 24921461365SEric Anholt state->fdbgo = V3D_READ(V3D_FDBGO); 25021461365SEric Anholt state->fdbgb = V3D_READ(V3D_FDBGB); 25121461365SEric Anholt state->fdbgr = V3D_READ(V3D_FDBGR); 25221461365SEric Anholt state->fdbgs = V3D_READ(V3D_FDBGS); 25321461365SEric Anholt state->errstat = V3D_READ(V3D_ERRSTAT); 25421461365SEric Anholt 255b9f19259SBoris Brezillon /* We need to turn purgeable BOs into unpurgeable ones so that 256b9f19259SBoris Brezillon * userspace has a chance to dump the hang state before the kernel 257b9f19259SBoris Brezillon * decides to purge those BOs. 258b9f19259SBoris Brezillon * Note that BO consistency at dump time cannot be guaranteed. For 259b9f19259SBoris Brezillon * example, if the owner of these BOs decides to re-use them or mark 260b9f19259SBoris Brezillon * them purgeable again there's nothing we can do to prevent it. 261b9f19259SBoris Brezillon */ 262b9f19259SBoris Brezillon for (i = 0; i < kernel_state->user_state.bo_count; i++) { 263b9f19259SBoris Brezillon struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]); 264b9f19259SBoris Brezillon 265b9f19259SBoris Brezillon if (bo->madv == __VC4_MADV_NOTSUPP) 266b9f19259SBoris Brezillon continue; 267b9f19259SBoris Brezillon 268b9f19259SBoris Brezillon mutex_lock(&bo->madv_lock); 269b9f19259SBoris Brezillon if (!WARN_ON(bo->madv == __VC4_MADV_PURGED)) 270b9f19259SBoris Brezillon bo->madv = VC4_MADV_WILLNEED; 271b9f19259SBoris Brezillon refcount_dec(&bo->usecnt); 272b9f19259SBoris Brezillon mutex_unlock(&bo->madv_lock); 273b9f19259SBoris Brezillon } 274b9f19259SBoris Brezillon 27521461365SEric Anholt spin_lock_irqsave(&vc4->job_lock, irqflags); 27621461365SEric Anholt if (vc4->hang_state) { 27721461365SEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 27821461365SEric Anholt vc4_free_hang_state(dev, kernel_state); 27921461365SEric Anholt } else { 28021461365SEric Anholt vc4->hang_state = kernel_state; 28121461365SEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 28221461365SEric Anholt } 28321461365SEric Anholt } 28421461365SEric Anholt 285d5b1a78aSEric Anholt static void 286d5b1a78aSEric Anholt vc4_reset(struct drm_device *dev) 287d5b1a78aSEric Anholt { 288d5b1a78aSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 289d5b1a78aSEric Anholt 290d5b1a78aSEric Anholt DRM_INFO("Resetting GPU.\n"); 29136cb6253SEric Anholt 29236cb6253SEric Anholt mutex_lock(&vc4->power_lock); 29336cb6253SEric Anholt if (vc4->power_refcount) { 29436cb6253SEric Anholt /* Power the device off and back on the by dropping the 29536cb6253SEric Anholt * reference on runtime PM. 29636cb6253SEric Anholt */ 29736cb6253SEric Anholt pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev); 29836cb6253SEric Anholt pm_runtime_get_sync(&vc4->v3d->pdev->dev); 29936cb6253SEric Anholt } 30036cb6253SEric Anholt mutex_unlock(&vc4->power_lock); 301d5b1a78aSEric Anholt 302d5b1a78aSEric Anholt vc4_irq_reset(dev); 303d5b1a78aSEric Anholt 304d5b1a78aSEric Anholt /* Rearm the hangcheck -- another job might have been waiting 305d5b1a78aSEric Anholt * for our hung one to get kicked off, and vc4_irq_reset() 306d5b1a78aSEric Anholt * would have started it. 307d5b1a78aSEric Anholt */ 308d5b1a78aSEric Anholt vc4_queue_hangcheck(dev); 309d5b1a78aSEric Anholt } 310d5b1a78aSEric Anholt 311d5b1a78aSEric Anholt static void 312d5b1a78aSEric Anholt vc4_reset_work(struct work_struct *work) 313d5b1a78aSEric Anholt { 314d5b1a78aSEric Anholt struct vc4_dev *vc4 = 315d5b1a78aSEric Anholt container_of(work, struct vc4_dev, hangcheck.reset_work); 316d5b1a78aSEric Anholt 31784d7d472SMaxime Ripard vc4_save_hang_state(&vc4->base); 31821461365SEric Anholt 31984d7d472SMaxime Ripard vc4_reset(&vc4->base); 320d5b1a78aSEric Anholt } 321d5b1a78aSEric Anholt 322d5b1a78aSEric Anholt static void 3230078730fSKees Cook vc4_hangcheck_elapsed(struct timer_list *t) 324d5b1a78aSEric Anholt { 3250078730fSKees Cook struct vc4_dev *vc4 = from_timer(vc4, t, hangcheck.timer); 32684d7d472SMaxime Ripard struct drm_device *dev = &vc4->base; 327d5b1a78aSEric Anholt uint32_t ct0ca, ct1ca; 328c4ce60dcSEric Anholt unsigned long irqflags; 329ca26d28bSVarad Gautam struct vc4_exec_info *bin_exec, *render_exec; 330c4ce60dcSEric Anholt 331c4ce60dcSEric Anholt spin_lock_irqsave(&vc4->job_lock, irqflags); 332ca26d28bSVarad Gautam 333ca26d28bSVarad Gautam bin_exec = vc4_first_bin_job(vc4); 334ca26d28bSVarad Gautam render_exec = vc4_first_render_job(vc4); 335d5b1a78aSEric Anholt 336d5b1a78aSEric Anholt /* If idle, we can stop watching for hangs. */ 337ca26d28bSVarad Gautam if (!bin_exec && !render_exec) { 338c4ce60dcSEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 339d5b1a78aSEric Anholt return; 340c4ce60dcSEric Anholt } 341d5b1a78aSEric Anholt 342d5b1a78aSEric Anholt ct0ca = V3D_READ(V3D_CTNCA(0)); 343d5b1a78aSEric Anholt ct1ca = V3D_READ(V3D_CTNCA(1)); 344d5b1a78aSEric Anholt 345d5b1a78aSEric Anholt /* If we've made any progress in execution, rearm the timer 346d5b1a78aSEric Anholt * and wait. 347d5b1a78aSEric Anholt */ 348ca26d28bSVarad Gautam if ((bin_exec && ct0ca != bin_exec->last_ct0ca) || 349ca26d28bSVarad Gautam (render_exec && ct1ca != render_exec->last_ct1ca)) { 350ca26d28bSVarad Gautam if (bin_exec) 351ca26d28bSVarad Gautam bin_exec->last_ct0ca = ct0ca; 352ca26d28bSVarad Gautam if (render_exec) 353ca26d28bSVarad Gautam render_exec->last_ct1ca = ct1ca; 354c4ce60dcSEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 355d5b1a78aSEric Anholt vc4_queue_hangcheck(dev); 356d5b1a78aSEric Anholt return; 357d5b1a78aSEric Anholt } 358d5b1a78aSEric Anholt 359c4ce60dcSEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 360c4ce60dcSEric Anholt 361d5b1a78aSEric Anholt /* We've gone too long with no progress, reset. This has to 362d5b1a78aSEric Anholt * be done from a work struct, since resetting can sleep and 363d5b1a78aSEric Anholt * this timer hook isn't allowed to. 364d5b1a78aSEric Anholt */ 365d5b1a78aSEric Anholt schedule_work(&vc4->hangcheck.reset_work); 366d5b1a78aSEric Anholt } 367d5b1a78aSEric Anholt 368d5b1a78aSEric Anholt static void 369d5b1a78aSEric Anholt submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end) 370d5b1a78aSEric Anholt { 371d5b1a78aSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 372d5b1a78aSEric Anholt 373d5b1a78aSEric Anholt /* Set the current and end address of the control list. 374d5b1a78aSEric Anholt * Writing the end register is what starts the job. 375d5b1a78aSEric Anholt */ 376d5b1a78aSEric Anholt V3D_WRITE(V3D_CTNCA(thread), start); 377d5b1a78aSEric Anholt V3D_WRITE(V3D_CTNEA(thread), end); 378d5b1a78aSEric Anholt } 379d5b1a78aSEric Anholt 380d5b1a78aSEric Anholt int 381d5b1a78aSEric Anholt vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, 382d5b1a78aSEric Anholt bool interruptible) 383d5b1a78aSEric Anholt { 384d5b1a78aSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 385d5b1a78aSEric Anholt int ret = 0; 386d5b1a78aSEric Anholt unsigned long timeout_expire; 387d5b1a78aSEric Anholt DEFINE_WAIT(wait); 388d5b1a78aSEric Anholt 389d5b1a78aSEric Anholt if (vc4->finished_seqno >= seqno) 390d5b1a78aSEric Anholt return 0; 391d5b1a78aSEric Anholt 392d5b1a78aSEric Anholt if (timeout_ns == 0) 393d5b1a78aSEric Anholt return -ETIME; 394d5b1a78aSEric Anholt 395d5b1a78aSEric Anholt timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns); 396d5b1a78aSEric Anholt 397d5b1a78aSEric Anholt trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns); 398d5b1a78aSEric Anholt for (;;) { 399d5b1a78aSEric Anholt prepare_to_wait(&vc4->job_wait_queue, &wait, 400d5b1a78aSEric Anholt interruptible ? TASK_INTERRUPTIBLE : 401d5b1a78aSEric Anholt TASK_UNINTERRUPTIBLE); 402d5b1a78aSEric Anholt 403d5b1a78aSEric Anholt if (interruptible && signal_pending(current)) { 404d5b1a78aSEric Anholt ret = -ERESTARTSYS; 405d5b1a78aSEric Anholt break; 406d5b1a78aSEric Anholt } 407d5b1a78aSEric Anholt 408d5b1a78aSEric Anholt if (vc4->finished_seqno >= seqno) 409d5b1a78aSEric Anholt break; 410d5b1a78aSEric Anholt 411d5b1a78aSEric Anholt if (timeout_ns != ~0ull) { 412d5b1a78aSEric Anholt if (time_after_eq(jiffies, timeout_expire)) { 413d5b1a78aSEric Anholt ret = -ETIME; 414d5b1a78aSEric Anholt break; 415d5b1a78aSEric Anholt } 416d5b1a78aSEric Anholt schedule_timeout(timeout_expire - jiffies); 417d5b1a78aSEric Anholt } else { 418d5b1a78aSEric Anholt schedule(); 419d5b1a78aSEric Anholt } 420d5b1a78aSEric Anholt } 421d5b1a78aSEric Anholt 422d5b1a78aSEric Anholt finish_wait(&vc4->job_wait_queue, &wait); 423d5b1a78aSEric Anholt trace_vc4_wait_for_seqno_end(dev, seqno); 424d5b1a78aSEric Anholt 42513cf8909SEric Anholt return ret; 426d5b1a78aSEric Anholt } 427d5b1a78aSEric Anholt 428d5b1a78aSEric Anholt static void 429d5b1a78aSEric Anholt vc4_flush_caches(struct drm_device *dev) 430d5b1a78aSEric Anholt { 431d5b1a78aSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 432d5b1a78aSEric Anholt 433d5b1a78aSEric Anholt /* Flush the GPU L2 caches. These caches sit on top of system 434d5b1a78aSEric Anholt * L3 (the 128kb or so shared with the CPU), and are 435d5b1a78aSEric Anholt * non-allocating in the L3. 436d5b1a78aSEric Anholt */ 437d5b1a78aSEric Anholt V3D_WRITE(V3D_L2CACTL, 438d5b1a78aSEric Anholt V3D_L2CACTL_L2CCLR); 439d5b1a78aSEric Anholt 440d5b1a78aSEric Anholt V3D_WRITE(V3D_SLCACTL, 441d5b1a78aSEric Anholt VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | 442d5b1a78aSEric Anholt VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) | 443d5b1a78aSEric Anholt VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) | 444d5b1a78aSEric Anholt VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC)); 445d5b1a78aSEric Anholt } 446d5b1a78aSEric Anholt 447f61145f1SEric Anholt static void 448f61145f1SEric Anholt vc4_flush_texture_caches(struct drm_device *dev) 449f61145f1SEric Anholt { 450f61145f1SEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 451f61145f1SEric Anholt 452f61145f1SEric Anholt V3D_WRITE(V3D_L2CACTL, 453f61145f1SEric Anholt V3D_L2CACTL_L2CCLR); 454f61145f1SEric Anholt 455f61145f1SEric Anholt V3D_WRITE(V3D_SLCACTL, 456f61145f1SEric Anholt VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | 457f61145f1SEric Anholt VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC)); 458f61145f1SEric Anholt } 459f61145f1SEric Anholt 460d5b1a78aSEric Anholt /* Sets the registers for the next job to be actually be executed in 461d5b1a78aSEric Anholt * the hardware. 462d5b1a78aSEric Anholt * 463d5b1a78aSEric Anholt * The job_lock should be held during this. 464d5b1a78aSEric Anholt */ 465d5b1a78aSEric Anholt void 466ca26d28bSVarad Gautam vc4_submit_next_bin_job(struct drm_device *dev) 467d5b1a78aSEric Anholt { 468d5b1a78aSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 469ca26d28bSVarad Gautam struct vc4_exec_info *exec; 470d5b1a78aSEric Anholt 471ca26d28bSVarad Gautam again: 472ca26d28bSVarad Gautam exec = vc4_first_bin_job(vc4); 473d5b1a78aSEric Anholt if (!exec) 474d5b1a78aSEric Anholt return; 475d5b1a78aSEric Anholt 476d5b1a78aSEric Anholt vc4_flush_caches(dev); 477d5b1a78aSEric Anholt 47865101d8cSBoris Brezillon /* Only start the perfmon if it was not already started by a previous 47965101d8cSBoris Brezillon * job. 48065101d8cSBoris Brezillon */ 48165101d8cSBoris Brezillon if (exec->perfmon && vc4->active_perfmon != exec->perfmon) 48265101d8cSBoris Brezillon vc4_perfmon_start(vc4, exec->perfmon); 48365101d8cSBoris Brezillon 484ca26d28bSVarad Gautam /* Either put the job in the binner if it uses the binner, or 485ca26d28bSVarad Gautam * immediately move it to the to-be-rendered queue. 486ca26d28bSVarad Gautam */ 487ca26d28bSVarad Gautam if (exec->ct0ca != exec->ct0ea) { 488*044feb97SMelissa Wen trace_vc4_submit_cl(dev, false, exec->seqno, exec->ct0ca, 489*044feb97SMelissa Wen exec->ct0ea); 490d5b1a78aSEric Anholt submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); 491ca26d28bSVarad Gautam } else { 49265101d8cSBoris Brezillon struct vc4_exec_info *next; 49365101d8cSBoris Brezillon 494ca26d28bSVarad Gautam vc4_move_job_to_render(dev, exec); 49565101d8cSBoris Brezillon next = vc4_first_bin_job(vc4); 49665101d8cSBoris Brezillon 49765101d8cSBoris Brezillon /* We can't start the next bin job if the previous job had a 49865101d8cSBoris Brezillon * different perfmon instance attached to it. The same goes 49965101d8cSBoris Brezillon * if one of them had a perfmon attached to it and the other 50065101d8cSBoris Brezillon * one doesn't. 50165101d8cSBoris Brezillon */ 50265101d8cSBoris Brezillon if (next && next->perfmon == exec->perfmon) 503ca26d28bSVarad Gautam goto again; 504ca26d28bSVarad Gautam } 505ca26d28bSVarad Gautam } 506ca26d28bSVarad Gautam 507ca26d28bSVarad Gautam void 508ca26d28bSVarad Gautam vc4_submit_next_render_job(struct drm_device *dev) 509ca26d28bSVarad Gautam { 510ca26d28bSVarad Gautam struct vc4_dev *vc4 = to_vc4_dev(dev); 511ca26d28bSVarad Gautam struct vc4_exec_info *exec = vc4_first_render_job(vc4); 512ca26d28bSVarad Gautam 513ca26d28bSVarad Gautam if (!exec) 514ca26d28bSVarad Gautam return; 515ca26d28bSVarad Gautam 516f61145f1SEric Anholt /* A previous RCL may have written to one of our textures, and 517f61145f1SEric Anholt * our full cache flush at bin time may have occurred before 518f61145f1SEric Anholt * that RCL completed. Flush the texture cache now, but not 519f61145f1SEric Anholt * the instructions or uniforms (since we don't write those 520f61145f1SEric Anholt * from an RCL). 521f61145f1SEric Anholt */ 522f61145f1SEric Anholt vc4_flush_texture_caches(dev); 523f61145f1SEric Anholt 524*044feb97SMelissa Wen trace_vc4_submit_cl(dev, true, exec->seqno, exec->ct1ca, exec->ct1ea); 525d5b1a78aSEric Anholt submit_cl(dev, 1, exec->ct1ca, exec->ct1ea); 526d5b1a78aSEric Anholt } 527d5b1a78aSEric Anholt 528ca26d28bSVarad Gautam void 529ca26d28bSVarad Gautam vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec) 530ca26d28bSVarad Gautam { 531ca26d28bSVarad Gautam struct vc4_dev *vc4 = to_vc4_dev(dev); 532ca26d28bSVarad Gautam bool was_empty = list_empty(&vc4->render_job_list); 533ca26d28bSVarad Gautam 534ca26d28bSVarad Gautam list_move_tail(&exec->head, &vc4->render_job_list); 535ca26d28bSVarad Gautam if (was_empty) 536ca26d28bSVarad Gautam vc4_submit_next_render_job(dev); 537ca26d28bSVarad Gautam } 538ca26d28bSVarad Gautam 539d5b1a78aSEric Anholt static void 540d5b1a78aSEric Anholt vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) 541d5b1a78aSEric Anholt { 542d5b1a78aSEric Anholt struct vc4_bo *bo; 543d5b1a78aSEric Anholt unsigned i; 544d5b1a78aSEric Anholt 545d5b1a78aSEric Anholt for (i = 0; i < exec->bo_count; i++) { 546d5b1a78aSEric Anholt bo = to_vc4_bo(&exec->bo[i]->base); 547d5b1a78aSEric Anholt bo->seqno = seqno; 548cdec4d36SEric Anholt 54952791eeeSChristian König dma_resv_add_shared_fence(bo->base.base.resv, exec->fence); 550d5b1a78aSEric Anholt } 551d5b1a78aSEric Anholt 552d5b1a78aSEric Anholt list_for_each_entry(bo, &exec->unref_list, unref_head) { 553d5b1a78aSEric Anholt bo->seqno = seqno; 554d5b1a78aSEric Anholt } 5557edabee0SEric Anholt 5567edabee0SEric Anholt for (i = 0; i < exec->rcl_write_bo_count; i++) { 5577edabee0SEric Anholt bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); 5587edabee0SEric Anholt bo->write_seqno = seqno; 559cdec4d36SEric Anholt 56052791eeeSChristian König dma_resv_add_excl_fence(bo->base.base.resv, exec->fence); 5617edabee0SEric Anholt } 562d5b1a78aSEric Anholt } 563d5b1a78aSEric Anholt 564cdec4d36SEric Anholt static void 565cdec4d36SEric Anholt vc4_unlock_bo_reservations(struct drm_device *dev, 566cdec4d36SEric Anholt struct vc4_exec_info *exec, 567cdec4d36SEric Anholt struct ww_acquire_ctx *acquire_ctx) 568cdec4d36SEric Anholt { 569cdec4d36SEric Anholt int i; 570cdec4d36SEric Anholt 571cdec4d36SEric Anholt for (i = 0; i < exec->bo_count; i++) { 572bd7de1e8SRob Herring struct drm_gem_object *bo = &exec->bo[i]->base; 573cdec4d36SEric Anholt 574616b549bSDaniel Vetter dma_resv_unlock(bo->resv); 575cdec4d36SEric Anholt } 576cdec4d36SEric Anholt 577cdec4d36SEric Anholt ww_acquire_fini(acquire_ctx); 578cdec4d36SEric Anholt } 579cdec4d36SEric Anholt 580cdec4d36SEric Anholt /* Takes the reservation lock on all the BOs being referenced, so that 581cdec4d36SEric Anholt * at queue submit time we can update the reservations. 582cdec4d36SEric Anholt * 583cdec4d36SEric Anholt * We don't lock the RCL the tile alloc/state BOs, or overflow memory 584cdec4d36SEric Anholt * (all of which are on exec->unref_list). They're entirely private 585cdec4d36SEric Anholt * to vc4, so we don't attach dma-buf fences to them. 586cdec4d36SEric Anholt */ 587cdec4d36SEric Anholt static int 588cdec4d36SEric Anholt vc4_lock_bo_reservations(struct drm_device *dev, 589cdec4d36SEric Anholt struct vc4_exec_info *exec, 590cdec4d36SEric Anholt struct ww_acquire_ctx *acquire_ctx) 591cdec4d36SEric Anholt { 592cdec4d36SEric Anholt int contended_lock = -1; 593cdec4d36SEric Anholt int i, ret; 594bd7de1e8SRob Herring struct drm_gem_object *bo; 595cdec4d36SEric Anholt 596cdec4d36SEric Anholt ww_acquire_init(acquire_ctx, &reservation_ww_class); 597cdec4d36SEric Anholt 598cdec4d36SEric Anholt retry: 599cdec4d36SEric Anholt if (contended_lock != -1) { 600bd7de1e8SRob Herring bo = &exec->bo[contended_lock]->base; 601616b549bSDaniel Vetter ret = dma_resv_lock_slow_interruptible(bo->resv, acquire_ctx); 602cdec4d36SEric Anholt if (ret) { 603cdec4d36SEric Anholt ww_acquire_done(acquire_ctx); 604cdec4d36SEric Anholt return ret; 605cdec4d36SEric Anholt } 606cdec4d36SEric Anholt } 607cdec4d36SEric Anholt 608cdec4d36SEric Anholt for (i = 0; i < exec->bo_count; i++) { 609cdec4d36SEric Anholt if (i == contended_lock) 610cdec4d36SEric Anholt continue; 611cdec4d36SEric Anholt 612bd7de1e8SRob Herring bo = &exec->bo[i]->base; 613cdec4d36SEric Anholt 614616b549bSDaniel Vetter ret = dma_resv_lock_interruptible(bo->resv, acquire_ctx); 615cdec4d36SEric Anholt if (ret) { 616cdec4d36SEric Anholt int j; 617cdec4d36SEric Anholt 618cdec4d36SEric Anholt for (j = 0; j < i; j++) { 619bd7de1e8SRob Herring bo = &exec->bo[j]->base; 620616b549bSDaniel Vetter dma_resv_unlock(bo->resv); 621cdec4d36SEric Anholt } 622cdec4d36SEric Anholt 623cdec4d36SEric Anholt if (contended_lock != -1 && contended_lock >= i) { 624bd7de1e8SRob Herring bo = &exec->bo[contended_lock]->base; 625cdec4d36SEric Anholt 626616b549bSDaniel Vetter dma_resv_unlock(bo->resv); 627cdec4d36SEric Anholt } 628cdec4d36SEric Anholt 629cdec4d36SEric Anholt if (ret == -EDEADLK) { 630cdec4d36SEric Anholt contended_lock = i; 631cdec4d36SEric Anholt goto retry; 632cdec4d36SEric Anholt } 633cdec4d36SEric Anholt 634cdec4d36SEric Anholt ww_acquire_done(acquire_ctx); 635cdec4d36SEric Anholt return ret; 636cdec4d36SEric Anholt } 637cdec4d36SEric Anholt } 638cdec4d36SEric Anholt 639cdec4d36SEric Anholt ww_acquire_done(acquire_ctx); 640cdec4d36SEric Anholt 641cdec4d36SEric Anholt /* Reserve space for our shared (read-only) fence references, 642cdec4d36SEric Anholt * before we commit the CL to the hardware. 643cdec4d36SEric Anholt */ 644cdec4d36SEric Anholt for (i = 0; i < exec->bo_count; i++) { 645bd7de1e8SRob Herring bo = &exec->bo[i]->base; 646cdec4d36SEric Anholt 64752791eeeSChristian König ret = dma_resv_reserve_shared(bo->resv, 1); 648cdec4d36SEric Anholt if (ret) { 649cdec4d36SEric Anholt vc4_unlock_bo_reservations(dev, exec, acquire_ctx); 650cdec4d36SEric Anholt return ret; 651cdec4d36SEric Anholt } 652cdec4d36SEric Anholt } 653cdec4d36SEric Anholt 654cdec4d36SEric Anholt return 0; 655cdec4d36SEric Anholt } 656cdec4d36SEric Anholt 657d5b1a78aSEric Anholt /* Queues a struct vc4_exec_info for execution. If no job is 658d5b1a78aSEric Anholt * currently executing, then submits it. 659d5b1a78aSEric Anholt * 660d5b1a78aSEric Anholt * Unlike most GPUs, our hardware only handles one command list at a 661d5b1a78aSEric Anholt * time. To queue multiple jobs at once, we'd need to edit the 662d5b1a78aSEric Anholt * previous command list to have a jump to the new one at the end, and 663d5b1a78aSEric Anholt * then bump the end address. That's a change for a later date, 664d5b1a78aSEric Anholt * though. 665d5b1a78aSEric Anholt */ 666cdec4d36SEric Anholt static int 667cdec4d36SEric Anholt vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, 668e84fcb95SStefan Schake struct ww_acquire_ctx *acquire_ctx, 669e84fcb95SStefan Schake struct drm_syncobj *out_sync) 670d5b1a78aSEric Anholt { 671d5b1a78aSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 67265101d8cSBoris Brezillon struct vc4_exec_info *renderjob; 673d5b1a78aSEric Anholt uint64_t seqno; 674d5b1a78aSEric Anholt unsigned long irqflags; 675cdec4d36SEric Anholt struct vc4_fence *fence; 676cdec4d36SEric Anholt 677cdec4d36SEric Anholt fence = kzalloc(sizeof(*fence), GFP_KERNEL); 678cdec4d36SEric Anholt if (!fence) 679cdec4d36SEric Anholt return -ENOMEM; 680cdec4d36SEric Anholt fence->dev = dev; 681d5b1a78aSEric Anholt 682d5b1a78aSEric Anholt spin_lock_irqsave(&vc4->job_lock, irqflags); 683d5b1a78aSEric Anholt 684d5b1a78aSEric Anholt seqno = ++vc4->emit_seqno; 685d5b1a78aSEric Anholt exec->seqno = seqno; 686cdec4d36SEric Anholt 687cdec4d36SEric Anholt dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock, 688cdec4d36SEric Anholt vc4->dma_fence_context, exec->seqno); 689cdec4d36SEric Anholt fence->seqno = exec->seqno; 690cdec4d36SEric Anholt exec->fence = &fence->base; 691cdec4d36SEric Anholt 692e84fcb95SStefan Schake if (out_sync) 6930b258ed1SChristian König drm_syncobj_replace_fence(out_sync, exec->fence); 694e84fcb95SStefan Schake 695d5b1a78aSEric Anholt vc4_update_bo_seqnos(exec, seqno); 696d5b1a78aSEric Anholt 697cdec4d36SEric Anholt vc4_unlock_bo_reservations(dev, exec, acquire_ctx); 698cdec4d36SEric Anholt 699ca26d28bSVarad Gautam list_add_tail(&exec->head, &vc4->bin_job_list); 700d5b1a78aSEric Anholt 70165101d8cSBoris Brezillon /* If no bin job was executing and if the render job (if any) has the 70265101d8cSBoris Brezillon * same perfmon as our job attached to it (or if both jobs don't have 70365101d8cSBoris Brezillon * perfmon activated), then kick ours off. Otherwise, it'll get 70465101d8cSBoris Brezillon * started when the previous job's flush/render done interrupt occurs. 705d5b1a78aSEric Anholt */ 70665101d8cSBoris Brezillon renderjob = vc4_first_render_job(vc4); 70765101d8cSBoris Brezillon if (vc4_first_bin_job(vc4) == exec && 70865101d8cSBoris Brezillon (!renderjob || renderjob->perfmon == exec->perfmon)) { 709ca26d28bSVarad Gautam vc4_submit_next_bin_job(dev); 710d5b1a78aSEric Anholt vc4_queue_hangcheck(dev); 711d5b1a78aSEric Anholt } 712d5b1a78aSEric Anholt 713d5b1a78aSEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 714cdec4d36SEric Anholt 715cdec4d36SEric Anholt return 0; 716d5b1a78aSEric Anholt } 717d5b1a78aSEric Anholt 718d5b1a78aSEric Anholt /** 71972f793f1SEric Anholt * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects 72072f793f1SEric Anholt * referenced by the job. 72172f793f1SEric Anholt * @dev: DRM device 72272f793f1SEric Anholt * @file_priv: DRM file for this fd 72372f793f1SEric Anholt * @exec: V3D job being set up 72472f793f1SEric Anholt * 72572f793f1SEric Anholt * The command validator needs to reference BOs by their index within 72672f793f1SEric Anholt * the submitted job's BO list. This does the validation of the job's 72772f793f1SEric Anholt * BO list and reference counting for the lifetime of the job. 728d5b1a78aSEric Anholt */ 729d5b1a78aSEric Anholt static int 730d5b1a78aSEric Anholt vc4_cl_lookup_bos(struct drm_device *dev, 731d5b1a78aSEric Anholt struct drm_file *file_priv, 732d5b1a78aSEric Anholt struct vc4_exec_info *exec) 733d5b1a78aSEric Anholt { 734d5b1a78aSEric Anholt struct drm_vc4_submit_cl *args = exec->args; 735d5b1a78aSEric Anholt uint32_t *handles; 736d5b1a78aSEric Anholt int ret = 0; 737d5b1a78aSEric Anholt int i; 738d5b1a78aSEric Anholt 739d5b1a78aSEric Anholt exec->bo_count = args->bo_handle_count; 740d5b1a78aSEric Anholt 741d5b1a78aSEric Anholt if (!exec->bo_count) { 742d5b1a78aSEric Anholt /* See comment on bo_index for why we have to check 743d5b1a78aSEric Anholt * this. 744d5b1a78aSEric Anholt */ 745fb95992aSEric Anholt DRM_DEBUG("Rendering requires BOs to validate\n"); 746d5b1a78aSEric Anholt return -EINVAL; 747d5b1a78aSEric Anholt } 748d5b1a78aSEric Anholt 7492098105eSMichal Hocko exec->bo = kvmalloc_array(exec->bo_count, 7502098105eSMichal Hocko sizeof(struct drm_gem_cma_object *), 7512098105eSMichal Hocko GFP_KERNEL | __GFP_ZERO); 752d5b1a78aSEric Anholt if (!exec->bo) { 753d5b1a78aSEric Anholt DRM_ERROR("Failed to allocate validated BO pointers\n"); 754d5b1a78aSEric Anholt return -ENOMEM; 755d5b1a78aSEric Anholt } 756d5b1a78aSEric Anholt 7572098105eSMichal Hocko handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL); 758d5b1a78aSEric Anholt if (!handles) { 759b2cdeb19SDan Carpenter ret = -ENOMEM; 760d5b1a78aSEric Anholt DRM_ERROR("Failed to allocate incoming GEM handles\n"); 761d5b1a78aSEric Anholt goto fail; 762d5b1a78aSEric Anholt } 763d5b1a78aSEric Anholt 76495d7cbcbSEric Anholt if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles), 765b2cdeb19SDan Carpenter exec->bo_count * sizeof(uint32_t))) { 766b2cdeb19SDan Carpenter ret = -EFAULT; 767d5b1a78aSEric Anholt DRM_ERROR("Failed to copy in GEM handles\n"); 768d5b1a78aSEric Anholt goto fail; 769d5b1a78aSEric Anholt } 770d5b1a78aSEric Anholt 771d5b1a78aSEric Anholt spin_lock(&file_priv->table_lock); 772d5b1a78aSEric Anholt for (i = 0; i < exec->bo_count; i++) { 773d5b1a78aSEric Anholt struct drm_gem_object *bo = idr_find(&file_priv->object_idr, 774d5b1a78aSEric Anholt handles[i]); 775d5b1a78aSEric Anholt if (!bo) { 776fb95992aSEric Anholt DRM_DEBUG("Failed to look up GEM BO %d: %d\n", 777d5b1a78aSEric Anholt i, handles[i]); 778d5b1a78aSEric Anholt ret = -EINVAL; 779b9f19259SBoris Brezillon break; 780d5b1a78aSEric Anholt } 781b9f19259SBoris Brezillon 7821d5494e9SCihangir Akturk drm_gem_object_get(bo); 783d5b1a78aSEric Anholt exec->bo[i] = (struct drm_gem_cma_object *)bo; 784d5b1a78aSEric Anholt } 785d5b1a78aSEric Anholt spin_unlock(&file_priv->table_lock); 786d5b1a78aSEric Anholt 787b9f19259SBoris Brezillon if (ret) 788b9f19259SBoris Brezillon goto fail_put_bo; 789b9f19259SBoris Brezillon 790b9f19259SBoris Brezillon for (i = 0; i < exec->bo_count; i++) { 791b9f19259SBoris Brezillon ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base)); 792b9f19259SBoris Brezillon if (ret) 793b9f19259SBoris Brezillon goto fail_dec_usecnt; 794b9f19259SBoris Brezillon } 795b9f19259SBoris Brezillon 796b9f19259SBoris Brezillon kvfree(handles); 797b9f19259SBoris Brezillon return 0; 798b9f19259SBoris Brezillon 799b9f19259SBoris Brezillon fail_dec_usecnt: 800b9f19259SBoris Brezillon /* Decrease usecnt on acquired objects. 801b9f19259SBoris Brezillon * We cannot rely on vc4_complete_exec() to release resources here, 802b9f19259SBoris Brezillon * because vc4_complete_exec() has no information about which BO has 803b9f19259SBoris Brezillon * had its ->usecnt incremented. 804b9f19259SBoris Brezillon * To make things easier we just free everything explicitly and set 805b9f19259SBoris Brezillon * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release' 806b9f19259SBoris Brezillon * step. 807b9f19259SBoris Brezillon */ 808b9f19259SBoris Brezillon for (i-- ; i >= 0; i--) 809b9f19259SBoris Brezillon vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base)); 810b9f19259SBoris Brezillon 811b9f19259SBoris Brezillon fail_put_bo: 812b9f19259SBoris Brezillon /* Release any reference to acquired objects. */ 813b9f19259SBoris Brezillon for (i = 0; i < exec->bo_count && exec->bo[i]; i++) 814f7a8cd30SEmil Velikov drm_gem_object_put(&exec->bo[i]->base); 815b9f19259SBoris Brezillon 816d5b1a78aSEric Anholt fail: 8172098105eSMichal Hocko kvfree(handles); 818b9f19259SBoris Brezillon kvfree(exec->bo); 819b9f19259SBoris Brezillon exec->bo = NULL; 820552416c1SEric Anholt return ret; 821d5b1a78aSEric Anholt } 822d5b1a78aSEric Anholt 823d5b1a78aSEric Anholt static int 824d5b1a78aSEric Anholt vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) 825d5b1a78aSEric Anholt { 826d5b1a78aSEric Anholt struct drm_vc4_submit_cl *args = exec->args; 82735c8b4b2SPaul Kocialkowski struct vc4_dev *vc4 = to_vc4_dev(dev); 828d5b1a78aSEric Anholt void *temp = NULL; 829d5b1a78aSEric Anholt void *bin; 830d5b1a78aSEric Anholt int ret = 0; 831d5b1a78aSEric Anholt uint32_t bin_offset = 0; 832d5b1a78aSEric Anholt uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size, 833d5b1a78aSEric Anholt 16); 834d5b1a78aSEric Anholt uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size; 835d5b1a78aSEric Anholt uint32_t exec_size = uniforms_offset + args->uniforms_size; 836d5b1a78aSEric Anholt uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) * 837d5b1a78aSEric Anholt args->shader_rec_count); 838d5b1a78aSEric Anholt struct vc4_bo *bo; 839d5b1a78aSEric Anholt 8400f2ff82eSEric Anholt if (shader_rec_offset < args->bin_cl_size || 8410f2ff82eSEric Anholt uniforms_offset < shader_rec_offset || 842d5b1a78aSEric Anholt exec_size < uniforms_offset || 843d5b1a78aSEric Anholt args->shader_rec_count >= (UINT_MAX / 844d5b1a78aSEric Anholt sizeof(struct vc4_shader_state)) || 845d5b1a78aSEric Anholt temp_size < exec_size) { 846fb95992aSEric Anholt DRM_DEBUG("overflow in exec arguments\n"); 8476b8ac638SEric Anholt ret = -EINVAL; 848d5b1a78aSEric Anholt goto fail; 849d5b1a78aSEric Anholt } 850d5b1a78aSEric Anholt 851d5b1a78aSEric Anholt /* Allocate space where we'll store the copied in user command lists 852d5b1a78aSEric Anholt * and shader records. 853d5b1a78aSEric Anholt * 854d5b1a78aSEric Anholt * We don't just copy directly into the BOs because we need to 855d5b1a78aSEric Anholt * read the contents back for validation, and I think the 856d5b1a78aSEric Anholt * bo->vaddr is uncached access. 857d5b1a78aSEric Anholt */ 8582098105eSMichal Hocko temp = kvmalloc_array(temp_size, 1, GFP_KERNEL); 859d5b1a78aSEric Anholt if (!temp) { 860d5b1a78aSEric Anholt DRM_ERROR("Failed to allocate storage for copying " 861d5b1a78aSEric Anholt "in bin/render CLs.\n"); 862d5b1a78aSEric Anholt ret = -ENOMEM; 863d5b1a78aSEric Anholt goto fail; 864d5b1a78aSEric Anholt } 865d5b1a78aSEric Anholt bin = temp + bin_offset; 866d5b1a78aSEric Anholt exec->shader_rec_u = temp + shader_rec_offset; 867d5b1a78aSEric Anholt exec->uniforms_u = temp + uniforms_offset; 868d5b1a78aSEric Anholt exec->shader_state = temp + exec_size; 869d5b1a78aSEric Anholt exec->shader_state_size = args->shader_rec_count; 870d5b1a78aSEric Anholt 87165c4777dSDan Carpenter if (copy_from_user(bin, 87295d7cbcbSEric Anholt u64_to_user_ptr(args->bin_cl), 87365c4777dSDan Carpenter args->bin_cl_size)) { 87465c4777dSDan Carpenter ret = -EFAULT; 875d5b1a78aSEric Anholt goto fail; 876d5b1a78aSEric Anholt } 877d5b1a78aSEric Anholt 87865c4777dSDan Carpenter if (copy_from_user(exec->shader_rec_u, 87995d7cbcbSEric Anholt u64_to_user_ptr(args->shader_rec), 88065c4777dSDan Carpenter args->shader_rec_size)) { 88165c4777dSDan Carpenter ret = -EFAULT; 882d5b1a78aSEric Anholt goto fail; 883d5b1a78aSEric Anholt } 884d5b1a78aSEric Anholt 88565c4777dSDan Carpenter if (copy_from_user(exec->uniforms_u, 88695d7cbcbSEric Anholt u64_to_user_ptr(args->uniforms), 88765c4777dSDan Carpenter args->uniforms_size)) { 88865c4777dSDan Carpenter ret = -EFAULT; 889d5b1a78aSEric Anholt goto fail; 890d5b1a78aSEric Anholt } 891d5b1a78aSEric Anholt 892f3099462SEric Anholt bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL); 8932c68f1fcSEric Anholt if (IS_ERR(bo)) { 894d5b1a78aSEric Anholt DRM_ERROR("Couldn't allocate BO for binning\n"); 8952c68f1fcSEric Anholt ret = PTR_ERR(bo); 896d5b1a78aSEric Anholt goto fail; 897d5b1a78aSEric Anholt } 898d5b1a78aSEric Anholt exec->exec_bo = &bo->base; 899d5b1a78aSEric Anholt 900d5b1a78aSEric Anholt list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head, 901d5b1a78aSEric Anholt &exec->unref_list); 902d5b1a78aSEric Anholt 903d5b1a78aSEric Anholt exec->ct0ca = exec->exec_bo->paddr + bin_offset; 904d5b1a78aSEric Anholt 905d5b1a78aSEric Anholt exec->bin_u = bin; 906d5b1a78aSEric Anholt 907d5b1a78aSEric Anholt exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset; 908d5b1a78aSEric Anholt exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset; 909d5b1a78aSEric Anholt exec->shader_rec_size = args->shader_rec_size; 910d5b1a78aSEric Anholt 911d5b1a78aSEric Anholt exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset; 912d5b1a78aSEric Anholt exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset; 913d5b1a78aSEric Anholt exec->uniforms_size = args->uniforms_size; 914d5b1a78aSEric Anholt 915d5b1a78aSEric Anholt ret = vc4_validate_bin_cl(dev, 916d5b1a78aSEric Anholt exec->exec_bo->vaddr + bin_offset, 917d5b1a78aSEric Anholt bin, 918d5b1a78aSEric Anholt exec); 919d5b1a78aSEric Anholt if (ret) 920d5b1a78aSEric Anholt goto fail; 921d5b1a78aSEric Anholt 922d5b1a78aSEric Anholt ret = vc4_validate_shader_recs(dev, exec); 9237edabee0SEric Anholt if (ret) 9247edabee0SEric Anholt goto fail; 9257edabee0SEric Anholt 92635c8b4b2SPaul Kocialkowski if (exec->found_tile_binning_mode_config_packet) { 92735c8b4b2SPaul Kocialkowski ret = vc4_v3d_bin_bo_get(vc4, &exec->bin_bo_used); 92835c8b4b2SPaul Kocialkowski if (ret) 92935c8b4b2SPaul Kocialkowski goto fail; 93035c8b4b2SPaul Kocialkowski } 93135c8b4b2SPaul Kocialkowski 9327edabee0SEric Anholt /* Block waiting on any previous rendering into the CS's VBO, 9337edabee0SEric Anholt * IB, or textures, so that pixels are actually written by the 9347edabee0SEric Anholt * time we try to read them. 9357edabee0SEric Anholt */ 9367edabee0SEric Anholt ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true); 937d5b1a78aSEric Anholt 938d5b1a78aSEric Anholt fail: 9392098105eSMichal Hocko kvfree(temp); 940d5b1a78aSEric Anholt return ret; 941d5b1a78aSEric Anholt } 942d5b1a78aSEric Anholt 943d5b1a78aSEric Anholt static void 944d5b1a78aSEric Anholt vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) 945d5b1a78aSEric Anholt { 946001bdb55SEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 947553c942fSEric Anholt unsigned long irqflags; 948d5b1a78aSEric Anholt unsigned i; 949d5b1a78aSEric Anholt 950cdec4d36SEric Anholt /* If we got force-completed because of GPU reset rather than 951cdec4d36SEric Anholt * through our IRQ handler, signal the fence now. 952cdec4d36SEric Anholt */ 953babc8110SStefan Schake if (exec->fence) { 954cdec4d36SEric Anholt dma_fence_signal(exec->fence); 955babc8110SStefan Schake dma_fence_put(exec->fence); 956babc8110SStefan Schake } 957cdec4d36SEric Anholt 958d5b1a78aSEric Anholt if (exec->bo) { 959b9f19259SBoris Brezillon for (i = 0; i < exec->bo_count; i++) { 960b9f19259SBoris Brezillon struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base); 961b9f19259SBoris Brezillon 962b9f19259SBoris Brezillon vc4_bo_dec_usecnt(bo); 963f7a8cd30SEmil Velikov drm_gem_object_put(&exec->bo[i]->base); 964b9f19259SBoris Brezillon } 9652098105eSMichal Hocko kvfree(exec->bo); 966d5b1a78aSEric Anholt } 967d5b1a78aSEric Anholt 968d5b1a78aSEric Anholt while (!list_empty(&exec->unref_list)) { 969d5b1a78aSEric Anholt struct vc4_bo *bo = list_first_entry(&exec->unref_list, 970d5b1a78aSEric Anholt struct vc4_bo, unref_head); 971d5b1a78aSEric Anholt list_del(&bo->unref_head); 972f7a8cd30SEmil Velikov drm_gem_object_put(&bo->base.base); 973d5b1a78aSEric Anholt } 974d5b1a78aSEric Anholt 975553c942fSEric Anholt /* Free up the allocation of any bin slots we used. */ 976553c942fSEric Anholt spin_lock_irqsave(&vc4->job_lock, irqflags); 977553c942fSEric Anholt vc4->bin_alloc_used &= ~exec->bin_slots; 978553c942fSEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 979553c942fSEric Anholt 98035c8b4b2SPaul Kocialkowski /* Release the reference on the binner BO if needed. */ 98135c8b4b2SPaul Kocialkowski if (exec->bin_bo_used) 98235c8b4b2SPaul Kocialkowski vc4_v3d_bin_bo_put(vc4); 98335c8b4b2SPaul Kocialkowski 98465101d8cSBoris Brezillon /* Release the reference we had on the perf monitor. */ 98565101d8cSBoris Brezillon vc4_perfmon_put(exec->perfmon); 98665101d8cSBoris Brezillon 987cb74f6eeSEric Anholt vc4_v3d_pm_put(vc4); 988001bdb55SEric Anholt 989d5b1a78aSEric Anholt kfree(exec); 990d5b1a78aSEric Anholt } 991d5b1a78aSEric Anholt 992d5b1a78aSEric Anholt void 993d5b1a78aSEric Anholt vc4_job_handle_completed(struct vc4_dev *vc4) 994d5b1a78aSEric Anholt { 995d5b1a78aSEric Anholt unsigned long irqflags; 996b501baccSEric Anholt struct vc4_seqno_cb *cb, *cb_temp; 997d5b1a78aSEric Anholt 998d5b1a78aSEric Anholt spin_lock_irqsave(&vc4->job_lock, irqflags); 999d5b1a78aSEric Anholt while (!list_empty(&vc4->job_done_list)) { 1000d5b1a78aSEric Anholt struct vc4_exec_info *exec = 1001d5b1a78aSEric Anholt list_first_entry(&vc4->job_done_list, 1002d5b1a78aSEric Anholt struct vc4_exec_info, head); 1003d5b1a78aSEric Anholt list_del(&exec->head); 1004d5b1a78aSEric Anholt 1005d5b1a78aSEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 100684d7d472SMaxime Ripard vc4_complete_exec(&vc4->base, exec); 1007d5b1a78aSEric Anholt spin_lock_irqsave(&vc4->job_lock, irqflags); 1008d5b1a78aSEric Anholt } 1009b501baccSEric Anholt 1010b501baccSEric Anholt list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) { 1011b501baccSEric Anholt if (cb->seqno <= vc4->finished_seqno) { 1012b501baccSEric Anholt list_del_init(&cb->work.entry); 1013b501baccSEric Anholt schedule_work(&cb->work); 1014b501baccSEric Anholt } 1015b501baccSEric Anholt } 1016b501baccSEric Anholt 1017d5b1a78aSEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 1018d5b1a78aSEric Anholt } 1019d5b1a78aSEric Anholt 1020b501baccSEric Anholt static void vc4_seqno_cb_work(struct work_struct *work) 1021b501baccSEric Anholt { 1022b501baccSEric Anholt struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work); 1023b501baccSEric Anholt 1024b501baccSEric Anholt cb->func(cb); 1025b501baccSEric Anholt } 1026b501baccSEric Anholt 1027b501baccSEric Anholt int vc4_queue_seqno_cb(struct drm_device *dev, 1028b501baccSEric Anholt struct vc4_seqno_cb *cb, uint64_t seqno, 1029b501baccSEric Anholt void (*func)(struct vc4_seqno_cb *cb)) 1030b501baccSEric Anholt { 1031b501baccSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 1032b501baccSEric Anholt unsigned long irqflags; 1033b501baccSEric Anholt 1034b501baccSEric Anholt cb->func = func; 1035b501baccSEric Anholt INIT_WORK(&cb->work, vc4_seqno_cb_work); 1036b501baccSEric Anholt 1037b501baccSEric Anholt spin_lock_irqsave(&vc4->job_lock, irqflags); 1038b501baccSEric Anholt if (seqno > vc4->finished_seqno) { 1039b501baccSEric Anholt cb->seqno = seqno; 1040b501baccSEric Anholt list_add_tail(&cb->work.entry, &vc4->seqno_cb_list); 1041b501baccSEric Anholt } else { 1042b501baccSEric Anholt schedule_work(&cb->work); 1043b501baccSEric Anholt } 1044b501baccSEric Anholt spin_unlock_irqrestore(&vc4->job_lock, irqflags); 1045b501baccSEric Anholt 1046439dde0aSBernard Zhao return 0; 1047b501baccSEric Anholt } 1048b501baccSEric Anholt 1049d5b1a78aSEric Anholt /* Scheduled when any job has been completed, this walks the list of 1050d5b1a78aSEric Anholt * jobs that had completed and unrefs their BOs and frees their exec 1051d5b1a78aSEric Anholt * structs. 1052d5b1a78aSEric Anholt */ 1053d5b1a78aSEric Anholt static void 1054d5b1a78aSEric Anholt vc4_job_done_work(struct work_struct *work) 1055d5b1a78aSEric Anholt { 1056d5b1a78aSEric Anholt struct vc4_dev *vc4 = 1057d5b1a78aSEric Anholt container_of(work, struct vc4_dev, job_done_work); 1058d5b1a78aSEric Anholt 1059d5b1a78aSEric Anholt vc4_job_handle_completed(vc4); 1060d5b1a78aSEric Anholt } 1061d5b1a78aSEric Anholt 1062d5b1a78aSEric Anholt static int 1063d5b1a78aSEric Anholt vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev, 1064d5b1a78aSEric Anholt uint64_t seqno, 1065d5b1a78aSEric Anholt uint64_t *timeout_ns) 1066d5b1a78aSEric Anholt { 1067d5b1a78aSEric Anholt unsigned long start = jiffies; 1068d5b1a78aSEric Anholt int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true); 1069d5b1a78aSEric Anholt 1070d5b1a78aSEric Anholt if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) { 1071d5b1a78aSEric Anholt uint64_t delta = jiffies_to_nsecs(jiffies - start); 1072d5b1a78aSEric Anholt 1073d5b1a78aSEric Anholt if (*timeout_ns >= delta) 1074d5b1a78aSEric Anholt *timeout_ns -= delta; 1075d5b1a78aSEric Anholt } 1076d5b1a78aSEric Anholt 1077d5b1a78aSEric Anholt return ret; 1078d5b1a78aSEric Anholt } 1079d5b1a78aSEric Anholt 1080d5b1a78aSEric Anholt int 1081d5b1a78aSEric Anholt vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, 1082d5b1a78aSEric Anholt struct drm_file *file_priv) 1083d5b1a78aSEric Anholt { 1084d5b1a78aSEric Anholt struct drm_vc4_wait_seqno *args = data; 1085d5b1a78aSEric Anholt 1086d5b1a78aSEric Anholt return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno, 1087d5b1a78aSEric Anholt &args->timeout_ns); 1088d5b1a78aSEric Anholt } 1089d5b1a78aSEric Anholt 1090d5b1a78aSEric Anholt int 1091d5b1a78aSEric Anholt vc4_wait_bo_ioctl(struct drm_device *dev, void *data, 1092d5b1a78aSEric Anholt struct drm_file *file_priv) 1093d5b1a78aSEric Anholt { 1094d5b1a78aSEric Anholt int ret; 1095d5b1a78aSEric Anholt struct drm_vc4_wait_bo *args = data; 1096d5b1a78aSEric Anholt struct drm_gem_object *gem_obj; 1097d5b1a78aSEric Anholt struct vc4_bo *bo; 1098d5b1a78aSEric Anholt 1099e0015236SEric Anholt if (args->pad != 0) 1100e0015236SEric Anholt return -EINVAL; 1101e0015236SEric Anholt 1102a8ad0bd8SChris Wilson gem_obj = drm_gem_object_lookup(file_priv, args->handle); 1103d5b1a78aSEric Anholt if (!gem_obj) { 1104fb95992aSEric Anholt DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); 1105d5b1a78aSEric Anholt return -EINVAL; 1106d5b1a78aSEric Anholt } 1107d5b1a78aSEric Anholt bo = to_vc4_bo(gem_obj); 1108d5b1a78aSEric Anholt 1109d5b1a78aSEric Anholt ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, 1110d5b1a78aSEric Anholt &args->timeout_ns); 1111d5b1a78aSEric Anholt 1112f7a8cd30SEmil Velikov drm_gem_object_put(gem_obj); 1113d5b1a78aSEric Anholt return ret; 1114d5b1a78aSEric Anholt } 1115d5b1a78aSEric Anholt 1116d5b1a78aSEric Anholt /** 111772f793f1SEric Anholt * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4. 111872f793f1SEric Anholt * @dev: DRM device 111972f793f1SEric Anholt * @data: ioctl argument 112072f793f1SEric Anholt * @file_priv: DRM file for this fd 1121d5b1a78aSEric Anholt * 112272f793f1SEric Anholt * This is the main entrypoint for userspace to submit a 3D frame to 112372f793f1SEric Anholt * the GPU. Userspace provides the binner command list (if 112472f793f1SEric Anholt * applicable), and the kernel sets up the render command list to draw 112572f793f1SEric Anholt * to the framebuffer described in the ioctl, using the command lists 112672f793f1SEric Anholt * that the 3D engine's binner will produce. 1127d5b1a78aSEric Anholt */ 1128d5b1a78aSEric Anholt int 1129d5b1a78aSEric Anholt vc4_submit_cl_ioctl(struct drm_device *dev, void *data, 1130d5b1a78aSEric Anholt struct drm_file *file_priv) 1131d5b1a78aSEric Anholt { 1132d5b1a78aSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 113365101d8cSBoris Brezillon struct vc4_file *vc4file = file_priv->driver_priv; 1134d5b1a78aSEric Anholt struct drm_vc4_submit_cl *args = data; 1135e84fcb95SStefan Schake struct drm_syncobj *out_sync = NULL; 1136d5b1a78aSEric Anholt struct vc4_exec_info *exec; 1137cdec4d36SEric Anholt struct ww_acquire_ctx acquire_ctx; 1138818f5c8fSStefan Schake struct dma_fence *in_fence; 113936cb6253SEric Anholt int ret = 0; 1140d5b1a78aSEric Anholt 1141*044feb97SMelissa Wen trace_vc4_submit_cl_ioctl(dev, args->bin_cl_size, 1142*044feb97SMelissa Wen args->shader_rec_size, 1143*044feb97SMelissa Wen args->bo_handle_count); 1144*044feb97SMelissa Wen 1145ffc26740SEric Anholt if (!vc4->v3d) { 1146ffc26740SEric Anholt DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n"); 1147ffc26740SEric Anholt return -ENODEV; 1148ffc26740SEric Anholt } 1149ffc26740SEric Anholt 11503be8edddSEric Anholt if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | 11513be8edddSEric Anholt VC4_SUBMIT_CL_FIXED_RCL_ORDER | 11523be8edddSEric Anholt VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | 11533be8edddSEric Anholt VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) { 1154fb95992aSEric Anholt DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags); 1155d5b1a78aSEric Anholt return -EINVAL; 1156d5b1a78aSEric Anholt } 1157d5b1a78aSEric Anholt 11584c70ac76SEric Anholt if (args->pad2 != 0) { 11594c70ac76SEric Anholt DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2); 11604c70ac76SEric Anholt return -EINVAL; 11614c70ac76SEric Anholt } 11624c70ac76SEric Anholt 1163d5b1a78aSEric Anholt exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); 1164d5b1a78aSEric Anholt if (!exec) { 1165d5b1a78aSEric Anholt DRM_ERROR("malloc failure on exec struct\n"); 1166d5b1a78aSEric Anholt return -ENOMEM; 1167d5b1a78aSEric Anholt } 1168d5b1a78aSEric Anholt 1169cb74f6eeSEric Anholt ret = vc4_v3d_pm_get(vc4); 1170cb74f6eeSEric Anholt if (ret) { 1171001bdb55SEric Anholt kfree(exec); 1172001bdb55SEric Anholt return ret; 1173001bdb55SEric Anholt } 1174001bdb55SEric Anholt 1175d5b1a78aSEric Anholt exec->args = args; 1176d5b1a78aSEric Anholt INIT_LIST_HEAD(&exec->unref_list); 1177d5b1a78aSEric Anholt 1178d5b1a78aSEric Anholt ret = vc4_cl_lookup_bos(dev, file_priv, exec); 1179d5b1a78aSEric Anholt if (ret) 1180d5b1a78aSEric Anholt goto fail; 1181d5b1a78aSEric Anholt 118265101d8cSBoris Brezillon if (args->perfmonid) { 118365101d8cSBoris Brezillon exec->perfmon = vc4_perfmon_find(vc4file, 118465101d8cSBoris Brezillon args->perfmonid); 118565101d8cSBoris Brezillon if (!exec->perfmon) { 118665101d8cSBoris Brezillon ret = -ENOENT; 118765101d8cSBoris Brezillon goto fail; 118865101d8cSBoris Brezillon } 118965101d8cSBoris Brezillon } 119065101d8cSBoris Brezillon 1191818f5c8fSStefan Schake if (args->in_sync) { 1192818f5c8fSStefan Schake ret = drm_syncobj_find_fence(file_priv, args->in_sync, 1193649fdce2SChunming Zhou 0, 0, &in_fence); 1194818f5c8fSStefan Schake if (ret) 1195818f5c8fSStefan Schake goto fail; 1196818f5c8fSStefan Schake 1197818f5c8fSStefan Schake /* When the fence (or fence array) is exclusively from our 1198818f5c8fSStefan Schake * context we can skip the wait since jobs are executed in 1199818f5c8fSStefan Schake * order of their submission through this ioctl and this can 1200818f5c8fSStefan Schake * only have fences from a prior job. 1201818f5c8fSStefan Schake */ 1202818f5c8fSStefan Schake if (!dma_fence_match_context(in_fence, 1203818f5c8fSStefan Schake vc4->dma_fence_context)) { 1204818f5c8fSStefan Schake ret = dma_fence_wait(in_fence, true); 1205818f5c8fSStefan Schake if (ret) { 1206818f5c8fSStefan Schake dma_fence_put(in_fence); 1207818f5c8fSStefan Schake goto fail; 1208818f5c8fSStefan Schake } 1209818f5c8fSStefan Schake } 1210818f5c8fSStefan Schake 1211818f5c8fSStefan Schake dma_fence_put(in_fence); 1212818f5c8fSStefan Schake } 1213818f5c8fSStefan Schake 1214d5b1a78aSEric Anholt if (exec->args->bin_cl_size != 0) { 1215d5b1a78aSEric Anholt ret = vc4_get_bcl(dev, exec); 1216d5b1a78aSEric Anholt if (ret) 1217d5b1a78aSEric Anholt goto fail; 1218d5b1a78aSEric Anholt } else { 1219d5b1a78aSEric Anholt exec->ct0ca = 0; 1220d5b1a78aSEric Anholt exec->ct0ea = 0; 1221d5b1a78aSEric Anholt } 1222d5b1a78aSEric Anholt 1223d5b1a78aSEric Anholt ret = vc4_get_rcl(dev, exec); 1224d5b1a78aSEric Anholt if (ret) 1225d5b1a78aSEric Anholt goto fail; 1226d5b1a78aSEric Anholt 1227cdec4d36SEric Anholt ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); 1228cdec4d36SEric Anholt if (ret) 1229cdec4d36SEric Anholt goto fail; 1230cdec4d36SEric Anholt 1231e84fcb95SStefan Schake if (args->out_sync) { 1232e84fcb95SStefan Schake out_sync = drm_syncobj_find(file_priv, args->out_sync); 1233e84fcb95SStefan Schake if (!out_sync) { 1234e84fcb95SStefan Schake ret = -EINVAL; 1235e84fcb95SStefan Schake goto fail; 1236e84fcb95SStefan Schake } 1237e84fcb95SStefan Schake 1238e84fcb95SStefan Schake /* We replace the fence in out_sync in vc4_queue_submit since 1239e84fcb95SStefan Schake * the render job could execute immediately after that call. 1240e84fcb95SStefan Schake * If it finishes before our ioctl processing resumes the 1241e84fcb95SStefan Schake * render job fence could already have been freed. 1242e84fcb95SStefan Schake */ 1243e84fcb95SStefan Schake } 1244e84fcb95SStefan Schake 1245d5b1a78aSEric Anholt /* Clear this out of the struct we'll be putting in the queue, 1246d5b1a78aSEric Anholt * since it's part of our stack. 1247d5b1a78aSEric Anholt */ 1248d5b1a78aSEric Anholt exec->args = NULL; 1249d5b1a78aSEric Anholt 1250e84fcb95SStefan Schake ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); 1251e84fcb95SStefan Schake 1252e84fcb95SStefan Schake /* The syncobj isn't part of the exec data and we need to free our 1253e84fcb95SStefan Schake * reference even if job submission failed. 1254e84fcb95SStefan Schake */ 1255e84fcb95SStefan Schake if (out_sync) 1256e84fcb95SStefan Schake drm_syncobj_put(out_sync); 1257e84fcb95SStefan Schake 1258cdec4d36SEric Anholt if (ret) 1259cdec4d36SEric Anholt goto fail; 1260d5b1a78aSEric Anholt 1261d5b1a78aSEric Anholt /* Return the seqno for our job. */ 1262d5b1a78aSEric Anholt args->seqno = vc4->emit_seqno; 1263d5b1a78aSEric Anholt 1264d5b1a78aSEric Anholt return 0; 1265d5b1a78aSEric Anholt 1266d5b1a78aSEric Anholt fail: 126784d7d472SMaxime Ripard vc4_complete_exec(&vc4->base, exec); 1268d5b1a78aSEric Anholt 1269d5b1a78aSEric Anholt return ret; 1270d5b1a78aSEric Anholt } 1271d5b1a78aSEric Anholt 1272171a072bSMaxime Ripard static void vc4_gem_destroy(struct drm_device *dev, void *unused); 1273171a072bSMaxime Ripard int vc4_gem_init(struct drm_device *dev) 1274d5b1a78aSEric Anholt { 1275d5b1a78aSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 1276d5b1a78aSEric Anholt 1277cdec4d36SEric Anholt vc4->dma_fence_context = dma_fence_context_alloc(1); 1278cdec4d36SEric Anholt 1279ca26d28bSVarad Gautam INIT_LIST_HEAD(&vc4->bin_job_list); 1280ca26d28bSVarad Gautam INIT_LIST_HEAD(&vc4->render_job_list); 1281d5b1a78aSEric Anholt INIT_LIST_HEAD(&vc4->job_done_list); 1282b501baccSEric Anholt INIT_LIST_HEAD(&vc4->seqno_cb_list); 1283d5b1a78aSEric Anholt spin_lock_init(&vc4->job_lock); 1284d5b1a78aSEric Anholt 1285d5b1a78aSEric Anholt INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work); 12860078730fSKees Cook timer_setup(&vc4->hangcheck.timer, vc4_hangcheck_elapsed, 0); 1287d5b1a78aSEric Anholt 1288d5b1a78aSEric Anholt INIT_WORK(&vc4->job_done_work, vc4_job_done_work); 128936cb6253SEric Anholt 129036cb6253SEric Anholt mutex_init(&vc4->power_lock); 1291b9f19259SBoris Brezillon 1292b9f19259SBoris Brezillon INIT_LIST_HEAD(&vc4->purgeable.list); 1293b9f19259SBoris Brezillon mutex_init(&vc4->purgeable.lock); 1294171a072bSMaxime Ripard 1295171a072bSMaxime Ripard return drmm_add_action_or_reset(dev, vc4_gem_destroy, NULL); 1296d5b1a78aSEric Anholt } 1297d5b1a78aSEric Anholt 1298171a072bSMaxime Ripard static void vc4_gem_destroy(struct drm_device *dev, void *unused) 1299d5b1a78aSEric Anholt { 1300d5b1a78aSEric Anholt struct vc4_dev *vc4 = to_vc4_dev(dev); 1301d5b1a78aSEric Anholt 1302d5b1a78aSEric Anholt /* Waiting for exec to finish would need to be done before 1303d5b1a78aSEric Anholt * unregistering V3D. 1304d5b1a78aSEric Anholt */ 1305d5b1a78aSEric Anholt WARN_ON(vc4->emit_seqno != vc4->finished_seqno); 1306d5b1a78aSEric Anholt 1307d5b1a78aSEric Anholt /* V3D should already have disabled its interrupt and cleared 1308d5b1a78aSEric Anholt * the overflow allocation registers. Now free the object. 1309d5b1a78aSEric Anholt */ 1310553c942fSEric Anholt if (vc4->bin_bo) { 1311f7a8cd30SEmil Velikov drm_gem_object_put(&vc4->bin_bo->base.base); 1312553c942fSEric Anholt vc4->bin_bo = NULL; 1313d5b1a78aSEric Anholt } 1314d5b1a78aSEric Anholt 131521461365SEric Anholt if (vc4->hang_state) 131621461365SEric Anholt vc4_free_hang_state(dev, vc4->hang_state); 1317d5b1a78aSEric Anholt } 1318b9f19259SBoris Brezillon 1319b9f19259SBoris Brezillon int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data, 1320b9f19259SBoris Brezillon struct drm_file *file_priv) 1321b9f19259SBoris Brezillon { 1322b9f19259SBoris Brezillon struct drm_vc4_gem_madvise *args = data; 1323b9f19259SBoris Brezillon struct drm_gem_object *gem_obj; 1324b9f19259SBoris Brezillon struct vc4_bo *bo; 1325b9f19259SBoris Brezillon int ret; 1326b9f19259SBoris Brezillon 1327b9f19259SBoris Brezillon switch (args->madv) { 1328b9f19259SBoris Brezillon case VC4_MADV_DONTNEED: 1329b9f19259SBoris Brezillon case VC4_MADV_WILLNEED: 1330b9f19259SBoris Brezillon break; 1331b9f19259SBoris Brezillon default: 1332b9f19259SBoris Brezillon return -EINVAL; 1333b9f19259SBoris Brezillon } 1334b9f19259SBoris Brezillon 1335b9f19259SBoris Brezillon if (args->pad != 0) 1336b9f19259SBoris Brezillon return -EINVAL; 1337b9f19259SBoris Brezillon 1338b9f19259SBoris Brezillon gem_obj = drm_gem_object_lookup(file_priv, args->handle); 1339b9f19259SBoris Brezillon if (!gem_obj) { 1340b9f19259SBoris Brezillon DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); 1341b9f19259SBoris Brezillon return -ENOENT; 1342b9f19259SBoris Brezillon } 1343b9f19259SBoris Brezillon 1344b9f19259SBoris Brezillon bo = to_vc4_bo(gem_obj); 1345b9f19259SBoris Brezillon 1346b9f19259SBoris Brezillon /* Only BOs exposed to userspace can be purged. */ 1347b9f19259SBoris Brezillon if (bo->madv == __VC4_MADV_NOTSUPP) { 1348b9f19259SBoris Brezillon DRM_DEBUG("madvise not supported on this BO\n"); 1349b9f19259SBoris Brezillon ret = -EINVAL; 1350b9f19259SBoris Brezillon goto out_put_gem; 1351b9f19259SBoris Brezillon } 1352b9f19259SBoris Brezillon 1353b9f19259SBoris Brezillon /* Not sure it's safe to purge imported BOs. Let's just assume it's 1354b9f19259SBoris Brezillon * not until proven otherwise. 1355b9f19259SBoris Brezillon */ 1356b9f19259SBoris Brezillon if (gem_obj->import_attach) { 1357b9f19259SBoris Brezillon DRM_DEBUG("madvise not supported on imported BOs\n"); 1358b9f19259SBoris Brezillon ret = -EINVAL; 1359b9f19259SBoris Brezillon goto out_put_gem; 1360b9f19259SBoris Brezillon } 1361b9f19259SBoris Brezillon 1362b9f19259SBoris Brezillon mutex_lock(&bo->madv_lock); 1363b9f19259SBoris Brezillon 1364b9f19259SBoris Brezillon if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED && 1365b9f19259SBoris Brezillon !refcount_read(&bo->usecnt)) { 1366b9f19259SBoris Brezillon /* If the BO is about to be marked as purgeable, is not used 1367b9f19259SBoris Brezillon * and is not already purgeable or purged, add it to the 1368b9f19259SBoris Brezillon * purgeable list. 1369b9f19259SBoris Brezillon */ 1370b9f19259SBoris Brezillon vc4_bo_add_to_purgeable_pool(bo); 1371b9f19259SBoris Brezillon } else if (args->madv == VC4_MADV_WILLNEED && 1372b9f19259SBoris Brezillon bo->madv == VC4_MADV_DONTNEED && 1373b9f19259SBoris Brezillon !refcount_read(&bo->usecnt)) { 1374b9f19259SBoris Brezillon /* The BO has not been purged yet, just remove it from 1375b9f19259SBoris Brezillon * the purgeable list. 1376b9f19259SBoris Brezillon */ 1377b9f19259SBoris Brezillon vc4_bo_remove_from_purgeable_pool(bo); 1378b9f19259SBoris Brezillon } 1379b9f19259SBoris Brezillon 1380b9f19259SBoris Brezillon /* Save the purged state. */ 1381b9f19259SBoris Brezillon args->retained = bo->madv != __VC4_MADV_PURGED; 1382b9f19259SBoris Brezillon 1383b9f19259SBoris Brezillon /* Update internal madv state only if the bo was not purged. */ 1384b9f19259SBoris Brezillon if (bo->madv != __VC4_MADV_PURGED) 1385b9f19259SBoris Brezillon bo->madv = args->madv; 1386b9f19259SBoris Brezillon 1387b9f19259SBoris Brezillon mutex_unlock(&bo->madv_lock); 1388b9f19259SBoris Brezillon 1389b9f19259SBoris Brezillon ret = 0; 1390b9f19259SBoris Brezillon 1391b9f19259SBoris Brezillon out_put_gem: 1392f7a8cd30SEmil Velikov drm_gem_object_put(gem_obj); 1393b9f19259SBoris Brezillon 1394b9f19259SBoris Brezillon return ret; 1395b9f19259SBoris Brezillon } 1396