xref: /openbmc/linux/drivers/gpu/drm/v3d/v3d_gem.c (revision 23966841)
1 // SPDX-License-Identifier: GPL-2.0+
2 /* Copyright (C) 2014-2018 Broadcom */
3 
4 #include <drm/drmP.h>
5 #include <drm/drm_syncobj.h>
6 #include <linux/module.h>
7 #include <linux/platform_device.h>
8 #include <linux/pm_runtime.h>
9 #include <linux/reset.h>
10 #include <linux/device.h>
11 #include <linux/io.h>
12 #include <linux/sched/signal.h>
13 
14 #include "uapi/drm/v3d_drm.h"
15 #include "v3d_drv.h"
16 #include "v3d_regs.h"
17 #include "v3d_trace.h"
18 
19 static void
20 v3d_init_core(struct v3d_dev *v3d, int core)
21 {
22 	/* Set OVRTMUOUT, which means that the texture sampler uniform
23 	 * configuration's tmu output type field is used, instead of
24 	 * using the hardware default behavior based on the texture
25 	 * type.  If you want the default behavior, you can still put
26 	 * "2" in the indirect texture state's output_type field.
27 	 */
28 	if (v3d->ver < 40)
29 		V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);
30 
31 	/* Whenever we flush the L2T cache, we always want to flush
32 	 * the whole thing.
33 	 */
34 	V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
35 	V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
36 }
37 
38 /* Sets invariant state for the HW. */
39 static void
40 v3d_init_hw_state(struct v3d_dev *v3d)
41 {
42 	v3d_init_core(v3d, 0);
43 }
44 
45 static void
46 v3d_idle_axi(struct v3d_dev *v3d, int core)
47 {
48 	V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);
49 
50 	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
51 		      (V3D_GMP_STATUS_RD_COUNT_MASK |
52 		       V3D_GMP_STATUS_WR_COUNT_MASK |
53 		       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
54 		DRM_ERROR("Failed to wait for safe GMP shutdown\n");
55 	}
56 }
57 
58 static void
59 v3d_idle_gca(struct v3d_dev *v3d)
60 {
61 	if (v3d->ver >= 41)
62 		return;
63 
64 	V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);
65 
66 	if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
67 		      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
68 		     V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
69 		DRM_ERROR("Failed to wait for safe GCA shutdown\n");
70 	}
71 }
72 
73 static void
74 v3d_reset_by_bridge(struct v3d_dev *v3d)
75 {
76 	int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);
77 
78 	if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
79 		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
80 				 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
81 		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);
82 
83 		/* GFXH-1383: The SW_INIT may cause a stray write to address 0
84 		 * of the unit, so reset it to its power-on value here.
85 		 */
86 		V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
87 	} else {
88 		WARN_ON_ONCE(V3D_GET_FIELD(version,
89 					   V3D_TOP_GR_BRIDGE_MAJOR) != 7);
90 		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
91 				 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
92 		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
93 	}
94 }
95 
96 static void
97 v3d_reset_v3d(struct v3d_dev *v3d)
98 {
99 	if (v3d->reset)
100 		reset_control_reset(v3d->reset);
101 	else
102 		v3d_reset_by_bridge(v3d);
103 
104 	v3d_init_hw_state(v3d);
105 }
106 
107 void
108 v3d_reset(struct v3d_dev *v3d)
109 {
110 	struct drm_device *dev = &v3d->drm;
111 
112 	DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n");
113 	DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n",
114 		      V3D_CORE_READ(0, V3D_ERR_STAT));
115 	trace_v3d_reset_begin(dev);
116 
117 	/* XXX: only needed for safe powerdown, not reset. */
118 	if (false)
119 		v3d_idle_axi(v3d, 0);
120 
121 	v3d_idle_gca(v3d);
122 	v3d_reset_v3d(v3d);
123 
124 	v3d_mmu_set_page_table(v3d);
125 	v3d_irq_reset(v3d);
126 
127 	trace_v3d_reset_end(dev);
128 }
129 
130 static void
131 v3d_flush_l3(struct v3d_dev *v3d)
132 {
133 	if (v3d->ver < 41) {
134 		u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);
135 
136 		V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
137 			      gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);
138 
139 		if (v3d->ver < 33) {
140 			V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
141 				      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
142 		}
143 	}
144 }
145 
146 /* Invalidates the (read-only) L2C cache.  This was the L2 cache for
147  * uniforms and instructions on V3D 3.2.
148  */
149 static void
150 v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
151 {
152 	if (v3d->ver > 32)
153 		return;
154 
155 	V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
156 		       V3D_L2CACTL_L2CCLR |
157 		       V3D_L2CACTL_L2CENA);
158 }
159 
160 /* Invalidates texture L2 cachelines */
161 static void
162 v3d_flush_l2t(struct v3d_dev *v3d, int core)
163 {
164 	/* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
165 	 * need to wait for completion before dispatching the job --
166 	 * L2T accesses will be stalled until the flush has completed.
167 	 * However, we do need to make sure we don't try to trigger a
168 	 * new flush while the L2_CLEAN queue is trying to
169 	 * synchronously clean after a job.
170 	 */
171 	mutex_lock(&v3d->cache_clean_lock);
172 	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
173 		       V3D_L2TCACTL_L2TFLS |
174 		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
175 	mutex_unlock(&v3d->cache_clean_lock);
176 }
177 
178 /* Cleans texture L1 and L2 cachelines (writing back dirty data).
179  *
180  * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
181  * executed, we need to make sure that the clean is done before
182  * signaling job completion.  So, we synchronously wait before
183  * returning, and we make sure that L2 invalidates don't happen in the
184  * meantime to confuse our are-we-done checks.
185  */
186 void
187 v3d_clean_caches(struct v3d_dev *v3d)
188 {
189 	struct drm_device *dev = &v3d->drm;
190 	int core = 0;
191 
192 	trace_v3d_cache_clean_begin(dev);
193 
194 	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
195 	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
196 		       V3D_L2TCACTL_L2TFLS), 100)) {
197 		DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
198 	}
199 
200 	mutex_lock(&v3d->cache_clean_lock);
201 	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
202 		       V3D_L2TCACTL_L2TFLS |
203 		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));
204 
205 	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
206 		       V3D_L2TCACTL_L2TFLS), 100)) {
207 		DRM_ERROR("Timeout waiting for L2T clean\n");
208 	}
209 
210 	mutex_unlock(&v3d->cache_clean_lock);
211 
212 	trace_v3d_cache_clean_end(dev);
213 }
214 
215 /* Invalidates the slice caches.  These are read-only caches. */
216 static void
217 v3d_invalidate_slices(struct v3d_dev *v3d, int core)
218 {
219 	V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
220 		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
221 		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
222 		       V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
223 		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
224 }
225 
226 void
227 v3d_invalidate_caches(struct v3d_dev *v3d)
228 {
229 	/* Invalidate the caches from the outside in.  That way if
230 	 * another CL's concurrent use of nearby memory were to pull
231 	 * an invalidated cacheline back in, we wouldn't leave stale
232 	 * data in the inner cache.
233 	 */
234 	v3d_flush_l3(v3d);
235 	v3d_invalidate_l2c(v3d, 0);
236 	v3d_flush_l2t(v3d, 0);
237 	v3d_invalidate_slices(v3d, 0);
238 }
239 
240 /* Takes the reservation lock on all the BOs being referenced, so that
241  * at queue submit time we can update the reservations.
242  *
243  * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
244  * (all of which are on exec->unref_list).  They're entirely private
245  * to v3d, so we don't attach dma-buf fences to them.
246  */
247 static int
248 v3d_lock_bo_reservations(struct v3d_job *job,
249 			 struct ww_acquire_ctx *acquire_ctx)
250 {
251 	int i, ret;
252 
253 	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
254 	if (ret)
255 		return ret;
256 
257 	for (i = 0; i < job->bo_count; i++) {
258 		ret = drm_gem_fence_array_add_implicit(&job->deps,
259 						       job->bo[i], true);
260 		if (ret) {
261 			drm_gem_unlock_reservations(job->bo, job->bo_count,
262 						    acquire_ctx);
263 			return ret;
264 		}
265 	}
266 
267 	return 0;
268 }
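
/* Note that, besides taking the ww locks, the loop above also records
 * each BO's existing reservation fences in job->deps (the "implicit"
 * in drm_gem_fence_array_add_implicit()), which is what makes the job
 * wait for previous users of those buffers before it runs.
 */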
269 
270 /**
271  * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
272  * referenced by the job.
273  * @dev: DRM device
274  * @file_priv: DRM file for this fd
275  * @job: V3D job being set up
 * @bo_handles: user pointer to an array of u32 GEM handles
 * @bo_count: number of entries in @bo_handles
276  *
277  * Resolves the GEM handles in the submitted job's BO list into GEM
278  * object pointers and takes a reference on each one, which is held
279  * for the lifetime of the job.
280  *
281  * Note that this function doesn't need to unreference the BOs on
282  * failure, because that will happen at v3d_job_free() time.
283  */
284 static int
285 v3d_lookup_bos(struct drm_device *dev,
286 	       struct drm_file *file_priv,
287 	       struct v3d_job *job,
288 	       u64 bo_handles,
289 	       u32 bo_count)
290 {
291 	u32 *handles;
292 	int ret = 0;
293 	int i;
294 
295 	job->bo_count = bo_count;
296 
297 	if (!job->bo_count) {
298 		/* Rendering always needs to reference at least one BO,
299 		 * so reject an empty list up front.
300 		 */
301 		DRM_DEBUG("Rendering requires BOs\n");
302 		return -EINVAL;
303 	}
304 
305 	job->bo = kvmalloc_array(job->bo_count,
306 				 sizeof(struct drm_gem_cma_object *),
307 				 GFP_KERNEL | __GFP_ZERO);
308 	if (!job->bo) {
309 		DRM_DEBUG("Failed to allocate validated BO pointers\n");
310 		return -ENOMEM;
311 	}
312 
313 	handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL);
314 	if (!handles) {
315 		ret = -ENOMEM;
316 		DRM_DEBUG("Failed to allocate incoming GEM handles\n");
317 		goto fail;
318 	}
319 
320 	if (copy_from_user(handles,
321 			   (void __user *)(uintptr_t)bo_handles,
322 			   job->bo_count * sizeof(u32))) {
323 		ret = -EFAULT;
324 		DRM_DEBUG("Failed to copy in GEM handles\n");
325 		goto fail;
326 	}
327 
328 	spin_lock(&file_priv->table_lock);
329 	for (i = 0; i < job->bo_count; i++) {
330 		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
331 						     handles[i]);
332 		if (!bo) {
333 			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
334 				  i, handles[i]);
335 			ret = -ENOENT;
336 			spin_unlock(&file_priv->table_lock);
337 			goto fail;
338 		}
339 		drm_gem_object_get(bo);
340 		job->bo[i] = bo;
341 	}
342 	spin_unlock(&file_priv->table_lock);
343 
344 fail:
345 	kvfree(handles);
346 	return ret;
347 }
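
/* Illustrative userspace sketch (not driver code) of the BO list that
 * v3d_lookup_bos() consumes above: bo_handles is a u64 user pointer to
 * an array of u32 GEM handles, bo_handle_count its length.  All names
 * below are hypothetical placeholders.
 *
 *	uint32_t handles[2] = { cl_bo_handle, fb_bo_handle };
 *	struct drm_v3d_submit_cl submit = { 0 };
 *
 *	submit.bo_handles = (uintptr_t)handles;
 *	submit.bo_handle_count = 2;
 *
 * The CSD submit ioctl further down uses the same layout.
 */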
348 
349 static void
350 v3d_job_free(struct kref *ref)
351 {
352 	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
353 	unsigned long index;
354 	struct dma_fence *fence;
355 	int i;
356 
357 	for (i = 0; i < job->bo_count; i++) {
358 		if (job->bo[i])
359 			drm_gem_object_put_unlocked(job->bo[i]);
360 	}
361 	kvfree(job->bo);
362 
363 	xa_for_each(&job->deps, index, fence) {
364 		dma_fence_put(fence);
365 	}
366 	xa_destroy(&job->deps);
367 
368 	dma_fence_put(job->irq_fence);
369 	dma_fence_put(job->done_fence);
370 
371 	pm_runtime_mark_last_busy(job->v3d->dev);
372 	pm_runtime_put_autosuspend(job->v3d->dev);
373 
374 	kfree(job);
375 }
376 
377 static void
378 v3d_render_job_free(struct kref *ref)
379 {
380 	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
381 						  base.refcount);
382 	struct v3d_bo *bo, *save;
383 
384 	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
385 		drm_gem_object_put_unlocked(&bo->base.base);
386 	}
387 
388 	v3d_job_free(ref);
389 }
390 
391 void v3d_job_put(struct v3d_job *job)
392 {
393 	kref_put(&job->refcount, job->free);
394 }
395 
396 int
397 v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
398 		  struct drm_file *file_priv)
399 {
400 	int ret;
401 	struct drm_v3d_wait_bo *args = data;
402 	ktime_t start = ktime_get();
403 	u64 delta_ns;
404 	unsigned long timeout_jiffies =
405 		nsecs_to_jiffies_timeout(args->timeout_ns);
406 
407 	if (args->pad != 0)
408 		return -EINVAL;
409 
410 	ret = drm_gem_reservation_object_wait(file_priv, args->handle,
411 					      true, timeout_jiffies);
412 
413 	/* Decrement the user's timeout, in case we got interrupted
414 	 * such that the ioctl will be restarted.
415 	 */
416 	delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
417 	if (delta_ns < args->timeout_ns)
418 		args->timeout_ns -= delta_ns;
419 	else
420 		args->timeout_ns = 0;
421 
422 	/* Asked to wait beyond the jiffy/scheduler precision? */
423 	if (ret == -ETIME && args->timeout_ns)
424 		ret = -EAGAIN;
425 
426 	return ret;
427 }
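
/* Hypothetical userspace pattern for the wait ioctl above, to show why
 * timeout_ns is decremented in place: the same argument struct can be
 * resubmitted after an interruption and the total time spent waiting
 * stays bounded by the original timeout.  bo_handle and fd are
 * placeholders; the struct and request come from uapi/drm/v3d_drm.h.
 *
 *	struct drm_v3d_wait_bo wait = {
 *		.handle = bo_handle,
 *		.timeout_ns = 100 * 1000 * 1000,
 *	};
 *	int ret;
 *
 *	do {
 *		ret = ioctl(fd, DRM_IOCTL_V3D_WAIT_BO, &wait);
 *	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
 */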
428 
429 static int
430 v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
431 	     struct v3d_job *job, void (*free)(struct kref *ref),
432 	     u32 in_sync)
433 {
434 	struct dma_fence *in_fence = NULL;
435 	int ret;
436 
437 	job->v3d = v3d;
438 	job->free = free;
439 
440 	ret = pm_runtime_get_sync(v3d->dev);
441 	if (ret < 0) {
		/* get_sync raises the usage count even on failure. */
		pm_runtime_put_noidle(v3d->dev);
442 		return ret;
	}
443 
444 	xa_init_flags(&job->deps, XA_FLAGS_ALLOC);
445 
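	/* in_sync == 0 means "no input fence": drm_syncobj_find_fence()
	 * then returns -ENOENT, which is ignored here (only -EINVAL is
	 * treated as fatal), and drm_gem_fence_array_add() below is a
	 * no-op for a NULL fence.
	 */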
446 	ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, &in_fence);
447 	if (ret == -EINVAL)
448 		goto fail;
449 
450 	ret = drm_gem_fence_array_add(&job->deps, in_fence);
451 	if (ret)
452 		goto fail;
453 
454 	kref_init(&job->refcount);
455 
456 	return 0;
457 fail:
458 	xa_destroy(&job->deps);
459 	pm_runtime_put_autosuspend(v3d->dev);
460 	return ret;
461 }
462 
463 static int
464 v3d_push_job(struct v3d_file_priv *v3d_priv,
465 	     struct v3d_job *job, enum v3d_queue queue)
466 {
467 	int ret;
468 
469 	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
470 				 v3d_priv);
471 	if (ret)
472 		return ret;
473 
474 	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
475 
476 	/* put by scheduler job completion */
477 	kref_get(&job->refcount);
478 
479 	drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[queue]);
480 
481 	return 0;
482 }
483 
484 static void
485 v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
486 					 struct v3d_job *job,
487 					 struct ww_acquire_ctx *acquire_ctx,
488 					 u32 out_sync,
489 					 struct dma_fence *done_fence)
490 {
491 	struct drm_syncobj *sync_out;
492 	int i;
493 
494 	for (i = 0; i < job->bo_count; i++) {
495 		/* XXX: Use shared fences for read-only objects. */
496 		reservation_object_add_excl_fence(job->bo[i]->resv,
497 						  job->done_fence);
498 	}
499 
500 	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
501 
502 	/* Update the return sync object for the job */
503 	sync_out = drm_syncobj_find(file_priv, out_sync);
504 	if (sync_out) {
505 		drm_syncobj_replace_fence(sync_out, done_fence);
506 		drm_syncobj_put(sync_out);
507 	}
508 }
509 
510 /**
511  * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
512  * @dev: DRM device
513  * @data: ioctl argument
514  * @file_priv: DRM file for this fd
515  *
516  * This is the main entrypoint for userspace to submit a 3D frame to
517  * the GPU.  Userspace provides the binner command list (if
518  * applicable), and the kernel sets up the render command list to draw
519  * to the framebuffer described in the ioctl, using the command lists
520  * that the 3D engine's binner will produce.
521  */
522 int
523 v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
524 		    struct drm_file *file_priv)
525 {
526 	struct v3d_dev *v3d = to_v3d_dev(dev);
527 	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
528 	struct drm_v3d_submit_cl *args = data;
529 	struct v3d_bin_job *bin = NULL;
530 	struct v3d_render_job *render;
531 	struct ww_acquire_ctx acquire_ctx;
532 	int ret = 0;
533 
534 	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
535 
536 	if (args->pad != 0) {
537 		DRM_INFO("pad must be zero: %d\n", args->pad);
538 		return -EINVAL;
539 	}
540 
541 	render = kcalloc(1, sizeof(*render), GFP_KERNEL);
542 	if (!render)
543 		return -ENOMEM;
544 
545 	render->start = args->rcl_start;
546 	render->end = args->rcl_end;
547 	INIT_LIST_HEAD(&render->unref_list);
548 
549 	ret = v3d_job_init(v3d, file_priv, &render->base,
550 			   v3d_render_job_free, args->in_sync_rcl);
551 	if (ret) {
552 		kfree(render);
553 		return ret;
554 	}
555 
556 	if (args->bcl_start != args->bcl_end) {
557 		bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
558 		if (!bin) {
			v3d_job_put(&render->base);
559 			return -ENOMEM;
		}
560 
561 		ret = v3d_job_init(v3d, file_priv, &bin->base,
562 				   v3d_job_free, args->in_sync_bcl);
563 		if (ret) {
564 			v3d_job_put(&render->base);
			kfree(bin);
565 			return ret;
566 		}
567 
568 		bin->start = args->bcl_start;
569 		bin->end = args->bcl_end;
570 		bin->qma = args->qma;
571 		bin->qms = args->qms;
572 		bin->qts = args->qts;
573 		bin->render = render;
574 	}
575 
576 	ret = v3d_lookup_bos(dev, file_priv, &render->base,
577 			     args->bo_handles, args->bo_handle_count);
578 	if (ret)
579 		goto fail;
580 
581 	ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
582 	if (ret)
583 		goto fail;
584 
585 	mutex_lock(&v3d->sched_lock);
586 	if (bin) {
587 		ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
588 		if (ret)
589 			goto fail_unreserve;
590 
591 		ret = drm_gem_fence_array_add(&render->base.deps,
592 					      dma_fence_get(bin->base.done_fence));
593 		if (ret)
594 			goto fail_unreserve;
595 	}
596 
597 	ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
598 	if (ret)
599 		goto fail_unreserve;
600 	mutex_unlock(&v3d->sched_lock);
601 
602 	v3d_attach_fences_and_unlock_reservation(file_priv,
603 						 &render->base,
604 						 &acquire_ctx,
605 						 args->out_sync,
606 						 render->base.done_fence);
607 
608 	if (bin)
609 		v3d_job_put(&bin->base);
610 	v3d_job_put(&render->base);
611 
612 	return 0;
613 
614 fail_unreserve:
615 	mutex_unlock(&v3d->sched_lock);
616 	drm_gem_unlock_reservations(render->base.bo,
617 				    render->base.bo_count, &acquire_ctx);
618 fail:
619 	if (bin)
620 		v3d_job_put(&bin->base);
621 	v3d_job_put(&render->base);
622 
623 	return ret;
624 }
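
/* To summarize the fence topology built above: the bin job's done_fence
 * (when there is a bin job) is added to the render job's dependencies,
 * so RENDER only starts once BIN has finished; the render job's
 * done_fence is then attached to every BO as the exclusive fence and,
 * if requested, installed in the out_sync syncobj for userspace.
 */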
625 
626 /**
627  * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
628  * @dev: DRM device
629  * @data: ioctl argument
630  * @file_priv: DRM file for this fd
631  *
632  * Userspace provides the register setup for the TFU, which we don't
633  * need to validate since the TFU is behind the MMU.
634  */
635 int
636 v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
637 		     struct drm_file *file_priv)
638 {
639 	struct v3d_dev *v3d = to_v3d_dev(dev);
640 	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
641 	struct drm_v3d_submit_tfu *args = data;
642 	struct v3d_tfu_job *job;
643 	struct ww_acquire_ctx acquire_ctx;
644 	int ret = 0;
645 
646 	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
647 
648 	job = kcalloc(1, sizeof(*job), GFP_KERNEL);
649 	if (!job)
650 		return -ENOMEM;
651 
652 	ret = v3d_job_init(v3d, file_priv, &job->base,
653 			   v3d_job_free, args->in_sync);
654 	if (ret) {
655 		kfree(job);
656 		return ret;
657 	}
658 
659 	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
660 			       sizeof(*job->base.bo), GFP_KERNEL);
661 	if (!job->base.bo) {
662 		v3d_job_put(&job->base);
663 		return -ENOMEM;
664 	}
665 
666 	job->args = *args;
667 
668 	spin_lock(&file_priv->table_lock);
669 	for (job->base.bo_count = 0;
670 	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
671 	     job->base.bo_count++) {
672 		struct drm_gem_object *bo;
673 
674 		if (!args->bo_handles[job->base.bo_count])
675 			break;
676 
677 		bo = idr_find(&file_priv->object_idr,
678 			      args->bo_handles[job->base.bo_count]);
679 		if (!bo) {
680 			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
681 				  job->base.bo_count,
682 				  args->bo_handles[job->base.bo_count]);
683 			ret = -ENOENT;
684 			spin_unlock(&file_priv->table_lock);
685 			goto fail;
686 		}
687 		drm_gem_object_get(bo);
688 		job->base.bo[job->base.bo_count] = bo;
689 	}
690 	spin_unlock(&file_priv->table_lock);
691 
692 	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
693 	if (ret)
694 		goto fail;
695 
696 	mutex_lock(&v3d->sched_lock);
697 	ret = v3d_push_job(v3d_priv, &job->base, V3D_TFU);
698 	if (ret)
699 		goto fail_unreserve;
700 	mutex_unlock(&v3d->sched_lock);
701 
702 	v3d_attach_fences_and_unlock_reservation(file_priv,
703 						 &job->base, &acquire_ctx,
704 						 args->out_sync,
705 						 job->base.done_fence);
706 
707 	v3d_job_put(&job->base);
708 
709 	return 0;
710 
711 fail_unreserve:
712 	mutex_unlock(&v3d->sched_lock);
713 	drm_gem_unlock_reservations(job->base.bo, job->base.bo_count,
714 				    &acquire_ctx);
715 fail:
716 	v3d_job_put(&job->base);
717 
718 	return ret;
719 }
720 
721 /**
722  * v3d_submit_csd_ioctl() - Submits a CSD (compute shader dispatch) job to the V3D.
723  * @dev: DRM device
724  * @data: ioctl argument
725  * @file_priv: DRM file for this fd
726  *
727  * Userspace provides the register setup for the CSD, which we don't
728  * need to validate since the CSD is behind the MMU.
729  */
730 int
731 v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
732 		     struct drm_file *file_priv)
733 {
734 	struct v3d_dev *v3d = to_v3d_dev(dev);
735 	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
736 	struct drm_v3d_submit_csd *args = data;
737 	struct v3d_csd_job *job;
738 	struct v3d_job *clean_job;
739 	struct ww_acquire_ctx acquire_ctx;
740 	int ret;
741 
742 	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);
743 
744 	if (!v3d_has_csd(v3d)) {
745 		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
746 		return -EINVAL;
747 	}
748 
749 	job = kcalloc(1, sizeof(*job), GFP_KERNEL);
750 	if (!job)
751 		return -ENOMEM;
752 
753 	ret = v3d_job_init(v3d, file_priv, &job->base,
754 			   v3d_job_free, args->in_sync);
755 	if (ret) {
756 		kfree(job);
757 		return ret;
758 	}
759 
760 	clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
761 	if (!clean_job) {
		/* The put drops the last reference and frees job via
		 * v3d_job_free(), so no explicit kfree() is needed here.
		 */
762 		v3d_job_put(&job->base);
764 		return -ENOMEM;
765 	}
766 
767 	ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0);
768 	if (ret) {
769 		v3d_job_put(&job->base);
770 		kfree(clean_job);
771 		return ret;
772 	}
773 
774 	job->args = *args;
775 
776 	ret = v3d_lookup_bos(dev, file_priv, clean_job,
777 			     args->bo_handles, args->bo_handle_count);
778 	if (ret)
779 		goto fail;
780 
781 	ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
782 	if (ret)
783 		goto fail;
784 
785 	mutex_lock(&v3d->sched_lock);
786 	ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD);
787 	if (ret)
788 		goto fail_unreserve;
789 
790 	ret = drm_gem_fence_array_add(&clean_job->deps,
791 				      dma_fence_get(job->base.done_fence));
792 	if (ret)
793 		goto fail_unreserve;
794 
795 	ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
796 	if (ret)
797 		goto fail_unreserve;
798 	mutex_unlock(&v3d->sched_lock);
799 
800 	v3d_attach_fences_and_unlock_reservation(file_priv,
801 						 clean_job,
802 						 &acquire_ctx,
803 						 args->out_sync,
804 						 clean_job->done_fence);
805 
806 	v3d_job_put(&job->base);
807 	v3d_job_put(clean_job);
808 
809 	return 0;
810 
811 fail_unreserve:
812 	mutex_unlock(&v3d->sched_lock);
813 	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
814 				    &acquire_ctx);
815 fail:
816 	v3d_job_put(&job->base);
817 	v3d_job_put(clean_job);
818 
819 	return ret;
820 }
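
/* As in the CL path, the chaining above is expressed through fences:
 * the CACHE_CLEAN job depends on the CSD job's done_fence, so the L2T
 * clean only runs after the dispatch completes, and it is the clean
 * job's done_fence that gets attached to the BOs and out_sync, so
 * userspace only observes completion once the caches are clean.
 */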
821 
822 int
823 v3d_gem_init(struct drm_device *dev)
824 {
825 	struct v3d_dev *v3d = to_v3d_dev(dev);
826 	u32 pt_size = 4096 * 1024;
827 	int ret, i;
828 
829 	for (i = 0; i < V3D_MAX_QUEUES; i++)
830 		v3d->queue[i].fence_context = dma_fence_context_alloc(1);
831 
832 	spin_lock_init(&v3d->mm_lock);
833 	spin_lock_init(&v3d->job_lock);
834 	mutex_init(&v3d->bo_lock);
835 	mutex_init(&v3d->reset_lock);
836 	mutex_init(&v3d->sched_lock);
837 	mutex_init(&v3d->cache_clean_lock);
838 
839 	/* Note: We don't allocate address 0.  Various bits of HW
840 	 * treat 0 as special, such as the occlusion query counters
841 	 * where 0 means "disabled".
842 	 */
843 	drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);
844 
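	/* pt_size / sizeof(u32) is the number of page table entries; with
	 * the V3D MMU's 4KB pages, the 4MB of page tables allocated below
	 * covers a 4GB GPU virtual address space.
	 */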
845 	v3d->pt = dma_alloc_wc(v3d->dev, pt_size,
846 			       &v3d->pt_paddr,
847 			       GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
848 	if (!v3d->pt) {
849 		drm_mm_takedown(&v3d->mm);
850 		dev_err(v3d->dev,
851 			"Failed to allocate page tables. "
852 			"Please ensure you have CMA enabled.\n");
853 		return -ENOMEM;
854 	}
855 
856 	v3d_init_hw_state(v3d);
857 	v3d_mmu_set_page_table(v3d);
858 
859 	ret = v3d_sched_init(v3d);
860 	if (ret) {
861 		drm_mm_takedown(&v3d->mm);
862 		dma_free_coherent(v3d->dev, pt_size, (void *)v3d->pt,
863 				  v3d->pt_paddr);
		return ret;
864 	}
865 
866 	return 0;
867 }
868 
869 void
870 v3d_gem_destroy(struct drm_device *dev)
871 {
872 	struct v3d_dev *v3d = to_v3d_dev(dev);
873 
874 	v3d_sched_fini(v3d);
875 
876 	/* Waiting for jobs to finish would need to be done before
877 	 * unregistering V3D.
878 	 */
879 	WARN_ON(v3d->bin_job);
880 	WARN_ON(v3d->render_job);
881 
882 	drm_mm_takedown(&v3d->mm);
883 
884 	dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr);
885 }
886