// SPDX-License-Identifier: MIT

#include <drm/drm_exec.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * In order to use the UAPI, a user client must first initialize the VA space
 * via the DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space
 * should be managed by the kernel and which by the UMD.
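 *
 * A minimal sketch of such an initialization using libdrm's drmIoctl(), with
 * the lower 4 GiB reserved for kernel managed allocations (the chosen split,
 * the file descriptor fd and the error handling are placeholders and entirely
 * up to the UMD)::
 *
 *	struct drm_nouveau_vm_init init = {
 *		.kernel_managed_addr = 0x0,
 *		.kernel_managed_size = 1ULL << 32,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init);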
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
 * userspace-manageable portion of the VA space. It provides operations to map
 * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
 * backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory backed mappings are unmapped, the kernel will make
 * sure that the corresponding sparse mapping takes their place again.
 * Requests to unmap a sparse mapping that still contains memory backed mappings
 * will result in those memory backed mappings being unmapped first.
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel will
 * make sure to unmap all memory backed mappings within the given range,
 * splitting up memory backed mappings which are only partially contained
 * within the given range. Unmap requests with the sparse flag set, however,
 * must exactly match the range of a previously mapped sparse mapping.
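 *
 * For instance, a UMD could reserve a sparse region and back a part of it with
 * a GEM object by submitting two map operations, either in a single VM_BIND
 * ioctl or in separate ones. The sketch below is purely illustrative
 * (addresses, sizes and the buffer object handle are placeholders; syncobjs
 * and error handling are omitted)::
 *
 *	struct drm_nouveau_vm_bind_op ops[] = {
 *		{	// sparse mapping, not backed by a GEM object
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.flags = DRM_NOUVEAU_VM_BIND_SPARSE,
 *			.addr = 0x100000000ULL,
 *			.range = 0x10000000,
 *		},
 *		{	// memory backed mapping within the sparse region
 *			.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *			.handle = bo_handle,
 *			.bo_offset = 0,
 *			.addr = 0x100000000ULL,
 *			.range = 0x10000,
 *		},
 *	};
 *
 *	struct drm_nouveau_vm_bind bind = {
 *		.op_count = 2,
 *		.op_ptr = (uintptr_t)ops,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);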
 *
 * While the kernel generally permits arbitrary sequences and ranges of memory
 * backed mappings being mapped and unmapped, either within a single or multiple
 * VM_BIND ioctl calls, there are some restrictions for sparse mappings.
 *
 * The kernel does not permit:
 *   - unmapping non-existent sparse mappings
 *   - unmapping a sparse mapping and mapping a new sparse mapping overlapping
 *     the range of the previously unmapped sparse mapping within the same
 *     VM_BIND ioctl
 *   - unmapping a sparse mapping and mapping new memory backed mappings
 *     overlapping the range of the previously unmapped sparse mapping within
 *     the same VM_BIND ioctl
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space, there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out
 * at the time the bind operation is executed, the kernel will stash the mapping
 * details into its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent to userspace, it is
 * guaranteed that all the backing memory is swapped back in and all the memory
 * mappings, as requested by userspace previously, are actually mapped once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job finished execution. If executed synchronously, the ioctl will
 * block until the bind job is finished; for synchronous jobs the kernel does
 * not permit any syncobjs to be submitted.
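 *
 * As an illustration (assuming the struct drm_nouveau_sync layout from the
 * UAPI header and reusing an op array like the one sketched above; error
 * handling omitted), an asynchronous bind job signalling a binary syncobj on
 * completion could be requested like this::
 *
 *	struct drm_nouveau_sync sig = {
 *		.flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
 *		.handle = syncobj_handle,
 *	};
 *
 *	struct drm_nouveau_vm_bind bind = {
 *		.flags = DRM_NOUVEAU_VM_BIND_RUN_ASYNC,
 *		.op_count = 2,
 *		.op_ptr = (uintptr_t)ops,
 *		.sig_count = 1,
 *		.sig_ptr = (uintptr_t)&sig,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);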
 *
 * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
 * jobs are always executed asynchronously, and, just like VM_BIND jobs, provide
 * the option to synchronize them with syncobjs.
 *
 * Besides that, EXEC jobs can be scheduled for a specified channel to execute on.
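 *
 * A minimal sketch of such a submission (the push buffer VA and length, the
 * channel id and the error handling are placeholders; syncobjs are omitted)::
 *
 *	struct drm_nouveau_exec_push push = {
 *		.va = pushbuf_va,	// GPU VA the push buffer is mapped at
 *		.va_len = pushbuf_len,
 *	};
 *
 *	struct drm_nouveau_exec exec = {
 *		.channel = channel_id,
 *		.push_count = 1,
 *		.push_ptr = (uintptr_t)&push,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);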
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs have an
 * up to date view of the VA space. However, the actual mappings might still be
 * pending. Hence, EXEC jobs require the fences of the corresponding VM_BIND
 * jobs they depend on to be attached to them.
 */

static int
nouveau_exec_job_submit(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_cli *cli = job->cli;
	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
	struct drm_exec *exec = &job->exec;
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	ret = nouveau_fence_new(&exec_job->fence);
	if (ret)
		return ret;

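	/* Lock the GEM objects backing the mappings in the VA space and
	 * reserve a dma-fence slot for each of them, retrying the whole
	 * sequence on contention.
	 */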
	nouveau_uvmm_lock(uvmm);
	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
			    DRM_EXEC_IGNORE_DUPLICATES);
	drm_exec_until_all_locked(exec) {
		struct drm_gpuva *va;

		drm_gpuva_for_each_va(va, &uvmm->umgr) {
			if (unlikely(va == &uvmm->umgr.kernel_alloc_node))
				continue;

			ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
			drm_exec_retry_on_contention(exec);
			if (ret)
				goto err_uvmm_unlock;
		}
	}
	nouveau_uvmm_unlock(uvmm);

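	/* Validate all locked BOs, i.e. make sure their backing memory is
	 * resident before the job is run.
	 */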
	drm_exec_for_each_locked_object(exec, index, obj) {
		struct nouveau_bo *nvbo = nouveau_gem_object(obj);

		ret = nouveau_bo_validate(nvbo, true, false);
		if (ret)
			goto err_exec_fini;
	}

	return 0;

err_uvmm_unlock:
	nouveau_uvmm_unlock(uvmm);
err_exec_fini:
	drm_exec_fini(exec);
	return ret;
}

static void
nouveau_exec_job_armed_submit(struct nouveau_job *job)
{
	struct drm_exec *exec = &job->exec;
	struct drm_gem_object *obj;
	unsigned long index;

	drm_exec_for_each_locked_object(exec, index, obj)
		dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);

	drm_exec_fini(exec);
}

static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;
	struct nouveau_fence *fence = exec_job->fence;
	int i, ret;

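	/* Make sure the ring has enough free space for all push buffers plus
	 * the subsequent fence emission.
	 */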
	ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
	if (ret) {
		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
		return ERR_PTR(ret);
	}

	for (i = 0; i < exec_job->push.count; i++) {
		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;

		nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
	}

	ret = nouveau_fence_emit(fence, chan);
	if (ret) {
		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
		WIND_RING(chan);
		return ERR_PTR(ret);
	}

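	/* The fence reference is handed over to the caller through the
	 * returned dma_fence pointer; clear the job's pointer so that
	 * nouveau_exec_job_free() does not drop it.
	 */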
	exec_job->fence = NULL;

	return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

	nouveau_job_free(job);

	nouveau_fence_unref(&exec_job->fence);
	kfree(exec_job->push.s);
	kfree(exec_job);
}

static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;

	if (unlikely(!atomic_read(&chan->killed)))
		nouveau_channel_kill(chan);

	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
		  chan->chid);

	nouveau_sched_entity_fini(job->entity);

	return DRM_GPU_SCHED_STAT_ENODEV;
}

static struct nouveau_job_ops nouveau_exec_job_ops = {
	.submit = nouveau_exec_job_submit,
	.armed_submit = nouveau_exec_job_armed_submit,
	.run = nouveau_exec_job_run,
	.free = nouveau_exec_job_free,
	.timeout = nouveau_exec_job_timeout,
};

int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
		      struct nouveau_exec_job_args *__args)
{
	struct nouveau_exec_job *job;
	struct nouveau_job_args args = {};
	int i, ret;

	for (i = 0; i < __args->push.count; i++) {
		struct drm_nouveau_exec_push *p = &__args->push.s[i];

		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
			NV_PRINTK(err, nouveau_cli(__args->file_priv),
				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
			return -EINVAL;
		}
	}

	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->push.count = __args->push.count;
	if (__args->push.count) {
		job->push.s = kmemdup(__args->push.s,
				      sizeof(*__args->push.s) *
				      __args->push.count,
				      GFP_KERNEL);
		if (!job->push.s) {
			ret = -ENOMEM;
			goto err_free_job;
		}
	}

	job->chan = __args->chan;

	args.sched_entity = __args->sched_entity;
	args.file_priv = __args->file_priv;

	args.in_sync.count = __args->in_sync.count;
	args.in_sync.s = __args->in_sync.s;

	args.out_sync.count = __args->out_sync.count;
	args.out_sync.s = __args->out_sync.s;

	args.ops = &nouveau_exec_job_ops;
	args.resv_usage = DMA_RESV_USAGE_WRITE;

	ret = nouveau_job_init(&job->base, &args);
	if (ret)
		goto err_free_pushs;

	return 0;

err_free_pushs:
	kfree(job->push.s);
err_free_job:
	kfree(job);
	*pjob = NULL;

	return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
	struct nouveau_exec_job *job;
	int ret;

	ret = nouveau_exec_job_init(&job, args);
	if (ret)
		return ret;

	ret = nouveau_job_submit(&job->base);
	if (ret)
		goto err_job_fini;

	return 0;

err_job_fini:
	nouveau_job_fini(&job->base);
	return ret;
}

static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
		   struct drm_nouveau_exec *req)
{
	struct drm_nouveau_sync **s;
	u32 inc = req->wait_count;
	u64 ins = req->wait_ptr;
	u32 outc = req->sig_count;
	u64 outs = req->sig_ptr;
	u32 pushc = req->push_count;
	u64 pushs = req->push_ptr;
	int ret;

	if (pushc) {
		args->push.count = pushc;
		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
		if (IS_ERR(args->push.s))
			return PTR_ERR(args->push.s);
	}

	if (inc) {
		s = &args->in_sync.s;

		args->in_sync.count = inc;
		*s = u_memcpya(ins, inc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_pushs;
		}
	}

	if (outc) {
		s = &args->out_sync.s;

		args->out_sync.count = outc;
		*s = u_memcpya(outs, outc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_ins;
		}
	}

	return 0;

err_free_pushs:
	u_free(args->push.s);
err_free_ins:
	u_free(args->in_sync.s);
	return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
	u_free(args->push.s);
	u_free(args->in_sync.s);
	u_free(args->out_sync.s);
}

int
nouveau_exec_ioctl_exec(struct drm_device *dev,
			void *data,
			struct drm_file *file_priv)
{
	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_abi16_chan *chan16;
	struct nouveau_channel *chan = NULL;
	struct nouveau_exec_job_args args = {};
	struct drm_nouveau_exec *req = data;
	int ret = 0;

	if (unlikely(!abi16))
		return -ENOMEM;

	/* abi16 locks already */
	if (unlikely(!nouveau_cli_uvmm(cli)))
		return nouveau_abi16_put(abi16, -ENOSYS);

	list_for_each_entry(chan16, &abi16->channels, head) {
		if (chan16->chan->chid == req->channel) {
			chan = chan16->chan;
			break;
		}
	}

	if (!chan)
		return nouveau_abi16_put(abi16, -ENOENT);

	if (unlikely(atomic_read(&chan->killed)))
		return nouveau_abi16_put(abi16, -ENODEV);

	if (!chan->dma.ib_max)
		return nouveau_abi16_put(abi16, -ENOSYS);

	if (unlikely(req->push_count > NOUVEAU_GEM_MAX_PUSH)) {
		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
			  req->push_count, NOUVEAU_GEM_MAX_PUSH);
		return nouveau_abi16_put(abi16, -EINVAL);
	}

	ret = nouveau_exec_ucopy(&args, req);
	if (ret)
		goto out;

	args.sched_entity = &chan16->sched_entity;
	args.file_priv = file_priv;
	args.chan = chan;

	ret = nouveau_exec(&args);
	if (ret)
		goto out_free_args;

out_free_args:
	nouveau_exec_ufree(&args);
out:
	return nouveau_abi16_put(abi16, ret);
}