// SPDX-License-Identifier: MIT

#include <drm/drm_exec.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"

/**
 * DOC: Overview
 *
 * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
 * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
 *
 * In order to use the UAPI, a user client must first initialize the VA space
 * using the DRM_NOUVEAU_VM_INIT ioctl, specifying which region of the VA space
 * should be managed by the kernel and which by the UMD.
 *
 * The DRM_NOUVEAU_VM_BIND ioctl provides clients with an interface to manage
 * the userspace-manageable portion of the VA space. It provides operations to
 * map and unmap memory. Mappings may be flagged as sparse. Sparse mappings are
 * not backed by a GEM object and the kernel will ignore GEM handles provided
 * alongside a sparse mapping.
 *
 * Userspace may request memory-backed mappings either within or outside of the
 * bounds (but not crossing those bounds) of a previously mapped sparse
 * mapping. Subsequently requested memory-backed mappings within a sparse
 * mapping will take precedence over the corresponding range of the sparse
 * mapping. If such memory-backed mappings are unmapped, the kernel makes sure
 * that the corresponding sparse mapping takes their place again. Requests to
 * unmap a sparse mapping that still contains memory-backed mappings will
 * result in those memory-backed mappings being unmapped first.
 *
 * Unmap requests are not bound to the range of existing mappings and can even
 * overlap the bounds of sparse mappings. For such a request the kernel unmaps
 * all memory-backed mappings within the given range and splits up memory-backed
 * mappings which are only partially contained within it. Unmap requests with
 * the sparse flag set, however, must exactly match the range of a previously
 * mapped sparse mapping.
 *
 * While the kernel generally permits mapping and unmapping memory-backed
 * mappings in arbitrary sequences and ranges, either within a single VM_BIND
 * ioctl call or across multiple calls, there are some restrictions for sparse
 * mappings.
 *
 * The kernel does not permit:
 *    - unmapping non-existent sparse mappings
 *    - unmapping a sparse mapping and mapping a new sparse mapping overlapping
 *      the range of the previously unmapped sparse mapping within the same
 *      VM_BIND ioctl
 *    - unmapping a sparse mapping and mapping new memory-backed mappings
 *      overlapping the range of the previously unmapped sparse mapping within
 *      the same VM_BIND ioctl
 *
 * When using the VM_BIND ioctl to request the kernel to map memory to a given
 * virtual address in the GPU's VA space, there is no guarantee that the actual
 * mappings are created in the GPU's MMU. If the given memory is swapped out at
 * the time the bind operation is executed, the kernel will stash the mapping
 * details in its internal allocator and create the actual MMU mappings once
 * the memory is swapped back in. While this is transparent to userspace, it is
 * guaranteed that all backing memory is swapped back in and all memory
 * mappings previously requested by userspace are actually in place once the
 * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
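 *
 * As a rough illustration of the mapping flow described above, a hypothetical
 * userspace client could map a buffer object into the userspace-manageable
 * region as sketched below. This is not taken from a real UMD; the ioctl and
 * structure names come from the uAPI header, while the file descriptor, GEM
 * handle, virtual address and size are made-up example values::
 *
 *	struct drm_nouveau_vm_bind_op op = {
 *		.op = DRM_NOUVEAU_VM_BIND_OP_MAP,
 *		.handle = bo_handle,	// GEM handle of the BO backing the mapping
 *		.addr = 0x1000000,	// GPU VA to map to
 *		.bo_offset = 0,
 *		.range = 0x10000,	// size of the mapping in bytes
 *	};
 *	struct drm_nouveau_vm_bind bind = {
 *		.op_count = 1,
 *		.op_ptr = (uintptr_t)&op,
 *	};
 *
 *	drmIoctl(drm_fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);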
 *
 * A VM_BIND job can be executed either synchronously or asynchronously. If
 * executed asynchronously, userspace may provide a list of syncobjs this job
 * will wait for and/or a list of syncobjs the kernel will signal once the
 * VM_BIND job has finished execution. If executed synchronously, the ioctl
 * blocks until the bind job has finished. For synchronous jobs the kernel does
 * not permit any syncobjs to be submitted.
 *
 * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl (a
 * purely illustrative invocation is sketched in the comment at the end of this
 * file). EXEC jobs are always executed asynchronously and, like VM_BIND jobs,
 * provide the option to synchronize them with syncobjs.
 *
 * Besides that, EXEC jobs are scheduled on a channel specified by userspace.
 *
 * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs have an
 * up-to-date view of the VA space. However, the actual mappings might still be
 * pending. Hence, EXEC jobs require the fences of the corresponding VM_BIND
 * jobs they depend on to be attached to them.
 */

static int
nouveau_exec_job_submit(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_cli *cli = job->cli;
	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
	struct drm_exec *exec = &job->exec;
	struct drm_gem_object *obj;
	unsigned long index;
	int ret;

	/* Allocate the job's fence; it is emitted in nouveau_exec_job_run(). */
	ret = nouveau_fence_new(&exec_job->fence);
	if (ret)
		return ret;

	/* Lock the GEM objects backing all mappings of this client's VA space. */
	nouveau_uvmm_lock(uvmm);
	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
			    DRM_EXEC_IGNORE_DUPLICATES);
	drm_exec_until_all_locked(exec) {
		struct drm_gpuva *va;

		drm_gpuva_for_each_va(va, &uvmm->umgr) {
			if (unlikely(va == &uvmm->umgr.kernel_alloc_node))
				continue;

			ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
			drm_exec_retry_on_contention(exec);
			if (ret)
				goto err_uvmm_unlock;
		}
	}
	nouveau_uvmm_unlock(uvmm);

	/* Make sure the backing storage of all locked BOs is resident. */
	drm_exec_for_each_locked_object(exec, index, obj) {
		struct nouveau_bo *nvbo = nouveau_gem_object(obj);

		ret = nouveau_bo_validate(nvbo, true, false);
		if (ret)
			goto err_exec_fini;
	}

	return 0;

err_uvmm_unlock:
	nouveau_uvmm_unlock(uvmm);
err_exec_fini:
	drm_exec_fini(exec);
	return ret;
}

static void
nouveau_exec_job_armed_submit(struct nouveau_job *job)
{
	struct drm_exec *exec = &job->exec;
	struct drm_gem_object *obj;
	unsigned long index;

	/* Attach the job's done fence to all locked objects and drop the locks. */
	drm_exec_for_each_locked_object(exec, index, obj)
		dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);

	drm_exec_fini(exec);
}

static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;
	struct nouveau_fence *fence = exec_job->fence;
	int i, ret;

	/* Make sure the ring has space for all push buffers plus the fence. */
	ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
	if (ret) {
		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
		return ERR_PTR(ret);
	}

	for (i = 0; i < exec_job->push.count; i++) {
		nv50_dma_push(chan, exec_job->push.s[i].va,
			      exec_job->push.s[i].va_len);
	}

	ret = nouveau_fence_emit(fence, chan);
	if (ret) {
		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
		WIND_RING(chan);
		return ERR_PTR(ret);
	}

	exec_job->fence = NULL;

	return &fence->base;
}

static void
nouveau_exec_job_free(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);

	nouveau_job_free(job);

	nouveau_fence_unref(&exec_job->fence);
	kfree(exec_job->push.s);
	kfree(exec_job);
}

static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
	struct nouveau_channel *chan = exec_job->chan;

	if (unlikely(!atomic_read(&chan->killed)))
		nouveau_channel_kill(chan);

	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
		  chan->chid);

	nouveau_sched_entity_fini(job->entity);

	return DRM_GPU_SCHED_STAT_ENODEV;
}

static struct nouveau_job_ops nouveau_exec_job_ops = {
	.submit = nouveau_exec_job_submit,
	.armed_submit = nouveau_exec_job_armed_submit,
	.run = nouveau_exec_job_run,
	.free = nouveau_exec_job_free,
	.timeout = nouveau_exec_job_timeout,
};

int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
		      struct nouveau_exec_job_args *__args)
{
	struct nouveau_exec_job *job;
	struct nouveau_job_args args = {};
	int ret;

	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	job->push.count = __args->push.count;
	if (__args->push.count) {
		job->push.s = kmemdup(__args->push.s,
				      sizeof(*__args->push.s) *
				      __args->push.count,
				      GFP_KERNEL);
		if (!job->push.s) {
			ret = -ENOMEM;
			goto err_free_job;
		}
	}

	job->chan = __args->chan;

	args.sched_entity = __args->sched_entity;
	args.file_priv = __args->file_priv;

	args.in_sync.count = __args->in_sync.count;
	args.in_sync.s = __args->in_sync.s;

	args.out_sync.count = __args->out_sync.count;
	args.out_sync.s = __args->out_sync.s;

	args.ops = &nouveau_exec_job_ops;
	args.resv_usage = DMA_RESV_USAGE_WRITE;

	ret = nouveau_job_init(&job->base, &args);
	if (ret)
		goto err_free_pushs;

	return 0;

err_free_pushs:
	kfree(job->push.s);
err_free_job:
	kfree(job);
	*pjob = NULL;

	return ret;
}

static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
	struct nouveau_exec_job *job;
	int ret;

	ret = nouveau_exec_job_init(&job, args);
	if (ret)
		return ret;

	ret = nouveau_job_submit(&job->base);
	if (ret)
		goto err_job_fini;

	return 0;

err_job_fini:
	nouveau_job_fini(&job->base);
	return ret;
}

static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
		   struct drm_nouveau_exec *req)
{
	struct drm_nouveau_sync **s;
	u32 inc = req->wait_count;
	u64 ins = req->wait_ptr;
	u32 outc = req->sig_count;
	u64 outs = req->sig_ptr;
	u32 pushc = req->push_count;
	u64 pushs = req->push_ptr;
	int ret;

	if (pushc) {
		args->push.count = pushc;
		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
		if (IS_ERR(args->push.s))
			return PTR_ERR(args->push.s);
	}

	if (inc) {
		s = &args->in_sync.s;

		args->in_sync.count = inc;
		*s = u_memcpya(ins, inc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_pushs;
		}
	}

	if (outc) {
		s = &args->out_sync.s;

		args->out_sync.count = outc;
		*s = u_memcpya(outs, outc, sizeof(**s));
		if (IS_ERR(*s)) {
			ret = PTR_ERR(*s);
			goto err_free_ins;
		}
	}

	return 0;

err_free_pushs:
	u_free(args->push.s);
err_free_ins:
	u_free(args->in_sync.s);
	return ret;
}

static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
	u_free(args->push.s);
	u_free(args->in_sync.s);
	u_free(args->out_sync.s);
}

int
nouveau_exec_ioctl_exec(struct drm_device *dev,
			void *data,
			struct drm_file *file_priv)
{
	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_abi16_chan *chan16;
	struct nouveau_channel *chan = NULL;
	struct nouveau_exec_job_args args = {};
	struct drm_nouveau_exec *req = data;
	int ret = 0;

	if (unlikely(!abi16))
		return -ENOMEM;

	/* abi16 locks already */
	if (unlikely(!nouveau_cli_uvmm(cli)))
		return nouveau_abi16_put(abi16, -ENOSYS);

	list_for_each_entry(chan16, &abi16->channels, head) {
		if (chan16->chan->chid == req->channel) {
			chan = chan16->chan;
			break;
		}
	}

	if (!chan)
		return nouveau_abi16_put(abi16, -ENOENT);

	if (unlikely(atomic_read(&chan->killed)))
		return nouveau_abi16_put(abi16, -ENODEV);

	if (!chan->dma.ib_max)
		return nouveau_abi16_put(abi16, -ENOSYS);

	if (unlikely(req->push_count > NOUVEAU_GEM_MAX_PUSH)) {
		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
			  req->push_count, NOUVEAU_GEM_MAX_PUSH);
		return nouveau_abi16_put(abi16, -EINVAL);
	}

	ret = nouveau_exec_ucopy(&args, req);
	if (ret)
		goto out;

	args.sched_entity = &chan16->sched_entity;
	args.file_priv = file_priv;
	args.chan = chan;

	ret = nouveau_exec(&args);
	if (ret)
		goto out_free_args;

out_free_args:
	nouveau_exec_ufree(&args);
out:
	return nouveau_abi16_put(abi16, ret);
}
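
/*
 * Purely illustrative userspace-side sketch of a DRM_NOUVEAU_EXEC submission,
 * matching the layout consumed by nouveau_exec_ucopy() and
 * nouveau_exec_ioctl_exec() above. This is not driver code and is not
 * compiled; the file descriptor, channel id, push buffer VA/length and
 * syncobj handle below are made-up example values:
 *
 *	struct drm_nouveau_sync sig = {
 *		.flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
 *		.handle = syncobj_handle,	// signaled once the job is done
 *	};
 *	struct drm_nouveau_exec_push push = {
 *		.va = 0x1000000,		// GPU VA of the push buffer mapping
 *		.va_len = 0x1000,		// length of the push buffer
 *	};
 *	struct drm_nouveau_exec exec = {
 *		.channel = channel_id,
 *		.push_count = 1,
 *		.push_ptr = (uintptr_t)&push,
 *		.sig_count = 1,
 *		.sig_ptr = (uintptr_t)&sig,
 *	};
 *
 *	drmIoctl(drm_fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);
 */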