Lines matching refs: p (struct amdgpu_cs_parser *, the amdgpu command-submission parser state; the leading number on each entry is the source line, followed by the containing function)
44 static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, in amdgpu_cs_parser_init() argument
54 memset(p, 0, sizeof(*p)); in amdgpu_cs_parser_init()
55 p->adev = adev; in amdgpu_cs_parser_init()
56 p->filp = filp; in amdgpu_cs_parser_init()
58 p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); in amdgpu_cs_parser_init()
59 if (!p->ctx) in amdgpu_cs_parser_init()
62 if (atomic_read(&p->ctx->guilty)) { in amdgpu_cs_parser_init()
63 amdgpu_ctx_put(p->ctx); in amdgpu_cs_parser_init()
67 amdgpu_sync_create(&p->sync); in amdgpu_cs_parser_init()
68 drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | in amdgpu_cs_parser_init()
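The context id looked up here via amdgpu_ctx_get() must have been allocated by userspace beforehand through the AMDGPU context ioctl. A minimal userspace sketch of that allocation, assuming an open render-node fd and the libdrm development headers; the helper name is illustrative and error handling is trimmed:

/* Hypothetical helper: allocate the context whose id later lands in
 * drm_amdgpu_cs_in.ctx_id (cs->in.ctx_id above). */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

static int alloc_amdgpu_ctx(int fd, uint32_t *ctx_id)
{
	union drm_amdgpu_ctx args;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	if (drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args)))
		return -1;
	*ctx_id = args.out.alloc.ctx_id;
	return 0;
}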
73 static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p, in amdgpu_cs_job_idx() argument
80 r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type, in amdgpu_cs_job_idx()
94 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_job_idx()
95 if (p->entities[i] == entity) in amdgpu_cs_job_idx()
102 p->entities[i] = entity; in amdgpu_cs_job_idx()
103 p->gang_size = i + 1; in amdgpu_cs_job_idx()
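amdgpu_cs_job_idx() gives each distinct scheduler entity one job slot in the gang: an entity that was already seen returns its existing index, a new one is appended and grows p->gang_size, and the gang has a small fixed upper bound. A standalone sketch of that find-or-append pattern; the names and the bound of 4 (standing in for AMDGPU_CS_GANG_SIZE) are illustrative:

/* Standalone illustration of gang-member indexing; "entity" stands in
 * for struct drm_sched_entity *. Compiles and runs on its own. */
#include <stdio.h>

#define GANG_SIZE_MAX 4		/* stand-in for AMDGPU_CS_GANG_SIZE */

struct parser_sketch {
	const void *entities[GANG_SIZE_MAX];
	unsigned int gang_size;
};

static int job_idx(struct parser_sketch *p, const void *entity)
{
	unsigned int i;

	for (i = 0; i < p->gang_size; ++i)
		if (p->entities[i] == entity)
			return i;		/* already a gang member */

	if (i == GANG_SIZE_MAX)
		return -1;			/* kernel returns -EINVAL here */

	p->entities[i] = entity;		/* append a new gang member */
	p->gang_size = i + 1;
	return i;
}

int main(void)
{
	struct parser_sketch p = {0};
	int e1, e2;

	printf("%d\n", job_idx(&p, &e1));	/* 0: first slot */
	printf("%d\n", job_idx(&p, &e1));	/* 0: same entity, same slot */
	printf("%d\n", job_idx(&p, &e2));	/* 1: second slot */
	printf("gang_size=%u\n", p.gang_size);	/* 2 */
	return 0;
}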
107 static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, in amdgpu_cs_p1_ib() argument
113 r = amdgpu_cs_job_idx(p, chunk_ib); in amdgpu_cs_p1_ib()
121 p->gang_leader_idx = r; in amdgpu_cs_p1_ib()
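Each AMDGPU_CHUNK_ID_IB chunk counted here carries a struct drm_amdgpu_cs_chunk_ib telling the kernel where the indirect buffer lives and which engine should execute it. A hedged userspace-side sketch of filling one, assuming the libdrm headers; the helper name and the VA/size arguments are placeholders for a mapped, already written command buffer:

/* Sketch: describe one GFX indirect buffer for an IB chunk. */
#include <stdint.h>
#include <string.h>
#include <amdgpu_drm.h>

static void fill_ib_chunk(struct drm_amdgpu_cs_chunk_ib *ib,
			  uint64_t ib_va, uint32_t ib_size_bytes)
{
	memset(ib, 0, sizeof(*ib));
	ib->va_start = ib_va;		/* GPU VA of the command buffer */
	ib->ib_bytes = ib_size_bytes;	/* length in bytes */
	ib->ip_type = AMDGPU_HW_IP_GFX;	/* target engine type */
	ib->ip_instance = 0;
	ib->ring = 0;			/* ring within that IP instance */
	ib->flags = 0;			/* e.g. AMDGPU_IB_FLAG_PREEMPT */
}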
125 static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p, in amdgpu_cs_p1_user_fence() argument
132 gobj = drm_gem_object_lookup(p->filp, data->handle); in amdgpu_cs_p1_user_fence()
136 p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); in amdgpu_cs_p1_user_fence()
139 size = amdgpu_bo_size(p->uf_bo); in amdgpu_cs_p1_user_fence()
143 if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm)) in amdgpu_cs_p1_user_fence()
150 static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p, in amdgpu_cs_p1_bo_handles() argument
160 r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, in amdgpu_cs_p1_bo_handles()
161 &p->bo_list); in amdgpu_cs_p1_bo_handles()
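The AMDGPU_CHUNK_ID_BO_HANDLES chunk consumed here lets userspace pass the buffer list inline instead of a pre-created bo_list handle: its payload is a struct drm_amdgpu_bo_list_in pointing at an array of struct drm_amdgpu_bo_list_entry. A hedged sketch, assuming the libdrm headers; the two GEM handles are placeholders and error handling is trimmed:

/* Sketch: inline BO list payload for AMDGPU_CHUNK_ID_BO_HANDLES. */
#include <stdint.h>
#include <string.h>
#include <amdgpu_drm.h>

static struct drm_amdgpu_bo_list_entry entries[2];
static struct drm_amdgpu_bo_list_in bo_list;

static void fill_bo_handles_chunk(uint32_t cmdbuf_handle, uint32_t data_handle)
{
	entries[0].bo_handle = cmdbuf_handle;
	entries[0].bo_priority = 0;
	entries[1].bo_handle = data_handle;
	entries[1].bo_priority = 0;

	memset(&bo_list, 0, sizeof(bo_list));
	bo_list.bo_number = 2;
	bo_list.bo_info_size = sizeof(entries[0]);
	bo_list.bo_info_ptr = (uintptr_t)entries;
}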
175 static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, in amdgpu_cs_pass1() argument
178 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_pass1()
201 p->nchunks = cs->in.num_chunks; in amdgpu_cs_pass1()
202 p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk), in amdgpu_cs_pass1()
204 if (!p->chunks) { in amdgpu_cs_pass1()
209 for (i = 0; i < p->nchunks; i++) { in amdgpu_cs_pass1()
221 p->chunks[i].chunk_id = user_chunk.chunk_id; in amdgpu_cs_pass1()
222 p->chunks[i].length_dw = user_chunk.length_dw; in amdgpu_cs_pass1()
224 size = p->chunks[i].length_dw; in amdgpu_cs_pass1()
227 p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), in amdgpu_cs_pass1()
229 if (p->chunks[i].kdata == NULL) { in amdgpu_cs_pass1()
235 if (copy_from_user(p->chunks[i].kdata, cdata, size)) { in amdgpu_cs_pass1()
242 switch (p->chunks[i].chunk_id) { in amdgpu_cs_pass1()
247 ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs); in amdgpu_cs_pass1()
256 ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata, in amdgpu_cs_pass1()
267 if (p->bo_list) in amdgpu_cs_pass1()
270 ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata); in amdgpu_cs_pass1()
289 if (!p->gang_size) { in amdgpu_cs_pass1()
294 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_pass1()
295 ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm, in amdgpu_cs_pass1()
296 num_ibs[i], &p->jobs[i]); in amdgpu_cs_pass1()
300 p->gang_leader = p->jobs[p->gang_leader_idx]; in amdgpu_cs_pass1()
302 if (p->ctx->generation != p->gang_leader->generation) { in amdgpu_cs_pass1()
307 if (p->uf_bo) in amdgpu_cs_pass1()
308 p->gang_leader->uf_addr = uf_offset; in amdgpu_cs_pass1()
317 i = p->nchunks - 1; in amdgpu_cs_pass1()
320 kvfree(p->chunks[i].kdata); in amdgpu_cs_pass1()
321 kvfree(p->chunks); in amdgpu_cs_pass1()
322 p->chunks = NULL; in amdgpu_cs_pass1()
323 p->nchunks = 0; in amdgpu_cs_pass1()
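Pass 1 copies cs->in.chunks, an array of user pointers with one entry per chunk, each aimed at a struct drm_amdgpu_cs_chunk whose chunk_data in turn points at the payload. A hedged sketch of assembling a single-IB submission around that double indirection, assuming the libdrm headers and reusing the fd/ctx_id/IB-chunk helpers sketched earlier; a real submission would also reference the IB's buffer through cs.in.bo_list_handle or a BO_HANDLES chunk:

/* Sketch: one IB chunk wrapped in the pointer-to-pointer layout that
 * amdgpu_cs_pass1() copies in, then fired at DRM_AMDGPU_CS. */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

static int submit_one_ib(int fd, uint32_t ctx_id,
			 struct drm_amdgpu_cs_chunk_ib *ib_chunk,
			 uint64_t *out_fence_seq)
{
	struct drm_amdgpu_cs_chunk chunk;
	uint64_t chunk_ptrs[1];
	union drm_amdgpu_cs cs;

	memset(&chunk, 0, sizeof(chunk));
	chunk.chunk_id = AMDGPU_CHUNK_ID_IB;
	chunk.length_dw = sizeof(*ib_chunk) / 4;	/* payload size in dwords */
	chunk.chunk_data = (uintptr_t)ib_chunk;

	chunk_ptrs[0] = (uintptr_t)&chunk;		/* array of chunk pointers */

	memset(&cs, 0, sizeof(cs));
	cs.in.ctx_id = ctx_id;
	cs.in.num_chunks = 1;
	cs.in.chunks = (uintptr_t)chunk_ptrs;

	if (drmCommandWriteRead(fd, DRM_AMDGPU_CS, &cs, sizeof(cs)))
		return -1;
	*out_fence_seq = cs.out.handle;			/* fence sequence number */
	return 0;
}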
330 static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, in amdgpu_cs_p2_ib() argument
336 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_p2_ib()
343 r = amdgpu_cs_job_idx(p, chunk_ib); in amdgpu_cs_p2_ib()
347 job = p->jobs[r]; in amdgpu_cs_p2_ib()
352 if (p->uf_bo && ring->funcs->no_user_fence) in amdgpu_cs_p2_ib()
371 r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ? in amdgpu_cs_p2_ib()
385 static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, in amdgpu_cs_p2_dependencies() argument
389 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_p2_dependencies()
430 r = amdgpu_sync_fence(&p->sync, fence); in amdgpu_cs_p2_dependencies()
438 static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, in amdgpu_syncobj_lookup_and_add() argument
445 r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); in amdgpu_syncobj_lookup_and_add()
452 r = amdgpu_sync_fence(&p->sync, fence); in amdgpu_syncobj_lookup_and_add()
457 static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p, in amdgpu_cs_p2_syncobj_in() argument
467 r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0); in amdgpu_cs_p2_syncobj_in()
475 static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p, in amdgpu_cs_p2_syncobj_timeline_wait() argument
485 r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle, in amdgpu_cs_p2_syncobj_timeline_wait()
495 static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p, in amdgpu_cs_p2_syncobj_out() argument
505 if (p->post_deps) in amdgpu_cs_p2_syncobj_out()
508 p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), in amdgpu_cs_p2_syncobj_out()
510 p->num_post_deps = 0; in amdgpu_cs_p2_syncobj_out()
512 if (!p->post_deps) in amdgpu_cs_p2_syncobj_out()
517 p->post_deps[i].syncobj = in amdgpu_cs_p2_syncobj_out()
518 drm_syncobj_find(p->filp, deps[i].handle); in amdgpu_cs_p2_syncobj_out()
519 if (!p->post_deps[i].syncobj) in amdgpu_cs_p2_syncobj_out()
521 p->post_deps[i].chain = NULL; in amdgpu_cs_p2_syncobj_out()
522 p->post_deps[i].point = 0; in amdgpu_cs_p2_syncobj_out()
523 p->num_post_deps++; in amdgpu_cs_p2_syncobj_out()
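The SYNCOBJ_IN/SYNCOBJ_OUT chunks handled here are plain arrays of struct drm_amdgpu_cs_chunk_sem, one syncobj handle per entry; for the OUT side the kernel later installs p->fence into each handle (see amdgpu_cs_post_dependencies() further down). A hedged sketch of a single out-syncobj chunk, assuming the libdrm headers; the handle would come from drmSyncobjCreate():

/* Sketch: one binary syncobj as an out-fence for the submission. */
#include <stdint.h>
#include <string.h>
#include <amdgpu_drm.h>

static struct drm_amdgpu_cs_chunk_sem out_sem;

static void fill_syncobj_out_chunk(struct drm_amdgpu_cs_chunk *chunk,
				   uint32_t syncobj_handle)
{
	out_sem.handle = syncobj_handle;

	memset(chunk, 0, sizeof(*chunk));
	chunk->chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
	chunk->length_dw = sizeof(out_sem) / 4;
	chunk->chunk_data = (uintptr_t)&out_sem;
}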
529 static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p, in amdgpu_cs_p2_syncobj_timeline_signal() argument
539 if (p->post_deps) in amdgpu_cs_p2_syncobj_timeline_signal()
542 p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), in amdgpu_cs_p2_syncobj_timeline_signal()
544 p->num_post_deps = 0; in amdgpu_cs_p2_syncobj_timeline_signal()
546 if (!p->post_deps) in amdgpu_cs_p2_syncobj_timeline_signal()
550 struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; in amdgpu_cs_p2_syncobj_timeline_signal()
559 dep->syncobj = drm_syncobj_find(p->filp, in amdgpu_cs_p2_syncobj_timeline_signal()
566 p->num_post_deps++; in amdgpu_cs_p2_syncobj_timeline_signal()
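The timeline variants use struct drm_amdgpu_cs_chunk_syncobj, which adds a point (and flags) to the handle; for signalling, the point is stored in dep->point and a dma_fence_chain is allocated so that amdgpu_cs_post_dependencies() can attach p->fence at that point with drm_syncobj_add_point(). A hedged userspace sketch of the signal payload, assuming the libdrm headers; the handle and point value are placeholders:

/* Sketch: signal point 42 on a timeline syncobj when this CS completes. */
#include <stdint.h>
#include <string.h>
#include <amdgpu_drm.h>

static struct drm_amdgpu_cs_chunk_syncobj timeline_signal;

static void fill_timeline_signal_chunk(struct drm_amdgpu_cs_chunk *chunk,
				       uint32_t syncobj_handle)
{
	memset(&timeline_signal, 0, sizeof(timeline_signal));
	timeline_signal.handle = syncobj_handle;
	timeline_signal.point = 42;		/* timeline value to signal */

	memset(chunk, 0, sizeof(*chunk));
	chunk->chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
	chunk->length_dw = sizeof(timeline_signal) / 4;
	chunk->chunk_data = (uintptr_t)&timeline_signal;
}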
572 static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p, in amdgpu_cs_p2_shadow() argument
581 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_p2_shadow()
582 p->jobs[i]->shadow_va = shadow->shadow_va; in amdgpu_cs_p2_shadow()
583 p->jobs[i]->csa_va = shadow->csa_va; in amdgpu_cs_p2_shadow()
584 p->jobs[i]->gds_va = shadow->gds_va; in amdgpu_cs_p2_shadow()
585 p->jobs[i]->init_shadow = in amdgpu_cs_p2_shadow()
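amdgpu_cs_p2_shadow() copies one shadow description into every job in the gang: shadow, CSA and GDS virtual addresses plus an init flag. A hedged sketch of the userspace payload, assuming the libdrm headers; the three VAs are placeholders for buffers the caller has already allocated and mapped, and the flag macro name is given as best recalled from amdgpu_drm.h:

/* Sketch: CP GFX shadow chunk payload (AMDGPU_CHUNK_ID_CP_GFX_SHADOW). */
#include <stdint.h>
#include <string.h>
#include <amdgpu_drm.h>

static void fill_shadow_chunk(struct drm_amdgpu_cs_chunk_cp_gfx_shadow *sh,
			      uint64_t shadow_va, uint64_t csa_va,
			      uint64_t gds_va)
{
	memset(sh, 0, sizeof(*sh));
	sh->shadow_va = shadow_va;
	sh->csa_va = csa_va;
	sh->gds_va = gds_va;
	/* ask the CP to initialize the shadow area on first use */
	sh->flags = AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
}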
592 static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) in amdgpu_cs_pass2() argument
597 for (i = 0; i < p->nchunks; ++i) { in amdgpu_cs_pass2()
600 chunk = &p->chunks[i]; in amdgpu_cs_pass2()
604 r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt); in amdgpu_cs_pass2()
610 r = amdgpu_cs_p2_dependencies(p, chunk); in amdgpu_cs_pass2()
615 r = amdgpu_cs_p2_syncobj_in(p, chunk); in amdgpu_cs_pass2()
620 r = amdgpu_cs_p2_syncobj_out(p, chunk); in amdgpu_cs_pass2()
625 r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk); in amdgpu_cs_pass2()
630 r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk); in amdgpu_cs_pass2()
635 r = amdgpu_cs_p2_shadow(p, chunk); in amdgpu_cs_pass2()
785 struct amdgpu_cs_parser *p = param; in amdgpu_cs_bo_validate() local
800 if (p->bytes_moved < p->bytes_moved_threshold && in amdgpu_cs_bo_validate()
809 if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) in amdgpu_cs_bo_validate()
824 p->bytes_moved += ctx.bytes_moved; in amdgpu_cs_bo_validate()
827 p->bytes_moved_vis += ctx.bytes_moved; in amdgpu_cs_bo_validate()
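amdgpu_cs_bo_validate() throttles buffer migration per submission: a BO may only be widened into VRAM while p->bytes_moved stays under p->bytes_moved_threshold (and, when it must be CPU-visible, while p->bytes_moved_vis stays under its own threshold); whatever TTM actually moved is then added back into the counters. A standalone sketch of that accounting with made-up thresholds; only the shape of the policy is taken from the code above:

/* Standalone illustration of per-CS migration throttling. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct throttle {
	uint64_t bytes_moved, bytes_moved_threshold;
	uint64_t bytes_moved_vis, bytes_moved_vis_threshold;
};

/* May this BO still be migrated into (CPU-visible) VRAM? */
static bool may_move_to_vram(const struct throttle *t, bool needs_visible)
{
	if (t->bytes_moved >= t->bytes_moved_threshold)
		return false;
	if (needs_visible && t->bytes_moved_vis >= t->bytes_moved_vis_threshold)
		return false;
	return true;
}

int main(void)
{
	struct throttle t = {
		.bytes_moved_threshold = 256ull << 20,	/* 256 MiB per CS */
		.bytes_moved_vis_threshold = 64ull << 20,
	};
	uint64_t bo_size = 200ull << 20;

	if (may_move_to_vram(&t, false))
		t.bytes_moved += bo_size;	/* ctx.bytes_moved in the real code */

	printf("moved %llu of %llu allowed bytes\n",
	       (unsigned long long)t.bytes_moved,
	       (unsigned long long)t.bytes_moved_threshold);
	return 0;
}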
837 static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, in amdgpu_cs_parser_bos() argument
840 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_parser_bos()
851 if (p->bo_list) in amdgpu_cs_parser_bos()
855 &p->bo_list); in amdgpu_cs_parser_bos()
858 } else if (!p->bo_list) { in amdgpu_cs_parser_bos()
860 r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0, in amdgpu_cs_parser_bos()
861 &p->bo_list); in amdgpu_cs_parser_bos()
866 mutex_lock(&p->bo_list->bo_list_mutex); in amdgpu_cs_parser_bos()
872 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
902 drm_exec_until_all_locked(&p->exec) { in amdgpu_cs_parser_bos()
903 r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size); in amdgpu_cs_parser_bos()
904 drm_exec_retry_on_contention(&p->exec); in amdgpu_cs_parser_bos()
908 amdgpu_bo_list_for_each_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
910 r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base, in amdgpu_cs_parser_bos()
911 1 + p->gang_size); in amdgpu_cs_parser_bos()
912 drm_exec_retry_on_contention(&p->exec); in amdgpu_cs_parser_bos()
919 if (p->uf_bo) { in amdgpu_cs_parser_bos()
920 r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base, in amdgpu_cs_parser_bos()
921 1 + p->gang_size); in amdgpu_cs_parser_bos()
922 drm_exec_retry_on_contention(&p->exec); in amdgpu_cs_parser_bos()
928 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
954 amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, in amdgpu_cs_parser_bos()
955 &p->bytes_moved_vis_threshold); in amdgpu_cs_parser_bos()
956 p->bytes_moved = 0; in amdgpu_cs_parser_bos()
957 p->bytes_moved_vis = 0; in amdgpu_cs_parser_bos()
959 r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm, in amdgpu_cs_parser_bos()
960 amdgpu_cs_bo_validate, p); in amdgpu_cs_parser_bos()
966 drm_exec_for_each_locked_object(&p->exec, index, obj) { in amdgpu_cs_parser_bos()
967 r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj)); in amdgpu_cs_parser_bos()
972 if (p->uf_bo) { in amdgpu_cs_parser_bos()
973 r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo); in amdgpu_cs_parser_bos()
977 p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo); in amdgpu_cs_parser_bos()
980 amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, in amdgpu_cs_parser_bos()
981 p->bytes_moved_vis); in amdgpu_cs_parser_bos()
983 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_parser_bos()
984 amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj, in amdgpu_cs_parser_bos()
985 p->bo_list->gws_obj, in amdgpu_cs_parser_bos()
986 p->bo_list->oa_obj); in amdgpu_cs_parser_bos()
990 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_parser_bos()
1000 mutex_unlock(&p->bo_list->bo_list_mutex); in amdgpu_cs_parser_bos()
1004 static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p) in trace_amdgpu_cs_ibs() argument
1011 for (i = 0; i < p->gang_size; ++i) { in trace_amdgpu_cs_ibs()
1012 struct amdgpu_job *job = p->jobs[i]; in trace_amdgpu_cs_ibs()
1015 trace_amdgpu_cs(p, job, &job->ibs[j]); in trace_amdgpu_cs_ibs()
1019 static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, in amdgpu_cs_patch_ibs() argument
1038 r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m); in amdgpu_cs_patch_ibs()
1061 r = amdgpu_ring_parse_cs(ring, p, job, ib); in amdgpu_cs_patch_ibs()
1069 r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); in amdgpu_cs_patch_ibs()
1079 static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p) in amdgpu_cs_patch_jobs() argument
1084 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_patch_jobs()
1085 r = amdgpu_cs_patch_ibs(p, p->jobs[i]); in amdgpu_cs_patch_jobs()
1092 static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) in amdgpu_cs_vm_handling() argument
1094 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_vm_handling()
1095 struct amdgpu_job *job = p->gang_leader; in amdgpu_cs_vm_handling()
1096 struct amdgpu_device *adev = p->adev; in amdgpu_cs_vm_handling()
1107 if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) { in amdgpu_cs_vm_handling()
1108 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_vm_handling()
1109 struct drm_sched_entity *entity = p->entities[i]; in amdgpu_cs_vm_handling()
1126 r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update); in amdgpu_cs_vm_handling()
1137 r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); in amdgpu_cs_vm_handling()
1142 amdgpu_bo_list_for_each_entry(e, p->bo_list) { in amdgpu_cs_vm_handling()
1151 r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); in amdgpu_cs_vm_handling()
1164 r = amdgpu_sync_fence(&p->sync, vm->last_update); in amdgpu_cs_vm_handling()
1168 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_vm_handling()
1169 job = p->jobs[i]; in amdgpu_cs_vm_handling()
1179 amdgpu_bo_list_for_each_entry(e, p->bo_list) { in amdgpu_cs_vm_handling()
1193 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) in amdgpu_cs_sync_rings() argument
1195 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_sync_rings()
1203 r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); in amdgpu_cs_sync_rings()
1210 drm_exec_for_each_locked_object(&p->exec, index, obj) { in amdgpu_cs_sync_rings()
1218 r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode, in amdgpu_cs_sync_rings()
1224 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_sync_rings()
1225 r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]); in amdgpu_cs_sync_rings()
1230 sched = p->gang_leader->base.entity->rq->sched; in amdgpu_cs_sync_rings()
1231 while ((fence = amdgpu_sync_get_fence(&p->sync))) { in amdgpu_cs_sync_rings()
1245 r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); in amdgpu_cs_sync_rings()
1253 static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) in amdgpu_cs_post_dependencies() argument
1257 for (i = 0; i < p->num_post_deps; ++i) { in amdgpu_cs_post_dependencies()
1258 if (p->post_deps[i].chain && p->post_deps[i].point) { in amdgpu_cs_post_dependencies()
1259 drm_syncobj_add_point(p->post_deps[i].syncobj, in amdgpu_cs_post_dependencies()
1260 p->post_deps[i].chain, in amdgpu_cs_post_dependencies()
1261 p->fence, p->post_deps[i].point); in amdgpu_cs_post_dependencies()
1262 p->post_deps[i].chain = NULL; in amdgpu_cs_post_dependencies()
1264 drm_syncobj_replace_fence(p->post_deps[i].syncobj, in amdgpu_cs_post_dependencies()
1265 p->fence); in amdgpu_cs_post_dependencies()
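Once amdgpu_cs_post_dependencies() has installed p->fence into each out-syncobj (replacing the fence for binary syncobjs, adding a chain point for timelines), userspace can block on completion through that syncobj instead of the returned sequence number. A hedged sketch using libdrm's syncobj helper; the handle is the one passed in the SYNCOBJ_OUT chunk sketched earlier:

/* Sketch: wait on the out-syncobj filled in by amdgpu_cs_post_dependencies().
 * The timeout is an absolute CLOCK_MONOTONIC deadline; INT64_MAX waits
 * indefinitely. */
#include <stdint.h>
#include <xf86drm.h>

static int wait_for_submission(int fd, uint32_t syncobj_handle)
{
	uint32_t first_signaled;

	return drmSyncobjWait(fd, &syncobj_handle, 1, INT64_MAX,
			      DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
			      &first_signaled);
}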
1270 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, in amdgpu_cs_submit() argument
1273 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; in amdgpu_cs_submit()
1274 struct amdgpu_job *leader = p->gang_leader; in amdgpu_cs_submit()
1282 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_submit()
1283 drm_sched_job_arm(&p->jobs[i]->base); in amdgpu_cs_submit()
1285 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_submit()
1288 if (p->jobs[i] == leader) in amdgpu_cs_submit()
1291 fence = &p->jobs[i]->base.s_fence->scheduled; in amdgpu_cs_submit()
1300 if (p->gang_size > 1) { in amdgpu_cs_submit()
1301 for (i = 0; i < p->gang_size; ++i) in amdgpu_cs_submit()
1302 amdgpu_job_set_gang_leader(p->jobs[i], leader); in amdgpu_cs_submit()
1309 mutex_lock(&p->adev->notifier_lock); in amdgpu_cs_submit()
1315 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { in amdgpu_cs_submit()
1322 mutex_unlock(&p->adev->notifier_lock); in amdgpu_cs_submit()
1326 p->fence = dma_fence_get(&leader->base.s_fence->finished); in amdgpu_cs_submit()
1327 drm_exec_for_each_locked_object(&p->exec, index, gobj) { in amdgpu_cs_submit()
1332 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_submit()
1333 if (p->jobs[i] == leader) in amdgpu_cs_submit()
1337 &p->jobs[i]->base.s_fence->finished, in amdgpu_cs_submit()
1342 dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE); in amdgpu_cs_submit()
1345 seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx], in amdgpu_cs_submit()
1346 p->fence); in amdgpu_cs_submit()
1347 amdgpu_cs_post_dependencies(p); in amdgpu_cs_submit()
1350 !p->ctx->preamble_presented) { in amdgpu_cs_submit()
1352 p->ctx->preamble_presented = true; in amdgpu_cs_submit()
1358 amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket); in amdgpu_cs_submit()
1359 for (i = 0; i < p->gang_size; ++i) { in amdgpu_cs_submit()
1360 amdgpu_job_free_resources(p->jobs[i]); in amdgpu_cs_submit()
1361 trace_amdgpu_cs_ioctl(p->jobs[i]); in amdgpu_cs_submit()
1362 drm_sched_entity_push_job(&p->jobs[i]->base); in amdgpu_cs_submit()
1363 p->jobs[i] = NULL; in amdgpu_cs_submit()
1366 amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); in amdgpu_cs_submit()
1368 mutex_unlock(&p->adev->notifier_lock); in amdgpu_cs_submit()
1369 mutex_unlock(&p->bo_list->bo_list_mutex); in amdgpu_cs_submit()