/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drmP.h>
#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"

#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)

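/* Number of scheduler entities (i.e. rings visible to userspace) that a
 * context exposes for each hardware IP type.
 */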
const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	=	1,
	[AMDGPU_HW_IP_COMPUTE]	=	4,
	[AMDGPU_HW_IP_DMA]	=	2,
	[AMDGPU_HW_IP_UVD]	=	1,
	[AMDGPU_HW_IP_VCE]	=	1,
	[AMDGPU_HW_IP_UVD_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_DEC]	=	1,
	[AMDGPU_HW_IP_VCN_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
};

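/* Total number of scheduler entities allocated per context, i.e. the sum of
 * amdgpu_ctx_num_entities[] over all hardware IP types.
 */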
static int amdgpu_ctx_total_num_entities(void)
{
	unsigned i, num_entities = 0;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		num_entities += amdgpu_ctx_num_entities[i];

	return num_entities;
}

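/* Check whether the caller is allowed to create a context with the given
 * scheduler priority: everything above NORMAL requires CAP_SYS_NICE or
 * DRM master status.
 */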
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      enum drm_sched_priority priority)
{
	/* NORMAL and below are accessible by everyone */
	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

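/* Initialize a context: allocate the fence ring buffers, initialize the
 * reset and VRAM-lost counters, and create one scheduler entity for every
 * ring that each hardware IP exposes to userspace.
 */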
static int amdgpu_ctx_init(struct amdgpu_device *adev,
			   enum drm_sched_priority priority,
			   struct drm_file *filp,
			   struct amdgpu_ctx *ctx)
{
	unsigned num_entities = amdgpu_ctx_total_num_entities();
	unsigned i, j;
	int r;

	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
		return -EINVAL;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));
	ctx->adev = adev;

	ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
			      sizeof(struct dma_fence *), GFP_KERNEL);
	if (!ctx->fences)
		return -ENOMEM;

	ctx->entities[0] = kcalloc(num_entities,
				   sizeof(struct amdgpu_ctx_entity),
				   GFP_KERNEL);
	if (!ctx->entities[0]) {
		r = -ENOMEM;
		goto error_free_fences;
	}

	for (i = 0; i < num_entities; ++i) {
		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];

		entity->sequence = 1;
		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
	}
	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
		ctx->entities[i] = ctx->entities[i - 1] +
			amdgpu_ctx_num_entities[i - 1];

	kref_init(&ctx->refcount);
	spin_lock_init(&ctx->ring_lock);
	mutex_init(&ctx->lock);

	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
	ctx->init_priority = priority;
	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
		unsigned num_rings;

		switch (i) {
		case AMDGPU_HW_IP_GFX:
			rings[0] = &adev->gfx.gfx_ring[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_COMPUTE:
			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
				rings[j] = &adev->gfx.compute_ring[j];
			num_rings = adev->gfx.num_compute_rings;
			break;
		case AMDGPU_HW_IP_DMA:
			for (j = 0; j < adev->sdma.num_instances; ++j)
				rings[j] = &adev->sdma.instance[j].ring;
			num_rings = adev->sdma.num_instances;
			break;
		case AMDGPU_HW_IP_UVD:
			rings[0] = &adev->uvd.inst[0].ring;
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCE:
			rings[0] = &adev->vce.ring[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_UVD_ENC:
			rings[0] = &adev->uvd.inst[0].ring_enc[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCN_DEC:
			rings[0] = &adev->vcn.ring_dec;
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCN_ENC:
			rings[0] = &adev->vcn.ring_enc[0];
			num_rings = 1;
			break;
		case AMDGPU_HW_IP_VCN_JPEG:
			rings[0] = &adev->vcn.ring_jpeg;
			num_rings = 1;
			break;
		}

		for (j = 0; j < num_rings; ++j)
			rqs[j] = &rings[j]->sched.sched_rq[priority];

		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
						  rqs, num_rings, &ctx->guilty);
			if (r)
				goto error_cleanup_entities;
		}
	}

	return 0;

error_cleanup_entities:
	for (i = 0; i < num_entities; ++i)
		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
	kfree(ctx->entities[0]);

error_free_fences:
	kfree(ctx->fences);
	ctx->fences = NULL;
	return r;
}

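/* Final kref release: put all fences still stored in the fence rings and
 * free the context. The scheduler entities must already have been destroyed
 * by this point.
 */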
static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	unsigned num_entities = amdgpu_ctx_total_num_entities();
	struct amdgpu_device *adev = ctx->adev;
	unsigned i, j;

	if (!adev)
		return;

	for (i = 0; i < num_entities; ++i)
		for (j = 0; j < amdgpu_sched_jobs; ++j)
			dma_fence_put(ctx->entities[0][i].fences[j]);
	kfree(ctx->fences);
	kfree(ctx->entities[0]);

	mutex_destroy(&ctx->lock);

	kfree(ctx);
}

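/* Translate a userspace (hw_ip, instance, ring) triple into the matching
 * scheduler entity of this context.
 */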
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %u\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %u\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %u %u\n", hw_ip, ring);
		return -EINVAL;
	}

	*entity = &ctx->entities[hw_ip][ring].entity;
	return 0;
}

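/* Allocate and initialize a new context and publish it in the file-private
 * handle IDR; the new handle is returned through @id.
 */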
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    enum drm_sched_priority priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX,
		      GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(adev, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

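/* Regular kref release path: destroy the scheduler entities (which flushes
 * their remaining jobs) before tearing the context down.
 */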
static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	unsigned num_entities;
	u32 i;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);

	num_entities = amdgpu_ctx_total_num_entities();
	for (i = 0; i < num_entities; i++)
		drm_sched_entity_destroy(&ctx->entities[0][i].entity);

	amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

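/* AMDGPU_CTX_OP_QUERY_STATE: report whether a GPU reset has occurred since
 * the last query of this context.
 */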
static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

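/* AMDGPU_CTX_OP_QUERY_STATE2: like amdgpu_ctx_query(), but reports reset,
 * VRAM loss and guilt as separate flags instead of a single status value.
 */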
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
	struct amdgpu_fpriv *fpriv, uint32_t id,
	union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	mutex_unlock(&mgr->lock);
	return 0;
}

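/* Handler for the AMDGPU_CTX ioctl: multiplexes context allocation, freeing
 * and the two state queries.
 */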
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id;
	enum drm_sched_priority priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	r = 0;
	id = args->in.ctx_id;
	priority = amdgpu_to_sched_priority(args->in.priority);

	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field */
	if (priority == DRM_SCHED_PRIORITY_INVALID)
		priority = DRM_SCHED_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	default:
		return -EINVAL;
	}

	return r;
}

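/* Look up a context by handle and take a reference on it; release it again
 * with amdgpu_ctx_put().
 */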
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (!ctx)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}

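/* Store @fence in the entity's fence ring and return its sequence number
 * through @handle. The slot being recycled must already be signaled, which
 * amdgpu_ctx_wait_prev_fence() guarantees on the submission path.
 */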
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			  struct drm_sched_entity *entity,
			  struct dma_fence *fence, uint64_t *handle)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other;
	unsigned idx;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	if (other)
		BUG_ON(!dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(other);
	if (handle)
		*handle = seq;
}

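/* Look up the fence for sequence number @seq on @entity. A @seq of ~0ull
 * means the most recently submitted fence. Returns NULL when the fence has
 * already left the ring buffer (and is therefore signaled), or an ERR_PTR
 * for sequence numbers that were never submitted.
 */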
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}

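/* Apply a priority override to all entities of the context; an override of
 * DRM_SCHED_PRIORITY_UNSET falls back to the context's initial priority.
 */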
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  enum drm_sched_priority priority)
{
	unsigned num_entities = amdgpu_ctx_total_num_entities();
	enum drm_sched_priority ctx_prio;
	unsigned i;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	for (i = 0; i < num_entities; i++) {
		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;

		drm_sched_entity_set_priority(entity, ctx_prio);
	}
}

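/* Wait for the fence occupying the ring slot that the next submission will
 * reuse, so that amdgpu_ctx_add_fence() never overwrites an unsignaled
 * fence.
 */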
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
	struct dma_fence *other = centity->fences[idx];

	if (other) {
		signed long r;

		r = dma_fence_wait(other, true);
		if (r < 0) {
			if (r != -ERESTARTSYS)
				DRM_ERROR("Error (%ld) waiting for fence!\n", r);

			return r;
		}
	}

	return 0;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
	mutex_init(&mgr->lock);
	idr_init(&mgr->ctx_handles);
}

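/* Flush the entities of all contexts managed by @mgr: wait, with a shared
 * timeout budget, until their job queues have drained. Typically used when
 * the DRM file is closed.
 */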
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
{
	unsigned num_entities = amdgpu_ctx_total_num_entities();
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;
	long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		if (!ctx->adev) {
			mutex_unlock(&mgr->lock);
			return;
		}

		for (i = 0; i < num_entities; i++) {
			struct drm_sched_entity *entity;

			entity = &ctx->entities[0][i].entity;
			max_wait = drm_sched_entity_flush(entity, max_wait);
		}
	}
	mutex_unlock(&mgr->lock);
}

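/* Finalize the scheduler entities of all remaining contexts. Contexts that
 * still hold extra references are reported and skipped.
 */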
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	unsigned num_entities = amdgpu_ctx_total_num_entities();
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (!ctx->adev)
			return;

		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < num_entities; i++)
			drm_sched_entity_fini(&ctx->entities[0][i].entity);
	}
}

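/* Tear down the context manager: finalize all entities, drop the final
 * reference of every remaining context and destroy the handle IDR.
 */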
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}