xref: /openbmc/linux/drivers/gpu/drm/i915/gvt/execlist.c (revision 2f0f2441b4a10948e2ec042b48fef13680387f7c)
1 /*
2  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Zhiyuan Lv <zhiyuan.lv@intel.com>
25  *    Zhi Wang <zhi.a.wang@intel.com>
26  *
27  * Contributors:
28  *    Min He <min.he@intel.com>
29  *    Bing Niu <bing.niu@intel.com>
30  *    Ping Gao <ping.a.gao@intel.com>
31  *    Tina Zhang <tina.zhang@intel.com>
32  *
33  */
34 
35 #include "i915_drv.h"
36 #include "gvt.h"
37 
/*
 * Execlist register offsets, relative to an engine's MMIO base: the
 * execlist status register, the first context status buffer (CSB) entry
 * and the context status pointer register.
 */
#define _EL_OFFSET_STATUS       0x234
#define _EL_OFFSET_STATUS_BUF   0x370
#define _EL_OFFSET_STATUS_PTR   0x3A0

/*
 * Absolute MMIO offset of an execlist register on a ring: the engine's
 * mmio_base plus a register-relative offset.
 *
 * Every argument is parenthesized so that a non-trivial expression
 * (for instance "&gvt_instance") expands with the intended precedence.
 */
#define execlist_ring_mmio(gvt, ring_id, offset) \
	((gvt)->dev_priv->engine[(ring_id)]->mmio_base + (offset))

/* Non-zero when the descriptor's valid bit is set. */
#define valid_context(ctx) ((ctx)->valid)

/*
 * Two descriptors name the same context when both context_id and lrca
 * match. Each argument is evaluated twice, so pass expressions without
 * side effects.
 */
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))
48 
49 static int context_switch_events[] = {
50 	[RCS0]  = RCS_AS_CONTEXT_SWITCH,
51 	[BCS0]  = BCS_AS_CONTEXT_SWITCH,
52 	[VCS0]  = VCS_AS_CONTEXT_SWITCH,
53 	[VCS1]  = VCS2_AS_CONTEXT_SWITCH,
54 	[VECS0] = VECS_AS_CONTEXT_SWITCH,
55 };
56 
57 static int ring_id_to_context_switch_event(unsigned int ring_id)
58 {
59 	if (WARN_ON(ring_id >= ARRAY_SIZE(context_switch_events)))
60 		return -EINVAL;
61 
62 	return context_switch_events[ring_id];
63 }
64 
65 static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
66 {
67 	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
68 			execlist->running_slot ?
69 			execlist->running_slot->index : -1,
70 			execlist->running_context ?
71 			execlist->running_context->context_id : 0,
72 			execlist->pending_slot ?
73 			execlist->pending_slot->index : -1);
74 
75 	execlist->running_slot = execlist->pending_slot;
76 	execlist->pending_slot = NULL;
77 	execlist->running_context = execlist->running_context ?
78 		&execlist->running_slot->ctx[0] : NULL;
79 
80 	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
81 			execlist->running_slot ?
82 			execlist->running_slot->index : -1,
83 			execlist->running_context ?
84 			execlist->running_context->context_id : 0,
85 			execlist->pending_slot ?
86 			execlist->pending_slot->index : -1);
87 }
88 
89 static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
90 {
91 	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
92 	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
93 	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
94 	struct intel_vgpu *vgpu = execlist->vgpu;
95 	struct execlist_status_format status;
96 	int ring_id = execlist->ring_id;
97 	u32 status_reg = execlist_ring_mmio(vgpu->gvt,
98 			ring_id, _EL_OFFSET_STATUS);
99 
100 	status.ldw = vgpu_vreg(vgpu, status_reg);
101 	status.udw = vgpu_vreg(vgpu, status_reg + 4);
102 
103 	if (running) {
104 		status.current_execlist_pointer = !!running->index;
105 		status.execlist_write_pointer = !!!running->index;
106 		status.execlist_0_active = status.execlist_0_valid =
107 			!!!(running->index);
108 		status.execlist_1_active = status.execlist_1_valid =
109 			!!(running->index);
110 	} else {
111 		status.context_id = 0;
112 		status.execlist_0_active = status.execlist_0_valid = 0;
113 		status.execlist_1_active = status.execlist_1_valid = 0;
114 	}
115 
116 	status.context_id = desc ? desc->context_id : 0;
117 	status.execlist_queue_full = !!(pending);
118 
119 	vgpu_vreg(vgpu, status_reg) = status.ldw;
120 	vgpu_vreg(vgpu, status_reg + 4) = status.udw;
121 
122 	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
123 		vgpu->id, status_reg, status.ldw, status.udw);
124 }
125 
126 static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
127 		struct execlist_context_status_format *status,
128 		bool trigger_interrupt_later)
129 {
130 	struct intel_vgpu *vgpu = execlist->vgpu;
131 	int ring_id = execlist->ring_id;
132 	struct execlist_context_status_pointer_format ctx_status_ptr;
133 	u32 write_pointer;
134 	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
135 	unsigned long hwsp_gpa;
136 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
137 
138 	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
139 			_EL_OFFSET_STATUS_PTR);
140 	ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
141 			_EL_OFFSET_STATUS_BUF);
142 
143 	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
144 
145 	write_pointer = ctx_status_ptr.write_ptr;
146 
147 	if (write_pointer == 0x7)
148 		write_pointer = 0;
149 	else {
150 		++write_pointer;
151 		write_pointer %= 0x6;
152 	}
153 
154 	offset = ctx_status_buf_reg + write_pointer * 8;
155 
156 	vgpu_vreg(vgpu, offset) = status->ldw;
157 	vgpu_vreg(vgpu, offset + 4) = status->udw;
158 
159 	ctx_status_ptr.write_ptr = write_pointer;
160 	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
161 
162 	/* Update the CSB and CSB write pointer in HWSP */
163 	hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
164 					 vgpu->hws_pga[ring_id]);
165 	if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
166 		intel_gvt_hypervisor_write_gpa(vgpu,
167 			hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 +
168 			write_pointer * 8,
169 			status, 8);
170 		intel_gvt_hypervisor_write_gpa(vgpu,
171 			hwsp_gpa +
172 			intel_hws_csb_write_index(dev_priv) * 4,
173 			&write_pointer, 4);
174 	}
175 
176 	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
177 		vgpu->id, write_pointer, offset, status->ldw, status->udw);
178 
179 	if (trigger_interrupt_later)
180 		return;
181 
182 	intel_vgpu_trigger_virtual_event(vgpu,
183 			ring_id_to_context_switch_event(execlist->ring_id));
184 }
185 
186 static int emulate_execlist_ctx_schedule_out(
187 		struct intel_vgpu_execlist *execlist,
188 		struct execlist_ctx_descriptor_format *ctx)
189 {
190 	struct intel_vgpu *vgpu = execlist->vgpu;
191 	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
192 	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
193 	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
194 	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
195 	struct execlist_context_status_format status;
196 
197 	memset(&status, 0, sizeof(status));
198 
199 	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);
200 
201 	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
202 		gvt_vgpu_err("schedule out context is not running context,"
203 				"ctx id %x running ctx id %x\n",
204 				ctx->context_id,
205 				execlist->running_context->context_id);
206 		return -EINVAL;
207 	}
208 
209 	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
210 	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
211 		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");
212 
213 		execlist->running_context = ctx1;
214 
215 		emulate_execlist_status(execlist);
216 
217 		status.context_complete = status.element_switch = 1;
218 		status.context_id = ctx->context_id;
219 
220 		emulate_csb_update(execlist, &status, false);
221 		/*
222 		 * ctx1 is not valid, ctx == ctx0
223 		 * ctx1 is valid, ctx1 == ctx
224 		 *	--> last element is finished
225 		 * emulate:
226 		 *	active-to-idle if there is *no* pending execlist
227 		 *	context-complete if there *is* pending execlist
228 		 */
229 	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
230 			|| (valid_context(ctx1) && same_context(ctx1, ctx))) {
231 		gvt_dbg_el("need to switch virtual execlist slot\n");
232 
233 		switch_virtual_execlist_slot(execlist);
234 
235 		emulate_execlist_status(execlist);
236 
237 		status.context_complete = status.active_to_idle = 1;
238 		status.context_id = ctx->context_id;
239 
240 		if (!pending) {
241 			emulate_csb_update(execlist, &status, false);
242 		} else {
243 			emulate_csb_update(execlist, &status, true);
244 
245 			memset(&status, 0, sizeof(status));
246 
247 			status.idle_to_active = 1;
248 			status.context_id = 0;
249 
250 			emulate_csb_update(execlist, &status, false);
251 		}
252 	} else {
253 		WARN_ON(1);
254 		return -EINVAL;
255 	}
256 
257 	return 0;
258 }
259 
260 static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
261 		struct intel_vgpu_execlist *execlist)
262 {
263 	struct intel_vgpu *vgpu = execlist->vgpu;
264 	int ring_id = execlist->ring_id;
265 	u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
266 			_EL_OFFSET_STATUS);
267 	struct execlist_status_format status;
268 
269 	status.ldw = vgpu_vreg(vgpu, status_reg);
270 	status.udw = vgpu_vreg(vgpu, status_reg + 4);
271 
272 	if (status.execlist_queue_full) {
273 		gvt_vgpu_err("virtual execlist slots are full\n");
274 		return NULL;
275 	}
276 
277 	return &execlist->slot[status.execlist_write_pointer];
278 }
279 
280 static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
281 		struct execlist_ctx_descriptor_format ctx[2])
282 {
283 	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
284 	struct intel_vgpu_execlist_slot *slot =
285 		get_next_execlist_slot(execlist);
286 
287 	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
288 	struct execlist_context_status_format status;
289 	struct intel_vgpu *vgpu = execlist->vgpu;
290 
291 	gvt_dbg_el("emulate schedule-in\n");
292 
293 	if (!slot) {
294 		gvt_vgpu_err("no available execlist slot\n");
295 		return -EINVAL;
296 	}
297 
298 	memset(&status, 0, sizeof(status));
299 	memset(slot->ctx, 0, sizeof(slot->ctx));
300 
301 	slot->ctx[0] = ctx[0];
302 	slot->ctx[1] = ctx[1];
303 
304 	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
305 			slot->index, ctx[0].context_id,
306 			ctx[1].context_id);
307 
308 	/*
309 	 * no running execlist, make this write bundle as running execlist
310 	 * -> idle-to-active
311 	 */
312 	if (!running) {
313 		gvt_dbg_el("no current running execlist\n");
314 
315 		execlist->running_slot = slot;
316 		execlist->pending_slot = NULL;
317 		execlist->running_context = &slot->ctx[0];
318 
319 		gvt_dbg_el("running slot index %d running context %x\n",
320 				execlist->running_slot->index,
321 				execlist->running_context->context_id);
322 
323 		emulate_execlist_status(execlist);
324 
325 		status.idle_to_active = 1;
326 		status.context_id = 0;
327 
328 		emulate_csb_update(execlist, &status, false);
329 		return 0;
330 	}
331 
332 	ctx0 = &running->ctx[0];
333 	ctx1 = &running->ctx[1];
334 
335 	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
336 		running->index, ctx0->context_id, ctx1->context_id);
337 
338 	/*
339 	 * already has an running execlist
340 	 *	a. running ctx1 is valid,
341 	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
342 	 *	b. running ctx1 is not valid,
343 	 *	   ctx0 == new execlist ctx[0]
344 	 * ----> lite-restore + preempted
345 	 */
346 	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
347 		/* condition a */
348 		(!same_context(ctx0, execlist->running_context))) ||
349 			(!valid_context(ctx1) &&
350 			 same_context(ctx0, &slot->ctx[0]))) { /* condition b */
351 		gvt_dbg_el("need to switch virtual execlist slot\n");
352 
353 		execlist->pending_slot = slot;
354 		switch_virtual_execlist_slot(execlist);
355 
356 		emulate_execlist_status(execlist);
357 
358 		status.lite_restore = status.preempted = 1;
359 		status.context_id = ctx[0].context_id;
360 
361 		emulate_csb_update(execlist, &status, false);
362 	} else {
363 		gvt_dbg_el("emulate as pending slot\n");
364 		/*
365 		 * otherwise
366 		 * --> emulate pending execlist exist + but no preemption case
367 		 */
368 		execlist->pending_slot = slot;
369 		emulate_execlist_status(execlist);
370 	}
371 	return 0;
372 }
373 
/*
 * View descriptor @i of an ELSP dword set as a context descriptor; each
 * descriptor occupies two consecutive dwords of (ed)->data. The index is
 * parenthesized so an expression argument is doubled as a whole.
 */
#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[(i) * 2]))
376 
377 static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
378 {
379 	struct intel_vgpu *vgpu = workload->vgpu;
380 	struct intel_vgpu_submission *s = &vgpu->submission;
381 	struct execlist_ctx_descriptor_format ctx[2];
382 	int ring_id = workload->ring_id;
383 	int ret;
384 
385 	if (!workload->emulate_schedule_in)
386 		return 0;
387 
388 	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
389 	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
390 
391 	ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx);
392 	if (ret) {
393 		gvt_vgpu_err("fail to emulate execlist schedule in\n");
394 		return ret;
395 	}
396 	return 0;
397 }
398 
399 static int complete_execlist_workload(struct intel_vgpu_workload *workload)
400 {
401 	struct intel_vgpu *vgpu = workload->vgpu;
402 	int ring_id = workload->ring_id;
403 	struct intel_vgpu_submission *s = &vgpu->submission;
404 	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
405 	struct intel_vgpu_workload *next_workload;
406 	struct list_head *next = workload_q_head(vgpu, ring_id)->next;
407 	bool lite_restore = false;
408 	int ret = 0;
409 
410 	gvt_dbg_el("complete workload %p status %d\n", workload,
411 			workload->status);
412 
413 	if (workload->status || (vgpu->resetting_eng & BIT(ring_id)))
414 		goto out;
415 
416 	if (!list_empty(workload_q_head(vgpu, ring_id))) {
417 		struct execlist_ctx_descriptor_format *this_desc, *next_desc;
418 
419 		next_workload = container_of(next,
420 				struct intel_vgpu_workload, list);
421 		this_desc = &workload->ctx_desc;
422 		next_desc = &next_workload->ctx_desc;
423 
424 		lite_restore = same_context(this_desc, next_desc);
425 	}
426 
427 	if (lite_restore) {
428 		gvt_dbg_el("next context == current - no schedule-out\n");
429 		goto out;
430 	}
431 
432 	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
433 out:
434 	intel_vgpu_unpin_mm(workload->shadow_mm);
435 	intel_vgpu_destroy_workload(workload);
436 	return ret;
437 }
438 
439 static int submit_context(struct intel_vgpu *vgpu, int ring_id,
440 		struct execlist_ctx_descriptor_format *desc,
441 		bool emulate_schedule_in)
442 {
443 	struct intel_vgpu_submission *s = &vgpu->submission;
444 	struct intel_vgpu_workload *workload = NULL;
445 
446 	workload = intel_vgpu_create_workload(vgpu, ring_id, desc);
447 	if (IS_ERR(workload))
448 		return PTR_ERR(workload);
449 
450 	workload->prepare = prepare_execlist_workload;
451 	workload->complete = complete_execlist_workload;
452 	workload->emulate_schedule_in = emulate_schedule_in;
453 
454 	if (emulate_schedule_in)
455 		workload->elsp_dwords = s->execlist[ring_id].elsp_dwords;
456 
457 	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
458 			emulate_schedule_in);
459 
460 	intel_vgpu_queue_workload(workload);
461 	return 0;
462 }
463 
464 int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
465 {
466 	struct intel_vgpu_submission *s = &vgpu->submission;
467 	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
468 	struct execlist_ctx_descriptor_format *desc[2];
469 	int i, ret;
470 
471 	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
472 	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
473 
474 	if (!desc[0]->valid) {
475 		gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
476 		goto inv_desc;
477 	}
478 
479 	for (i = 0; i < ARRAY_SIZE(desc); i++) {
480 		if (!desc[i]->valid)
481 			continue;
482 		if (!desc[i]->privilege_access) {
483 			gvt_vgpu_err("unexpected GGTT elsp submission\n");
484 			goto inv_desc;
485 		}
486 	}
487 
488 	/* submit workload */
489 	for (i = 0; i < ARRAY_SIZE(desc); i++) {
490 		if (!desc[i]->valid)
491 			continue;
492 		ret = submit_context(vgpu, ring_id, desc[i], i == 0);
493 		if (ret) {
494 			gvt_vgpu_err("failed to submit desc %d\n", i);
495 			return ret;
496 		}
497 	}
498 
499 	return 0;
500 
501 inv_desc:
502 	gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
503 		     desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw);
504 	return -EINVAL;
505 }
506 
507 static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
508 {
509 	struct intel_vgpu_submission *s = &vgpu->submission;
510 	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
511 	struct execlist_context_status_pointer_format ctx_status_ptr;
512 	u32 ctx_status_ptr_reg;
513 
514 	memset(execlist, 0, sizeof(*execlist));
515 
516 	execlist->vgpu = vgpu;
517 	execlist->ring_id = ring_id;
518 	execlist->slot[0].index = 0;
519 	execlist->slot[1].index = 1;
520 
521 	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
522 			_EL_OFFSET_STATUS_PTR);
523 	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
524 	ctx_status_ptr.read_ptr = 0;
525 	ctx_status_ptr.write_ptr = 0x7;
526 	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
527 }
528 
529 static void clean_execlist(struct intel_vgpu *vgpu,
530 			   intel_engine_mask_t engine_mask)
531 {
532 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
533 	struct intel_engine_cs *engine;
534 	struct intel_vgpu_submission *s = &vgpu->submission;
535 	intel_engine_mask_t tmp;
536 
537 	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
538 		kfree(s->ring_scan_buffer[engine->id]);
539 		s->ring_scan_buffer[engine->id] = NULL;
540 		s->ring_scan_buffer_size[engine->id] = 0;
541 	}
542 }
543 
544 static void reset_execlist(struct intel_vgpu *vgpu,
545 			   intel_engine_mask_t engine_mask)
546 {
547 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
548 	struct intel_engine_cs *engine;
549 	intel_engine_mask_t tmp;
550 
551 	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
552 		init_vgpu_execlist(vgpu, engine->id);
553 }
554 
555 static int init_execlist(struct intel_vgpu *vgpu,
556 			 intel_engine_mask_t engine_mask)
557 {
558 	reset_execlist(vgpu, engine_mask);
559 	return 0;
560 }
561 
562 const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = {
563 	.name = "execlist",
564 	.init = init_execlist,
565 	.reset = reset_execlist,
566 	.clean = clean_execlist,
567 };
568