xref: /openbmc/linux/drivers/gpu/drm/i915/gvt/scheduler.c (revision 680ef72a)
1 /*
2  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Zhi Wang <zhi.a.wang@intel.com>
25  *
26  * Contributors:
27  *    Ping Gao <ping.a.gao@intel.com>
28  *    Tina Zhang <tina.zhang@intel.com>
29  *    Chanbin Du <changbin.du@intel.com>
30  *    Min He <min.he@intel.com>
31  *    Bing Niu <bing.niu@intel.com>
32  *    Zhenyu Wang <zhenyuw@linux.intel.com>
33  *
34  */
35 
36 #include <linux/kthread.h>
37 
38 #include "i915_drv.h"
39 #include "gvt.h"
40 
41 #define RING_CTX_OFF(x) \
42 	offsetof(struct execlist_ring_context, x)
43 
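/*
 * Copy the guest PDP root pointer values into the shadow ring context.
 * pdp[] is written in reverse order because the ring context stores the
 * PDP register pairs starting at pdp3_UDW and ending at pdp0_LDW.
 */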
44 static void set_context_pdp_root_pointer(
45 		struct execlist_ring_context *ring_context,
46 		u32 pdp[8])
47 {
48 	struct execlist_mmio_pair *pdp_pair = &ring_context->pdp3_UDW;
49 	int i;
50 
51 	for (i = 0; i < 8; i++)
52 		pdp_pair[i].val = pdp[7 - i];
53 }
54 
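/*
 * Copy the guest execlist context into the shadow (host) context: the
 * guest context pages are located through the vGPU's GGTT and read via
 * the hypervisor, selected ring-context registers are copied over, and
 * the PDP root pointers are replaced with the shadow page table roots
 * so the hardware walks GVT's shadow PPGTT instead of the guest's.
 */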
55 static int populate_shadow_context(struct intel_vgpu_workload *workload)
56 {
57 	struct intel_vgpu *vgpu = workload->vgpu;
58 	struct intel_gvt *gvt = vgpu->gvt;
59 	int ring_id = workload->ring_id;
60 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
61 	struct drm_i915_gem_object *ctx_obj =
62 		shadow_ctx->engine[ring_id].state->obj;
63 	struct execlist_ring_context *shadow_ring_context;
64 	struct page *page;
65 	void *dst;
66 	unsigned long context_gpa, context_page_num;
67 	int i;
68 
69 	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
70 			workload->ctx_desc.lrca);
71 
72 	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
73 
74 	context_page_num = context_page_num >> PAGE_SHIFT;
75 
76 	if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
77 		context_page_num = 19;
78 
79 	i = 2;
80 
81 	while (i < context_page_num) {
82 		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
83 				(u32)((workload->ctx_desc.lrca + i) <<
84 				GTT_PAGE_SHIFT));
85 		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
86 			gvt_vgpu_err("Invalid guest context descriptor\n");
87 			return -EINVAL;
88 		}
89 
90 		page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
91 		dst = kmap(page);
92 		intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
93 				GTT_PAGE_SIZE);
94 		kunmap(page);
95 		i++;
96 	}
97 
98 	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
99 	shadow_ring_context = kmap(page);
100 
101 #define COPY_REG(name) \
102 	intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
103 		+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
104 
105 	COPY_REG(ctx_ctrl);
106 	COPY_REG(ctx_timestamp);
107 
108 	if (ring_id == RCS) {
109 		COPY_REG(bb_per_ctx_ptr);
110 		COPY_REG(rcs_indirect_ctx);
111 		COPY_REG(rcs_indirect_ctx_offset);
112 	}
113 #undef COPY_REG
114 
115 	set_context_pdp_root_pointer(shadow_ring_context,
116 				     workload->shadow_mm->shadow_page_table);
117 
118 	intel_gvt_hypervisor_read_gpa(vgpu,
119 			workload->ring_context_gpa +
120 			sizeof(*shadow_ring_context),
121 			(void *)shadow_ring_context +
122 			sizeof(*shadow_ring_context),
123 			GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
124 
125 	kunmap(page);
126 	return 0;
127 }
128 
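/*
 * GVT's shadow contexts are created with force-single-submission set,
 * so that flag is used to tell requests submitted by GVT itself apart
 * from ordinary host i915 requests.
 */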
129 static inline bool is_gvt_request(struct drm_i915_gem_request *req)
130 {
131 	return i915_gem_context_force_single_submission(req->ctx);
132 }
133 
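/*
 * Context status notifier, called by i915 when a context is scheduled
 * in or out on an engine. For host-owned requests it switches the
 * engine MMIO state back from the previous vGPU owner to the host; for
 * GVT requests it switches MMIO state to the workload's vGPU and tracks
 * whether the shadow context is currently active on the hardware.
 */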
134 static int shadow_context_status_change(struct notifier_block *nb,
135 		unsigned long action, void *data)
136 {
137 	struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data;
138 	struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
139 				shadow_ctx_notifier_block[req->engine->id]);
140 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
141 	enum intel_engine_id ring_id = req->engine->id;
142 	struct intel_vgpu_workload *workload;
143 
144 	if (!is_gvt_request(req)) {
145 		spin_lock_bh(&scheduler->mmio_context_lock);
146 		if (action == INTEL_CONTEXT_SCHEDULE_IN &&
147 		    scheduler->engine_owner[ring_id]) {
148 			/* Switch ring from vGPU to host. */
149 			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
150 					      NULL, ring_id);
151 			scheduler->engine_owner[ring_id] = NULL;
152 		}
153 		spin_unlock_bh(&scheduler->mmio_context_lock);
154 
155 		return NOTIFY_OK;
156 	}
157 
158 	workload = scheduler->current_workload[ring_id];
159 	if (unlikely(!workload))
160 		return NOTIFY_OK;
161 
162 	switch (action) {
163 	case INTEL_CONTEXT_SCHEDULE_IN:
164 		spin_lock_bh(&scheduler->mmio_context_lock);
165 		if (workload->vgpu != scheduler->engine_owner[ring_id]) {
166 			/* Switch ring from host to vGPU or vGPU to vGPU. */
167 			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
168 					      workload->vgpu, ring_id);
169 			scheduler->engine_owner[ring_id] = workload->vgpu;
170 		} else
171 			gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
172 				      ring_id, workload->vgpu->id);
173 		spin_unlock_bh(&scheduler->mmio_context_lock);
174 		atomic_set(&workload->shadow_ctx_active, 1);
175 		break;
176 	case INTEL_CONTEXT_SCHEDULE_OUT:
177 	case INTEL_CONTEXT_SCHEDULE_PREEMPTED:
178 		atomic_set(&workload->shadow_ctx_active, 0);
179 		break;
180 	default:
181 		WARN_ON(1);
182 		return NOTIFY_OK;
183 	}
184 	wake_up(&workload->shadow_ctx_status_wq);
185 	return NOTIFY_OK;
186 }
187 
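/*
 * Refresh the low 12 bits of the shadow context's cached LRC descriptor
 * from desc_template, so that per-workload fields such as the
 * addressing mode take effect.
 */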
188 static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
189 		struct intel_engine_cs *engine)
190 {
191 	struct intel_context *ce = &ctx->engine[engine->id];
192 	u64 desc = 0;
193 
194 	desc = ce->lrc_desc;
195 
196 	/* Update bits 0-11 of the context descriptor which includes flags
197 	 * like GEN8_CTX_* cached in desc_template
198 	 */
199 	desc &= U64_MAX << 12;
200 	desc |= ctx->desc_template & ((1ULL << 12) - 1);
201 
202 	ce->lrc_desc = desc;
203 }
204 
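/*
 * Reserve space in the shadow context's ring buffer through
 * intel_ring_begin() and copy the scanned guest ring buffer contents
 * into it.
 */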
205 static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
206 {
207 	struct intel_vgpu *vgpu = workload->vgpu;
208 	void *shadow_ring_buffer_va;
209 	u32 *cs;
210 
211 	/* allocate shadow ring buffer */
212 	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
213 	if (IS_ERR(cs)) {
214 		gvt_vgpu_err("fail to alloc size=%ld shadow ring buffer\n",
215 			workload->rb_len);
216 		return PTR_ERR(cs);
217 	}
218 
219 	shadow_ring_buffer_va = workload->shadow_ring_buffer_va;
220 
221 	/* get shadow ring buffer va */
222 	workload->shadow_ring_buffer_va = cs;
223 
224 	memcpy(cs, shadow_ring_buffer_va,
225 			workload->rb_len);
226 
227 	cs += workload->rb_len / sizeof(u32);
228 	intel_ring_advance(workload->req, cs);
229 
230 	return 0;
231 }
232 
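/* Drop the mapping and the reference of the shadow indirect context object. */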
233 void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
234 {
235 	if (!wa_ctx->indirect_ctx.obj)
236 		return;
237 
238 	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
239 	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
240 }
241 
242 /**
243  * intel_gvt_scan_and_shadow_workload - audit a workload by scanning it, and
244  * shadow it as well, including its ring buffer, wa_ctx and context.
245  * @workload: an abstract entity for each execlist submission.
246  *
247  * This function is called before the workload is submitted to i915, to make
248  * sure the content of the workload is valid.
249  */
250 int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
251 {
252 	int ring_id = workload->ring_id;
253 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
254 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
255 	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
256 	struct drm_i915_gem_request *rq;
257 	struct intel_vgpu *vgpu = workload->vgpu;
258 	struct intel_ring *ring;
259 	int ret;
260 
261 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
262 
263 	if (workload->shadowed)
264 		return 0;
265 
266 	shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
267 	shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
268 				    GEN8_CTX_ADDRESSING_MODE_SHIFT;
269 
270 	if (!test_and_set_bit(ring_id, vgpu->shadow_ctx_desc_updated))
271 		shadow_context_descriptor_update(shadow_ctx,
272 					dev_priv->engine[ring_id]);
273 
274 	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
275 	if (ret)
276 		goto err_scan;
277 
278 	if ((workload->ring_id == RCS) &&
279 	    (workload->wa_ctx.indirect_ctx.size != 0)) {
280 		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
281 		if (ret)
282 			goto err_scan;
283 	}
284 
285 	/* Pin the shadow context by GVT even though it will also be pinned
286 	 * when i915 allocates the request. That is because GVT updates the
287 	 * guest context from the shadow context when the workload completes,
288 	 * and by that moment i915 may already have unpinned the shadow
289 	 * context, making the shadow_ctx pages invalid. So GVT needs its own
290 	 * pin; after updating the guest context, GVT can unpin it safely.
291 	 */
292 	ring = engine->context_pin(engine, shadow_ctx);
293 	if (IS_ERR(ring)) {
294 		ret = PTR_ERR(ring);
295 		gvt_vgpu_err("fail to pin shadow context\n");
296 		goto err_shadow;
297 	}
298 
299 	ret = populate_shadow_context(workload);
300 	if (ret)
301 		goto err_unpin;
302 
303 	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
304 	if (IS_ERR(rq)) {
305 		gvt_vgpu_err("fail to allocate gem request\n");
306 		ret = PTR_ERR(rq);
307 		goto err_unpin;
308 	}
309 
310 	gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
311 
312 	workload->req = i915_gem_request_get(rq);
313 	ret = copy_workload_to_ring_buffer(workload);
314 	if (ret)
315 		goto err_unpin;
316 	workload->shadowed = true;
317 	return 0;
318 
319 err_unpin:
320 	engine->context_unpin(engine, shadow_ctx);
321 err_shadow:
322 	release_shadow_wa_ctx(&workload->wa_ctx);
323 err_scan:
324 	return ret;
325 }
326 
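/*
 * Shadow the workload if that has not been done yet, run its prepare
 * callback and submit the shadow request to i915. Any error is recorded
 * in workload->status; a request that was already allocated is still
 * added to i915 even if a later step fails.
 */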
327 static int dispatch_workload(struct intel_vgpu_workload *workload)
328 {
329 	int ring_id = workload->ring_id;
330 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
331 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
332 	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
333 	int ret = 0;
334 
335 	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
336 		ring_id, workload);
337 
338 	mutex_lock(&dev_priv->drm.struct_mutex);
339 
340 	ret = intel_gvt_scan_and_shadow_workload(workload);
341 	if (ret)
342 		goto out;
343 
344 	if (workload->prepare) {
345 		ret = workload->prepare(workload);
346 		if (ret) {
347 			engine->context_unpin(engine, shadow_ctx);
348 			goto out;
349 		}
350 	}
351 
352 out:
353 	if (ret)
354 		workload->status = ret;
355 
356 	if (!IS_ERR_OR_NULL(workload->req)) {
357 		gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
358 				ring_id, workload->req);
359 		i915_add_request(workload->req);
360 		workload->dispatched = true;
361 	}
362 
363 	mutex_unlock(&dev_priv->drm.struct_mutex);
364 	return ret;
365 }
366 
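/*
 * Select the next workload for a ring from the current vgpu's queue and
 * make it the scheduler's current workload. Returns NULL when there is
 * no current vgpu, a reschedule is pending or the queue is empty; if a
 * previously picked workload is still current, it is returned again so
 * that it can be resubmitted.
 */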
367 static struct intel_vgpu_workload *pick_next_workload(
368 		struct intel_gvt *gvt, int ring_id)
369 {
370 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
371 	struct intel_vgpu_workload *workload = NULL;
372 
373 	mutex_lock(&gvt->lock);
374 
375 	/*
376 	 * no current vgpu / will be scheduled out / no workload
377 	 * bail out
378 	 */
379 	if (!scheduler->current_vgpu) {
380 		gvt_dbg_sched("ring id %d stop - no current vgpu\n", ring_id);
381 		goto out;
382 	}
383 
384 	if (scheduler->need_reschedule) {
385 		gvt_dbg_sched("ring id %d stop - will reschedule\n", ring_id);
386 		goto out;
387 	}
388 
389 	if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id)))
390 		goto out;
391 
392 	/*
393 	 * There is still a current workload; the workload dispatcher may
394 	 * have failed to submit it for some reason, so resubmit it.
395 	 */
396 	if (scheduler->current_workload[ring_id]) {
397 		workload = scheduler->current_workload[ring_id];
398 		gvt_dbg_sched("ring id %d still have current workload %p\n",
399 				ring_id, workload);
400 		goto out;
401 	}
402 
403 	/*
404 	 * Pick a workload as the current workload. Once the current
405 	 * workload is set, the scheduling policy routines will wait for
406 	 * the current workload to finish when trying to schedule out
407 	 * a vgpu.
408 	 */
409 	scheduler->current_workload[ring_id] = container_of(
410 			workload_q_head(scheduler->current_vgpu, ring_id)->next,
411 			struct intel_vgpu_workload, list);
412 
413 	workload = scheduler->current_workload[ring_id];
414 
415 	gvt_dbg_sched("ring id %d pick new workload %p\n", ring_id, workload);
416 
417 	atomic_inc(&workload->vgpu->running_workload_num);
418 out:
419 	mutex_unlock(&gvt->lock);
420 	return workload;
421 }
422 
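/*
 * The counterpart of populate_shadow_context: once the workload has
 * completed, write the shadow context pages and the tracked ring
 * context registers back into the guest context so the guest sees the
 * results of execution.
 */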
423 static void update_guest_context(struct intel_vgpu_workload *workload)
424 {
425 	struct intel_vgpu *vgpu = workload->vgpu;
426 	struct intel_gvt *gvt = vgpu->gvt;
427 	int ring_id = workload->ring_id;
428 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
429 	struct drm_i915_gem_object *ctx_obj =
430 		shadow_ctx->engine[ring_id].state->obj;
431 	struct execlist_ring_context *shadow_ring_context;
432 	struct page *page;
433 	void *src;
434 	unsigned long context_gpa, context_page_num;
435 	int i;
436 
437 	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
438 			workload->ctx_desc.lrca);
439 
440 	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
441 
442 	context_page_num = context_page_num >> PAGE_SHIFT;
443 
444 	if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
445 		context_page_num = 19;
446 
447 	i = 2;
448 
449 	while (i < context_page_num) {
450 		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
451 				(u32)((workload->ctx_desc.lrca + i) <<
452 					GTT_PAGE_SHIFT));
453 		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
454 			gvt_vgpu_err("invalid guest context descriptor\n");
455 			return;
456 		}
457 
458 		page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
459 		src = kmap(page);
460 		intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
461 				GTT_PAGE_SIZE);
462 		kunmap(page);
463 		i++;
464 	}
465 
466 	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
467 		RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
468 
469 	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
470 	shadow_ring_context = kmap(page);
471 
472 #define COPY_REG(name) \
473 	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
474 		RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
475 
476 	COPY_REG(ctx_ctrl);
477 	COPY_REG(ctx_timestamp);
478 
479 #undef COPY_REG
480 
481 	intel_gvt_hypervisor_write_gpa(vgpu,
482 			workload->ring_context_gpa +
483 			sizeof(*shadow_ring_context),
484 			(void *)shadow_ring_context +
485 			sizeof(*shadow_ring_context),
486 			GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
487 
488 	kunmap(page);
489 }
490 
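/*
 * Retire the scheduler's current workload on a ring: wait until the
 * shadow context has been switched out, propagate the request status,
 * write the shadow context back to the guest and fire pending virtual
 * events (when the workload succeeded and the engine is not being
 * reset), unpin the shadow context and hand the workload to its
 * complete() callback.
 */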
491 static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
492 {
493 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
494 	struct intel_vgpu_workload *workload;
495 	struct intel_vgpu *vgpu;
496 	int event;
497 
498 	mutex_lock(&gvt->lock);
499 
500 	workload = scheduler->current_workload[ring_id];
501 	vgpu = workload->vgpu;
502 
503 	/* For a workload with a request, wait for the context switch to
504 	 * make sure the request is completed.
505 	 * For a workload without a request, complete the workload directly.
506 	 */
507 	if (workload->req) {
508 		struct drm_i915_private *dev_priv =
509 			workload->vgpu->gvt->dev_priv;
510 		struct intel_engine_cs *engine =
511 			dev_priv->engine[workload->ring_id];
512 		wait_event(workload->shadow_ctx_status_wq,
513 			   !atomic_read(&workload->shadow_ctx_active));
514 
515 		/* If this request caused a GPU hang, req->fence.error will
516 		 * be set to -EIO. Propagate -EIO into the workload status
517 		 * so that a request which hung the GPU does not trigger a
518 		 * context switch interrupt to the guest.
519 		 */
520 		if (likely(workload->status == -EINPROGRESS)) {
521 			if (workload->req->fence.error == -EIO)
522 				workload->status = -EIO;
523 			else
524 				workload->status = 0;
525 		}
526 
527 		i915_gem_request_put(fetch_and_zero(&workload->req));
528 
529 		if (!workload->status && !(vgpu->resetting_eng &
530 					   ENGINE_MASK(ring_id))) {
531 			update_guest_context(workload);
532 
533 			for_each_set_bit(event, workload->pending_events,
534 					 INTEL_GVT_EVENT_MAX)
535 				intel_vgpu_trigger_virtual_event(vgpu, event);
536 		}
537 		mutex_lock(&dev_priv->drm.struct_mutex);
538 		/* unpin shadow ctx as the shadow_ctx update is done */
539 		engine->context_unpin(engine, workload->vgpu->shadow_ctx);
540 		mutex_unlock(&dev_priv->drm.struct_mutex);
541 	}
542 
543 	gvt_dbg_sched("ring id %d complete workload %p status %d\n",
544 			ring_id, workload, workload->status);
545 
546 	scheduler->current_workload[ring_id] = NULL;
547 
548 	list_del_init(&workload->list);
549 	workload->complete(workload);
550 
551 	atomic_dec(&vgpu->running_workload_num);
552 	wake_up(&scheduler->workload_complete_wq);
553 
554 	if (gvt->scheduler.need_reschedule)
555 		intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
556 
557 	mutex_unlock(&gvt->lock);
558 }
559 
560 struct workload_thread_param {
561 	struct intel_gvt *gvt;
562 	int ring_id;
563 };
564 
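/*
 * Per-ring worker thread: wait for a workload to become available,
 * dispatch it to i915 under a runtime PM reference (and forcewake on
 * the platforms that need it), wait for the request to finish and then
 * complete the workload.
 */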
565 static int workload_thread(void *priv)
566 {
567 	struct workload_thread_param *p = (struct workload_thread_param *)priv;
568 	struct intel_gvt *gvt = p->gvt;
569 	int ring_id = p->ring_id;
570 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
571 	struct intel_vgpu_workload *workload = NULL;
572 	struct intel_vgpu *vgpu = NULL;
573 	int ret;
574 	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv)
575 			|| IS_KABYLAKE(gvt->dev_priv);
576 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
577 
578 	kfree(p);
579 
580 	gvt_dbg_core("workload thread for ring %d started\n", ring_id);
581 
582 	while (!kthread_should_stop()) {
583 		add_wait_queue(&scheduler->waitq[ring_id], &wait);
584 		do {
585 			workload = pick_next_workload(gvt, ring_id);
586 			if (workload)
587 				break;
588 			wait_woken(&wait, TASK_INTERRUPTIBLE,
589 				   MAX_SCHEDULE_TIMEOUT);
590 		} while (!kthread_should_stop());
591 		remove_wait_queue(&scheduler->waitq[ring_id], &wait);
592 
593 		if (!workload)
594 			break;
595 
596 		gvt_dbg_sched("ring id %d next workload %p vgpu %d\n",
597 				workload->ring_id, workload,
598 				workload->vgpu->id);
599 
600 		intel_runtime_pm_get(gvt->dev_priv);
601 
602 		gvt_dbg_sched("ring id %d will dispatch workload %p\n",
603 				workload->ring_id, workload);
604 
605 		if (need_force_wake)
606 			intel_uncore_forcewake_get(gvt->dev_priv,
607 					FORCEWAKE_ALL);
608 
609 		mutex_lock(&gvt->lock);
610 		ret = dispatch_workload(workload);
611 		mutex_unlock(&gvt->lock);
612 
613 		if (ret) {
614 			vgpu = workload->vgpu;
615 			gvt_vgpu_err("fail to dispatch workload, skip\n");
616 			goto complete;
617 		}
618 
619 		gvt_dbg_sched("ring id %d wait workload %p\n",
620 				workload->ring_id, workload);
621 		i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT);
622 
623 complete:
624 		gvt_dbg_sched("will complete workload %p, status: %d\n",
625 				workload, workload->status);
626 
627 		complete_current_workload(gvt, ring_id);
628 
629 		if (need_force_wake)
630 			intel_uncore_forcewake_put(gvt->dev_priv,
631 					FORCEWAKE_ALL);
632 
633 		intel_runtime_pm_put(gvt->dev_priv);
634 	}
635 	return 0;
636 }
637 
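/**
 * intel_gvt_wait_vgpu_idle - wait until a vGPU has no running workloads
 * @vgpu: a vGPU
 *
 * Block until every workload previously submitted by this vGPU has been
 * completed by the workload scheduler.
 */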
638 void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu)
639 {
640 	struct intel_gvt *gvt = vgpu->gvt;
641 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
642 
643 	if (atomic_read(&vgpu->running_workload_num)) {
644 		gvt_dbg_sched("wait vgpu idle\n");
645 
646 		wait_event(scheduler->workload_complete_wq,
647 				!atomic_read(&vgpu->running_workload_num));
648 	}
649 }
650 
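/*
 * Tear down the workload scheduler: unregister the per-engine context
 * status notifiers and stop the per-ring worker threads.
 */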
651 void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt)
652 {
653 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
654 	struct intel_engine_cs *engine;
655 	enum intel_engine_id i;
656 
657 	gvt_dbg_core("clean workload scheduler\n");
658 
659 	for_each_engine(engine, gvt->dev_priv, i) {
660 		atomic_notifier_chain_unregister(
661 					&engine->context_status_notifier,
662 					&gvt->shadow_ctx_notifier_block[i]);
663 		kthread_stop(scheduler->thread[i]);
664 	}
665 }
666 
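/*
 * Set up the workload scheduler: create one worker thread per engine
 * and register a context status notifier on each engine so that MMIO
 * and shadow context state can be switched on schedule in/out events.
 */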
667 int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
668 {
669 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
670 	struct workload_thread_param *param = NULL;
671 	struct intel_engine_cs *engine;
672 	enum intel_engine_id i;
673 	int ret;
674 
675 	gvt_dbg_core("init workload scheduler\n");
676 
677 	init_waitqueue_head(&scheduler->workload_complete_wq);
678 
679 	for_each_engine(engine, gvt->dev_priv, i) {
680 		init_waitqueue_head(&scheduler->waitq[i]);
681 
682 		param = kzalloc(sizeof(*param), GFP_KERNEL);
683 		if (!param) {
684 			ret = -ENOMEM;
685 			goto err;
686 		}
687 
688 		param->gvt = gvt;
689 		param->ring_id = i;
690 
691 		scheduler->thread[i] = kthread_run(workload_thread, param,
692 			"gvt workload %d", i);
693 		if (IS_ERR(scheduler->thread[i])) {
694 			gvt_err("fail to create workload thread\n");
695 			ret = PTR_ERR(scheduler->thread[i]);
696 			goto err;
697 		}
698 
699 		gvt->shadow_ctx_notifier_block[i].notifier_call =
700 					shadow_context_status_change;
701 		atomic_notifier_chain_register(&engine->context_status_notifier,
702 					&gvt->shadow_ctx_notifier_block[i]);
703 	}
704 	return 0;
705 err:
706 	intel_gvt_clean_workload_scheduler(gvt);
707 	kfree(param);
708 	param = NULL;
709 	return ret;
710 }
711 
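/* Release the vGPU's shadow GEM context. */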
712 void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu)
713 {
714 	i915_gem_context_put(vgpu->shadow_ctx);
715 }
716 
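/*
 * Create the per-vGPU shadow GEM context that shadowed workloads are
 * submitted with, and reset the per-engine descriptor-updated tracking.
 */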
717 int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu)
718 {
719 	atomic_set(&vgpu->running_workload_num, 0);
720 
721 	vgpu->shadow_ctx = i915_gem_context_create_gvt(
722 			&vgpu->gvt->dev_priv->drm);
723 	if (IS_ERR(vgpu->shadow_ctx))
724 		return PTR_ERR(vgpu->shadow_ctx);
725 
726 	vgpu->shadow_ctx->engine[RCS].initialised = true;
727 
728 	bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES);
729 
730 	return 0;
731 }
732