/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"

#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"
#include "shmem_utils.h"

static int __engine_unpark(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);
	struct intel_context *ce;

	ENGINE_TRACE(engine, "\n");

	intel_gt_pm_get(engine->gt);

	/* Discard stale context state from across idling */
	ce = engine->kernel_context;
	if (ce) {
		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));

		/* First poison the image to verify we never fully trust it */
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
			struct drm_i915_gem_object *obj = ce->state->obj;
			int type = i915_coherent_map_type(engine->i915);
			void *map;

			map = i915_gem_object_pin_map(obj, type);
			if (!IS_ERR(map)) {
				memset(map, CONTEXT_REDZONE, obj->base.size);
				i915_gem_object_flush_map(obj);
				i915_gem_object_unpin_map(obj);
			}
		}

		ce->ops->reset(ce);
	}

	if (engine->unpark)
		engine->unpark(engine);

	intel_engine_unpark_heartbeat(engine);
	return 0;
}

#if IS_ENABLED(CONFIG_LOCKDEP)

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	unsigned long flags;

	local_irq_save(flags);
	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);

	return flags;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
	local_irq_restore(flags);
}

#else

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	return 0;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
}

#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */

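/*
 * Fence callback for the parking request: sample how long it took from
 * emitting the switch to the kernel context (rq->duration.emitted, set
 * below in switch_to_kernel_context()) until its fence was signalled,
 * and feed that delta into the engine's latency estimate.
 */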
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_request *rq = to_request(fence);

	ewma__engine_latency_add(&rq->engine->latency,
				 ktime_us_delta(rq->fence.timestamp,
						rq->duration.emitted));
}

static void
__queue_and_release_pm(struct i915_request *rq,
		       struct intel_timeline *tl,
		       struct intel_engine_cs *engine)
{
	struct intel_gt_timelines *timelines = &engine->gt->timelines;

	ENGINE_TRACE(engine, "parking\n");

	/*
	 * We have to serialise all potential retirement paths with our
	 * submission, as we don't want to underflow either the
	 * engine->wakeref.counter or our timeline->active_count.
	 *
	 * Equally, we cannot allow a new submission to start until
	 * after we finish queueing, nor could we allow that submitter
	 * to retire us before we are ready!
	 */
	spin_lock(&timelines->lock);

	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);

	/* Hand the request over to HW and so to engine_retire() */
	__i915_request_queue(rq, NULL);

	/* Let new submissions commence (and maybe retire this timeline) */
	__intel_wakeref_defer_park(&engine->wakeref);

	spin_unlock(&timelines->lock);
}

static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	unsigned long flags;
	bool result = true;

	/* GPU is pointing to the void, as good as in the kernel context. */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	GEM_BUG_ON(!intel_context_is_barrier(ce));
	GEM_BUG_ON(ce->timeline->hwsp_ggtt != engine->status_page.vma);

	/* Already inside the kernel context, safe to power down. */
	if (engine->wakeref_serial == engine->serial)
		return true;

	/*
	 * Note, we do this without taking the timeline->mutex. We cannot
	 * as we may be called while retiring the kernel context and so
	 * already underneath the timeline->mutex. Instead we rely on the
	 * exclusive property of the __engine_park that prevents anyone
	 * else from creating a request on this engine. This also requires
	 * that the ring is empty and we avoid any waits while constructing
	 * the context, as they assume protection by the timeline->mutex.
	 * This should hold true as we can only park the engine after
	 * retiring the last request, thus all rings should be empty and
	 * all timelines idle.
	 *
	 * For unlocking, there are 2 other parties and the GPU who have a
	 * stake here.
	 *
	 * A new gpu user will be waiting on the engine-pm to start their
	 * engine_unpark. New waiters are predicated on engine->wakeref.count
	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
	 * engine->wakeref.
	 *
	 * The other party is intel_gt_retire_requests(), which is walking the
	 * list of active timelines looking for completions. Meanwhile as soon
	 * as we call __i915_request_queue(), the GPU may complete our request.
	 * Ergo, if we put ourselves on the timelines.active_list
	 * (see intel_timeline_enter()) before we increment the
	 * engine->wakeref.count, we may see the request completion and retire
	 * it causing an underflow of the engine->wakeref.
	 */
	flags = __timeline_mark_lock(ce);
	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

	rq = __i915_request_create(ce, GFP_NOWAIT);
	if (IS_ERR(rq))
		/* Context switch failed, hope for the best! Maybe reset? */
		goto out_unlock;

	/* Check again on the next retirement. */
	engine->wakeref_serial = engine->serial + 1;
	i915_request_add_active_barriers(rq);

	/* Install ourselves as a preemption barrier */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
		/*
		 * Use an interrupt for precise measurement of duration,
		 * otherwise we rely on someone else retiring all the requests
		 * which may delay the signaling (i.e. we will likely wait
		 * until the background request retirement running every
		 * second or two).
		 */
		BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
		dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
		rq->duration.emitted = ktime_get();
	}

	/* Expose ourselves to the world */
	__queue_and_release_pm(rq, ce->timeline, engine);

	result = false;
out_unlock:
	__timeline_mark_unlock(ce, flags);
	return result;
}

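/*
 * Flush any idle barriers that were never attached to a request (e.g.
 * after wedging, when no kernel context switch is emitted). Each callback
 * is invoked directly, with ERR_PTR(-EAGAIN) standing in for the fence.
 */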
static void call_idle_barriers(struct intel_engine_cs *engine)
{
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		struct dma_fence_cb *cb =
			container_of((struct list_head *)node,
				     typeof(*cb), node);

		cb->func(ERR_PTR(-EAGAIN), cb);
	}
}

static int __engine_park(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);

	engine->saturated = 0;

	/*
	 * If one and only one request is completed between pm events,
	 * we know that we are inside the kernel context and it is
	 * safe to power down. (We are paranoid in case that runtime
	 * suspend causes corruption to the active context image, and
	 * want to avoid that impacting userspace.)
	 */
	if (!switch_to_kernel_context(engine))
		return -EBUSY;

	ENGINE_TRACE(engine, "parked\n");

	call_idle_barriers(engine); /* cleanup after wedging */

	intel_engine_park_heartbeat(engine);
	intel_breadcrumbs_park(engine->breadcrumbs);

	/* Must be reset upon idling, or we may miss the busy wakeup. */
	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

	if (engine->park)
		engine->park(engine);

	engine->execlists.no_priolist = false;

	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
	intel_gt_pm_put_async(engine->gt);
	return 0;
}

static const struct intel_wakeref_ops wf_ops = {
	.get = __engine_unpark,
	.put = __engine_park,
};

void intel_engine_init__pm(struct intel_engine_cs *engine)
{
	struct intel_runtime_pm *rpm = engine->uncore->rpm;

	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
	intel_engine_init_heartbeat(engine);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif