/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"

static int __engine_unpark(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);
	void *map;

	ENGINE_TRACE(engine, "\n");

	intel_gt_pm_get(engine->gt);

	/* Pin the default state for fast resets from atomic context. */
	map = NULL;
	if (engine->default_state)
		map = i915_gem_object_pin_map(engine->default_state,
					      I915_MAP_WB);
	if (!IS_ERR_OR_NULL(map))
		engine->pinned_default_state = map;

	if (engine->unpark)
		engine->unpark(engine);

	intel_engine_unpark_heartbeat(engine);
	return 0;
}

#if IS_ENABLED(CONFIG_LOCKDEP)

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	unsigned long flags;

	local_irq_save(flags);
	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);

	return flags;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
	local_irq_restore(flags);
}

#else

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	return 0;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
}

#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */

static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_request *rq = to_request(fence);

	ewma__engine_latency_add(&rq->engine->latency,
				 ktime_us_delta(rq->fence.timestamp,
						rq->duration.emitted));
}

static void
__queue_and_release_pm(struct i915_request *rq,
		       struct intel_timeline *tl,
		       struct intel_engine_cs *engine)
{
	struct intel_gt_timelines *timelines = &engine->gt->timelines;

	ENGINE_TRACE(engine, "\n");

	/*
	 * We have to serialise all potential retirement paths with our
	 * submission, as we don't want to underflow either the
	 * engine->wakeref.counter or our timeline->active_count.
	 *
	 * Equally, we cannot allow a new submission to start until
	 * after we finish queueing, nor could we allow that submitter
	 * to retire us before we are ready!
	 */
	spin_lock(&timelines->lock);

	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);

	/* Hand the request over to HW and so engine_retire() */
	__i915_request_queue(rq, NULL);

	/* Let new submissions commence (and maybe retire this timeline) */
	__intel_wakeref_defer_park(&engine->wakeref);

	spin_unlock(&timelines->lock);
}
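/*
 * switch_to_kernel_context() is the heart of parking: it emits one final
 * request on the engine's kernel context, carrying the accumulated idle
 * barriers and acting as a preemption barrier. It returns true if the
 * engine is already idle in the kernel context (or wedged) and may be
 * powered down immediately, and false if the switch had to be queued
 * instead, in which case parking is deferred (via __queue_and_release_pm())
 * until that barrier request has been retired.
 */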
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	unsigned long flags;
	bool result = true;

	GEM_BUG_ON(!intel_context_is_barrier(ce));

	/* Already inside the kernel context, safe to power down. */
	if (engine->wakeref_serial == engine->serial)
		return true;

	/* GPU is pointing to the void, as good as in the kernel context. */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	/*
	 * Note, we do this without taking the timeline->mutex. We cannot
	 * as we may be called while retiring the kernel context and so
	 * already underneath the timeline->mutex. Instead we rely on the
	 * exclusive property of the __engine_park that prevents anyone
	 * else from creating a request on this engine. This also requires
	 * that the ring is empty and we avoid any waits while constructing
	 * the context, as they assume protection by the timeline->mutex.
	 * This should hold true as we can only park the engine after
	 * retiring the last request, thus all rings should be empty and
	 * all timelines idle.
	 *
	 * For unlocking, there are 2 other parties and the GPU who have a
	 * stake here.
	 *
	 * A new gpu user will be waiting on the engine-pm to start their
	 * engine_unpark. New waiters are predicated on engine->wakeref.count
	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
	 * engine->wakeref.
	 *
	 * The other party is intel_gt_retire_requests(), which is walking the
	 * list of active timelines looking for completions. Meanwhile as soon
	 * as we call __i915_request_queue(), the GPU may complete our request.
	 * Ergo, if we put ourselves on the timelines.active_list
	 * (see intel_timeline_enter()) before we increment the
	 * engine->wakeref.count, we may see the request completion and retire
	 * it causing an underflow of the engine->wakeref.
	 */
	flags = __timeline_mark_lock(ce);
	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

	rq = __i915_request_create(ce, GFP_NOWAIT);
	if (IS_ERR(rq))
		/* Context switch failed, hope for the best! Maybe reset? */
		goto out_unlock;

	/* Check again on the next retirement. */
	engine->wakeref_serial = engine->serial + 1;
	i915_request_add_active_barriers(rq);

	/* Install ourselves as a preemption barrier */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
		/*
		 * Use an interrupt for precise measurement of duration,
		 * otherwise we rely on someone else retiring all the requests
		 * which may delay the signaling (i.e. we will likely wait
		 * until the background request retirement running every
		 * second or two).
		 */
		BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
		dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
		rq->duration.emitted = ktime_get();
	}

	/* Expose ourselves to the world */
	__queue_and_release_pm(rq, ce->timeline, engine);

	result = false;
out_unlock:
	__timeline_mark_unlock(ce, flags);
	return result;
}

static void call_idle_barriers(struct intel_engine_cs *engine)
{
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		struct dma_fence_cb *cb =
			container_of((struct list_head *)node,
				     typeof(*cb), node);

		cb->func(ERR_PTR(-EAGAIN), cb);
	}
}

static int __engine_park(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);

	engine->saturated = 0;

	/*
	 * If one and only one request is completed between pm events,
	 * we know that we are inside the kernel context and it is
	 * safe to power down. (We are paranoid in case that runtime
	 * suspend causes corruption to the active context image, and
	 * want to avoid that impacting userspace.)
	 */
	if (!switch_to_kernel_context(engine))
		return -EBUSY;

	ENGINE_TRACE(engine, "\n");

	call_idle_barriers(engine); /* cleanup after wedging */

	intel_engine_park_heartbeat(engine);
	intel_engine_disarm_breadcrumbs(engine);
	intel_engine_pool_park(&engine->pool);

	/* Must be reset upon idling, or we may miss the busy wakeup. */
	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

	if (engine->park)
		engine->park(engine);

	if (engine->pinned_default_state) {
		i915_gem_object_unpin_map(engine->default_state);
		engine->pinned_default_state = NULL;
	}

	engine->execlists.no_priolist = false;

	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
	intel_gt_pm_put_async(engine->gt);
	return 0;
}
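/*
 * The park/unpark callbacks are not invoked directly; they are driven by
 * the engine->wakeref bound to wf_ops below in intel_engine_init__pm().
 * A rough usage sketch (helper names assumed from intel_engine_pm.h, for
 * illustration only):
 *
 *	intel_engine_pm_get(engine);	<- first reference: __engine_unpark()
 *	... build and submit requests with the engine held awake ...
 *	intel_engine_pm_put(engine);	<- last reference: __engine_park()
 *
 * Note that __engine_park() returning -EBUSY after queueing the switch to
 * the kernel context keeps the engine awake; the release is deferred via
 * __intel_wakeref_defer_park() until that barrier request is retired and
 * the last reference is dropped again.
 */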
static const struct intel_wakeref_ops wf_ops = {
	.get = __engine_unpark,
	.put = __engine_park,
};

void intel_engine_init__pm(struct intel_engine_cs *engine)
{
	struct intel_runtime_pm *rpm = engine->uncore->rpm;

	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
	intel_engine_init_heartbeat(engine);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif