/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"

#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "intel_ring.h"

static int __engine_unpark(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);
	void *map;

	GEM_TRACE("%s\n", engine->name);

	intel_gt_pm_get(engine->gt);

	/* Pin the default state for fast resets from atomic context. */
	map = NULL;
	if (engine->default_state)
		map = i915_gem_object_pin_map(engine->default_state,
					      I915_MAP_WB);
	if (!IS_ERR_OR_NULL(map))
		engine->pinned_default_state = map;

	if (engine->unpark)
		engine->unpark(engine);

	intel_engine_unpark_heartbeat(engine);
	return 0;
}

#if IS_ENABLED(CONFIG_LOCKDEP)

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	unsigned long flags;

	local_irq_save(flags);
	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);

	return flags;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
	local_irq_restore(flags);
}

#else

static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
{
	return 0;
}

static inline void __timeline_mark_unlock(struct intel_context *ce,
					  unsigned long flags)
{
}

#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */

static void
__queue_and_release_pm(struct i915_request *rq,
		       struct intel_timeline *tl,
		       struct intel_engine_cs *engine)
{
	struct intel_gt_timelines *timelines = &engine->gt->timelines;

	GEM_TRACE("%s\n", engine->name);

	/*
	 * We have to serialise all potential retirement paths with our
	 * submission, as we don't want to underflow either the
	 * engine->wakeref.counter or our timeline->active_count.
	 *
	 * Equally, we cannot allow a new submission to start until
	 * after we finish queueing, nor could we allow that submitter
	 * to retire us before we are ready!
	 */
	spin_lock(&timelines->lock);

	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
	if (!atomic_fetch_inc(&tl->active_count))
		list_add_tail(&tl->link, &timelines->active_list);

	/* Hand the request over to HW and so engine_retire() */
	__i915_request_queue(rq, NULL);

	/* Let new submissions commence (and maybe retire this timeline) */
	__intel_wakeref_defer_park(&engine->wakeref);

	spin_unlock(&timelines->lock);
}

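/*
 * Park preparation: try to idle the engine in the kernel context so that
 * no user context image is left active across the power-down. Returns true
 * if the engine is already in (or as good as in) the kernel context and can
 * be parked immediately; returns false if a barrier request was queued
 * instead, in which case parking must be retried on a later retirement.
 */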
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	unsigned long flags;
	bool result = true;

	/* Already inside the kernel context, safe to power down. */
	if (engine->wakeref_serial == engine->serial)
		return true;

	/* GPU is pointing to the void, as good as in the kernel context. */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	/*
	 * Note, we do this without taking the timeline->mutex. We cannot
	 * as we may be called while retiring the kernel context and so
	 * already underneath the timeline->mutex. Instead we rely on the
	 * exclusive property of __engine_park() that prevents anyone
	 * else from creating a request on this engine. This also requires
	 * that the ring is empty and we avoid any waits while constructing
	 * the context, as they assume protection by the timeline->mutex.
	 * This should hold true as we can only park the engine after
	 * retiring the last request, thus all rings should be empty and
	 * all timelines idle.
	 *
	 * For unlocking, there are two other parties and the GPU who have a
	 * stake here.
	 *
	 * A new GPU user will be waiting on the engine-pm to start their
	 * engine_unpark. New waiters are predicated on engine->wakeref.count
	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
	 * engine->wakeref.
	 *
	 * The other party is intel_gt_retire_requests(), which is walking the
	 * list of active timelines looking for completions. Meanwhile as soon
	 * as we call __i915_request_queue(), the GPU may complete our request.
	 * Ergo, if we put ourselves on the timelines.active_list
	 * (see intel_timeline_enter()) before we increment the
	 * engine->wakeref.count, we may see the request completion and retire
	 * it causing an underflow of the engine->wakeref.
	 */
	flags = __timeline_mark_lock(ce);
	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);

	rq = __i915_request_create(ce, GFP_NOWAIT);
	if (IS_ERR(rq))
		/* Context switch failed, hope for the best! Maybe reset? */
		goto out_unlock;

	/* Check again on the next retirement. */
	engine->wakeref_serial = engine->serial + 1;
	i915_request_add_active_barriers(rq);

	/* Install ourselves as a preemption barrier */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	__i915_request_commit(rq);

	/* Expose ourselves to the world */
	__queue_and_release_pm(rq, ce->timeline, engine);

	result = false;
out_unlock:
	__timeline_mark_unlock(ce, flags);
	return result;
}

static void call_idle_barriers(struct intel_engine_cs *engine)
{
	struct llist_node *node, *next;

	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		struct dma_fence_cb *cb =
			container_of((struct list_head *)node,
				     typeof(*cb), node);

		cb->func(NULL, cb);
	}
}

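/*
 * __engine_park() is the intel_wakeref 'put' callback, run when the last
 * reference to the engine is dropped. If the engine is not yet idling in
 * the kernel context, switch_to_kernel_context() queues a barrier request
 * and we bail out with -EBUSY so that parking is retried on a later
 * retirement; otherwise we quiesce the engine and release our GT wakeref.
 */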
static int __engine_park(struct intel_wakeref *wf)
{
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);

	engine->saturated = 0;

	/*
	 * If one and only one request is completed between pm events,
	 * we know that we are inside the kernel context and it is
	 * safe to power down. (We are paranoid in case runtime
	 * suspend causes corruption to the active context image, and
	 * want to avoid that impacting userspace.)
	 */
	if (!switch_to_kernel_context(engine))
		return -EBUSY;

	GEM_TRACE("%s\n", engine->name);

	call_idle_barriers(engine); /* cleanup after wedging */

	intel_engine_park_heartbeat(engine);
	intel_engine_disarm_breadcrumbs(engine);
	intel_engine_pool_park(&engine->pool);

	/* Must be reset upon idling, or we may miss the busy wakeup. */
	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

	if (engine->park)
		engine->park(engine);

	if (engine->pinned_default_state) {
		i915_gem_object_unpin_map(engine->default_state);
		engine->pinned_default_state = NULL;
	}

	engine->execlists.no_priolist = false;

	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
	intel_gt_pm_put_async(engine->gt);
	return 0;
}

static const struct intel_wakeref_ops wf_ops = {
	.get = __engine_unpark,
	.put = __engine_park,
};

void intel_engine_init__pm(struct intel_engine_cs *engine)
{
	struct intel_runtime_pm *rpm = engine->uncore->rpm;

	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
	intel_engine_init_heartbeat(engine);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif