1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #include "i915_request.h" 8 9 #include "intel_context.h" 10 #include "intel_engine_heartbeat.h" 11 #include "intel_engine_pm.h" 12 #include "intel_engine.h" 13 #include "intel_gt.h" 14 #include "intel_reset.h" 15 16 /* 17 * While the engine is active, we send a periodic pulse along the engine 18 * to check on its health and to flush any idle-barriers. If that request 19 * is stuck, and we fail to preempt it, we declare the engine hung and 20 * issue a reset -- in the hope that restores progress. 21 */ 22 23 static bool next_heartbeat(struct intel_engine_cs *engine) 24 { 25 long delay; 26 27 delay = READ_ONCE(engine->props.heartbeat_interval_ms); 28 if (!delay) 29 return false; 30 31 delay = msecs_to_jiffies_timeout(delay); 32 if (delay >= HZ) 33 delay = round_jiffies_up_relative(delay); 34 schedule_delayed_work(&engine->heartbeat.work, delay); 35 36 return true; 37 } 38 39 static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq) 40 { 41 engine->wakeref_serial = READ_ONCE(engine->serial) + 1; 42 i915_request_add_active_barriers(rq); 43 } 44 45 static void show_heartbeat(const struct i915_request *rq, 46 struct intel_engine_cs *engine) 47 { 48 struct drm_printer p = drm_debug_printer("heartbeat"); 49 50 intel_engine_dump(engine, &p, 51 "%s heartbeat {prio:%d} not ticking\n", 52 engine->name, 53 rq->sched.attr.priority); 54 } 55 56 static void heartbeat(struct work_struct *wrk) 57 { 58 struct i915_sched_attr attr = { 59 .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN), 60 }; 61 struct intel_engine_cs *engine = 62 container_of(wrk, typeof(*engine), heartbeat.work.work); 63 struct intel_context *ce = engine->kernel_context; 64 struct i915_request *rq; 65 66 if (!intel_engine_pm_get_if_awake(engine)) 67 return; 68 69 rq = engine->heartbeat.systole; 70 if (rq && i915_request_completed(rq)) { 71 i915_request_put(rq); 72 engine->heartbeat.systole = NULL; 73 } 74 75 if (intel_gt_is_wedged(engine->gt)) 76 goto out; 77 78 if (engine->heartbeat.systole) { 79 if (engine->schedule && 80 rq->sched.attr.priority < I915_PRIORITY_BARRIER) { 81 /* 82 * Gradually raise the priority of the heartbeat to 83 * give high priority work [which presumably desires 84 * low latency and no jitter] the chance to naturally 85 * complete before being preempted. 86 */ 87 attr.priority = I915_PRIORITY_MASK; 88 if (rq->sched.attr.priority >= attr.priority) 89 attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT); 90 if (rq->sched.attr.priority >= attr.priority) 91 attr.priority = I915_PRIORITY_BARRIER; 92 93 local_bh_disable(); 94 engine->schedule(rq, &attr); 95 local_bh_enable(); 96 } else { 97 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 98 show_heartbeat(rq, engine); 99 100 intel_gt_handle_error(engine->gt, engine->mask, 101 I915_ERROR_CAPTURE, 102 "stopped heartbeat on %s", 103 engine->name); 104 } 105 goto out; 106 } 107 108 if (engine->wakeref_serial == engine->serial) 109 goto out; 110 111 mutex_lock(&ce->timeline->mutex); 112 113 intel_context_enter(ce); 114 rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); 115 intel_context_exit(ce); 116 if (IS_ERR(rq)) 117 goto unlock; 118 119 idle_pulse(engine, rq); 120 if (i915_modparams.enable_hangcheck) 121 engine->heartbeat.systole = i915_request_get(rq); 122 123 __i915_request_commit(rq); 124 __i915_request_queue(rq, &attr); 125 126 unlock: 127 mutex_unlock(&ce->timeline->mutex); 128 out: 129 if (!next_heartbeat(engine)) 130 i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); 131 intel_engine_pm_put(engine); 132 } 133 134 void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) 135 { 136 if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) 137 return; 138 139 next_heartbeat(engine); 140 } 141 142 void intel_engine_park_heartbeat(struct intel_engine_cs *engine) 143 { 144 if (cancel_delayed_work(&engine->heartbeat.work)) 145 i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); 146 } 147 148 void intel_engine_init_heartbeat(struct intel_engine_cs *engine) 149 { 150 INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); 151 } 152 153 int intel_engine_set_heartbeat(struct intel_engine_cs *engine, 154 unsigned long delay) 155 { 156 int err; 157 158 /* Send one last pulse before to cleanup persistent hogs */ 159 if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) { 160 err = intel_engine_pulse(engine); 161 if (err) 162 return err; 163 } 164 165 WRITE_ONCE(engine->props.heartbeat_interval_ms, delay); 166 167 if (intel_engine_pm_get_if_awake(engine)) { 168 if (delay) 169 intel_engine_unpark_heartbeat(engine); 170 else 171 intel_engine_park_heartbeat(engine); 172 intel_engine_pm_put(engine); 173 } 174 175 return 0; 176 } 177 178 int intel_engine_pulse(struct intel_engine_cs *engine) 179 { 180 struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; 181 struct intel_context *ce = engine->kernel_context; 182 struct i915_request *rq; 183 int err = 0; 184 185 if (!intel_engine_has_preemption(engine)) 186 return -ENODEV; 187 188 if (!intel_engine_pm_get_if_awake(engine)) 189 return 0; 190 191 if (mutex_lock_interruptible(&ce->timeline->mutex)) 192 goto out_rpm; 193 194 intel_context_enter(ce); 195 rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); 196 intel_context_exit(ce); 197 if (IS_ERR(rq)) { 198 err = PTR_ERR(rq); 199 goto out_unlock; 200 } 201 202 rq->flags |= I915_REQUEST_SENTINEL; 203 idle_pulse(engine, rq); 204 205 __i915_request_commit(rq); 206 __i915_request_queue(rq, &attr); 207 208 out_unlock: 209 mutex_unlock(&ce->timeline->mutex); 210 out_rpm: 211 intel_engine_pm_put(engine); 212 return err; 213 } 214 215 int intel_engine_flush_barriers(struct intel_engine_cs *engine) 216 { 217 struct i915_request *rq; 218 219 if (llist_empty(&engine->barrier_tasks)) 220 return 0; 221 222 rq = i915_request_create(engine->kernel_context); 223 if (IS_ERR(rq)) 224 return PTR_ERR(rq); 225 226 idle_pulse(engine, rq); 227 i915_request_add(rq); 228 229 return 0; 230 } 231 232 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 233 #include "selftest_engine_heartbeat.c" 234 #endif 235