// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "i915_drv.h"

#include "intel_gt_requests.h"
#include "i915_selftest.h"
#include "selftest_engine_heartbeat.h"

static void reset_heartbeat(struct intel_engine_cs *engine)
{
	intel_engine_set_heartbeat(engine,
				   engine->defaults.heartbeat_interval_ms);
}

static int timeline_sync(struct intel_timeline *tl)
{
	struct dma_fence *fence;
	long timeout;

	fence = i915_active_fence_get(&tl->last_request);
	if (!fence)
		return 0;

	timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
	dma_fence_put(fence);
	if (timeout < 0)
		return timeout;

	return 0;
}

static int engine_sync_barrier(struct intel_engine_cs *engine)
{
	return timeline_sync(engine->kernel_context->timeline);
}

struct pulse {
	struct i915_active active;
	struct kref kref;
};

static int pulse_active(struct i915_active *active)
{
	kref_get(&container_of(active, struct pulse, active)->kref);
	return 0;
}

static void pulse_free(struct kref *kref)
{
	struct pulse *p = container_of(kref, typeof(*p), kref);

	i915_active_fini(&p->active);
	kfree(p);
}

static void pulse_put(struct pulse *p)
{
	kref_put(&p->kref, pulse_free);
}

static void pulse_retire(struct i915_active *active)
{
	pulse_put(container_of(active, struct pulse, active));
}

static struct pulse *pulse_create(void)
{
	struct pulse *p;

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return p;

	kref_init(&p->kref);
	i915_active_init(&p->active, pulse_active, pulse_retire, 0);

	return p;
}

static void pulse_unlock_wait(struct pulse *p)
{
	i915_active_unlock_wait(&p->active);
}

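/*
 * Attach a barrier to a dummy i915_active, invoke fn() (either an
 * explicit flush or a heartbeat pulse) and verify that the barrier is
 * consumed and retired, leaving the i915_active idle again.
 */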
static int __live_idle_pulse(struct intel_engine_cs *engine,
			     int (*fn)(struct intel_engine_cs *cs))
{
	struct pulse *p;
	int err;

	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));

	p = pulse_create();
	if (!p)
		return -ENOMEM;

	err = i915_active_acquire(&p->active);
	if (err)
		goto out;

	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
	if (err) {
		i915_active_release(&p->active);
		goto out;
	}

	i915_active_acquire_barrier(&p->active);
	i915_active_release(&p->active);

	GEM_BUG_ON(i915_active_is_idle(&p->active));
	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));

	err = fn(engine);
	if (err)
		goto out;

	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));

	if (engine_sync_barrier(engine)) {
		struct drm_printer m = drm_err_printer("pulse");

		pr_err("%s: no heartbeat pulse?\n", engine->name);
		intel_engine_dump(engine, &m, "%s", engine->name);

		err = -ETIME;
		goto out;
	}

	GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);

	pulse_unlock_wait(p); /* synchronize with the retirement callback */

	if (!i915_active_is_idle(&p->active)) {
		struct drm_printer m = drm_err_printer("pulse");

		pr_err("%s: heartbeat pulse did not flush idle tasks\n",
		       engine->name);
		i915_active_print(&p->active, &m);

		err = -EINVAL;
		goto out;
	}

out:
	pulse_put(p);
	return err;
}

static int live_idle_flush(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that we can flush the idle barriers */

	for_each_engine(engine, gt, id) {
		st_engine_heartbeat_disable(engine);
		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	return err;
}

static int live_idle_pulse(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that heartbeat pulses flush the idle barriers */

	for_each_engine(engine, gt, id) {
		st_engine_heartbeat_disable(engine);
		err = __live_idle_pulse(engine, intel_engine_pulse);
		st_engine_heartbeat_enable(engine);
		if (err && err != -ENODEV)
			break;

		err = 0;
	}

	return err;
}

static int cmp_u32(const void *_a, const void *_b)
{
	const u32 *a = _a, *b = _b;

	return *a - *b;
}

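/*
 * With the heartbeat interval forced to its minimum (1ms), repeatedly
 * provoke a heartbeat and measure how long the sampled systole request
 * remains current before a subsequent tick replaces it; complain if the
 * median delay exceeds error_threshold.
 */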
static int __live_heartbeat_fast(struct intel_engine_cs *engine)
{
	const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
	struct intel_context *ce;
	struct i915_request *rq;
	ktime_t t0, t1;
	u32 times[5];
	int err;
	int i;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	intel_engine_pm_get(engine);

	err = intel_engine_set_heartbeat(engine, 1);
	if (err)
		goto err_pm;

	for (i = 0; i < ARRAY_SIZE(times); i++) {
		do {
			/* Manufacture a tick */
			intel_engine_park_heartbeat(engine);
			GEM_BUG_ON(engine->heartbeat.systole);
			engine->serial++; /* pretend we are not idle! */
			intel_engine_unpark_heartbeat(engine);

			flush_delayed_work(&engine->heartbeat.work);
			if (!delayed_work_pending(&engine->heartbeat.work)) {
				pr_err("%s: heartbeat %d did not start\n",
				       engine->name, i);
				err = -EINVAL;
				goto err_pm;
			}

			rcu_read_lock();
			rq = READ_ONCE(engine->heartbeat.systole);
			if (rq)
				rq = i915_request_get_rcu(rq);
			rcu_read_unlock();
		} while (!rq);

		t0 = ktime_get();
		while (rq == READ_ONCE(engine->heartbeat.systole))
			yield(); /* work is on the local cpu! */
		t1 = ktime_get();

		i915_request_put(rq);
		times[i] = ktime_us_delta(t1, t0);
	}

	sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);

	pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
		engine->name,
		times[ARRAY_SIZE(times) / 2],
		times[0],
		times[ARRAY_SIZE(times) - 1]);

	/*
	 * Ideally, the upper bound on min work delay would be something like
	 * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
	 * are, even with system_wq_highpri, at the mercy of the CPU scheduler
	 * and may be stuck behind some slow work for many milliseconds, such
	 * as our very own display workers.
	 */
	if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
		pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
		       engine->name,
		       times[ARRAY_SIZE(times) / 2],
		       error_threshold);
		err = -EINVAL;
	}

	reset_heartbeat(engine);
err_pm:
	intel_engine_pm_put(engine);
	intel_context_put(ce);
	return err;
}

static int live_heartbeat_fast(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that the heartbeat ticks at the desired rate. */
	if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
		return 0;

	for_each_engine(engine, gt, id) {
		err = __live_heartbeat_fast(engine);
		if (err)
			break;
	}

	return err;
}

static int __live_heartbeat_off(struct intel_engine_cs *engine)
{
	int err;

	intel_engine_pm_get(engine);

	engine->serial++;
	flush_delayed_work(&engine->heartbeat.work);
	if (!delayed_work_pending(&engine->heartbeat.work)) {
		pr_err("%s: heartbeat not running\n",
		       engine->name);
		err = -EINVAL;
		goto err_pm;
	}

	err = intel_engine_set_heartbeat(engine, 0);
	if (err)
		goto err_pm;

	engine->serial++;
	flush_delayed_work(&engine->heartbeat.work);
	if (delayed_work_pending(&engine->heartbeat.work)) {
		pr_err("%s: heartbeat still running\n",
		       engine->name);
		err = -EINVAL;
		goto err_beat;
	}

	if (READ_ONCE(engine->heartbeat.systole)) {
		pr_err("%s: heartbeat still allocated\n",
		       engine->name);
		err = -EINVAL;
		goto err_beat;
	}

err_beat:
	reset_heartbeat(engine);
err_pm:
	intel_engine_pm_put(engine);
	return err;
}

static int live_heartbeat_off(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that we can turn off heartbeat and not interrupt VIP */
	if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
		return 0;

	for_each_engine(engine, gt, id) {
		if (!intel_engine_has_preemption(engine))
			continue;

		err = __live_heartbeat_off(engine);
		if (err)
			break;
	}

	return err;
}

int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_idle_flush),
		SUBTEST(live_idle_pulse),
		SUBTEST(live_heartbeat_fast),
		SUBTEST(live_heartbeat_off),
	};
	int saved_hangcheck;
	int err;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	saved_hangcheck = i915->params.enable_hangcheck;
	i915->params.enable_hangcheck = INT_MAX;

	err = intel_gt_live_subtests(tests, &i915->gt);

	i915->params.enable_hangcheck = saved_hangcheck;
	return err;
}

void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
{
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}

void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}