/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "i915_drv.h"

#include "intel_gt_requests.h"
#include "i915_selftest.h"
#include "selftest_engine_heartbeat.h"

/*
 * Wait (interruptibly, up to HZ/2) for the timeline's last request to
 * complete.  Returns 0 if the timeline is idle or the wait succeeds,
 * otherwise the negative error from dma_fence_wait_timeout().
 */
static int timeline_sync(struct intel_timeline *tl)
{
	struct dma_fence *fence;
	long timeout;

	fence = i915_active_fence_get(&tl->last_request);
	if (!fence)
		return 0; /* nothing in flight */

	timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
	dma_fence_put(fence);
	if (timeout < 0)
		return timeout;

	return 0;
}

/* Sync against the engine's kernel context, which carries the idle barriers. */
static int engine_sync_barrier(struct intel_engine_cs *engine)
{
	return timeline_sync(engine->kernel_context->timeline);
}

/*
 * A refcounted i915_active used to observe when the engine's idle
 * barriers are flushed: a reference is taken while the active is in
 * use (pulse_active) and dropped again when it retires (pulse_retire).
 */
struct pulse {
	struct i915_active active;
	struct kref kref;
};

static int pulse_active(struct i915_active *active)
{
	/* Hold a reference for the duration of the activity. */
	kref_get(&container_of(active, struct pulse, active)->kref);
	return 0;
}

static void pulse_free(struct kref *kref)
{
	struct pulse *p = container_of(kref, typeof(*p), kref);

	i915_active_fini(&p->active);
	kfree(p);
}

static void pulse_put(struct pulse *p)
{
	kref_put(&p->kref, pulse_free);
}

static void pulse_retire(struct i915_active *active)
{
	/* Drop the reference taken in pulse_active(). */
	pulse_put(container_of(active, struct pulse, active));
}

/* Allocate a pulse with one initial reference; NULL on allocation failure. */
static struct pulse *pulse_create(void)
{
	struct pulse *p;

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return p;

	kref_init(&p->kref);
	i915_active_init(&p->active, pulse_active, pulse_retire);

	return p;
}

static void pulse_unlock_wait(struct pulse *p)
{
	i915_active_unlock_wait(&p->active);
}

/*
 * Queue an idle barrier on @engine via a pulse's i915_active, then invoke
 * @fn (intel_engine_flush_barriers or intel_engine_pulse) and verify that
 * the barrier tasks are consumed and the pulse retires to idle.
 *
 * Caller must hold an engine-pm wakeref (asserted below).
 */
static int __live_idle_pulse(struct intel_engine_cs *engine,
			     int (*fn)(struct intel_engine_cs *cs))
{
	struct pulse *p;
	int err;

	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));

	p = pulse_create();
	if (!p)
		return -ENOMEM;

	err = i915_active_acquire(&p->active);
	if (err)
		goto out;

	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
	if (err) {
		i915_active_release(&p->active);
		goto out;
	}

	i915_active_acquire_barrier(&p->active);
	i915_active_release(&p->active);

	/* The barrier is now queued on the engine, keeping the pulse busy. */
	GEM_BUG_ON(i915_active_is_idle(&p->active));
	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));

	err = fn(engine);
	if (err)
		goto out;

	/* fn() is expected to have consumed the barrier tasks. */
	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));

	if (engine_sync_barrier(engine)) {
		struct drm_printer m = drm_err_printer("pulse");

		pr_err("%s: no heartbeat pulse?\n", engine->name);
		intel_engine_dump(engine, &m, "%s", engine->name);

		err = -ETIME;
		goto out;
	}

	GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);

	pulse_unlock_wait(p); /* synchronize with the retirement callback */

	if (!i915_active_is_idle(&p->active)) {
		struct drm_printer m = drm_err_printer("pulse");

		pr_err("%s: heartbeat pulse did not flush idle tasks\n",
		       engine->name);
		i915_active_print(&p->active, &m);

		err = -EINVAL;
		goto out;
	}

out:
	pulse_put(p);
	return err;
}

static int live_idle_flush(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that we can flush the idle barriers */

	for_each_engine(engine, gt, id) {
		st_engine_heartbeat_disable(engine);
		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	return err;
}

static int live_idle_pulse(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that heartbeat pulses flush the idle barriers */

	for_each_engine(engine, gt, id) {
		st_engine_heartbeat_disable(engine);
		err = __live_idle_pulse(engine, intel_engine_pulse);
		st_engine_heartbeat_enable(engine);
		if (err && err != -ENODEV)
			break;

		err = 0; /* -ENODEV: engine cannot emit a pulse; skip it */
	}

	return err;
}

/*
 * Ascending u32 comparator for sort().  NOTE(review): the subtraction
 * only orders correctly for values within INT_MAX of each other, which
 * holds for the microsecond latencies measured below.
 */
static int cmp_u32(const void *_a, const void *_b)
{
	const u32 *a = _a, *b = _b;

	return *a - *b;
}

/*
 * Force heartbeat ticks at the minimum interval and measure how long each
 * systole (heartbeat request) takes to retire; fail if the median latency
 * exceeds error_threshold (at least 20ms, or 6 jiffies if larger).
 */
static int __live_heartbeat_fast(struct intel_engine_cs *engine)
{
	const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
	struct intel_context *ce;
	struct i915_request *rq;
	ktime_t t0, t1;
	u32 times[5];
	int err;
	int i;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	intel_engine_pm_get(engine);

	err = intel_engine_set_heartbeat(engine, 1);
	if (err)
		goto err_pm;

	for (i = 0; i < ARRAY_SIZE(times); i++) {
		do {
			/* Manufacture a tick */
			intel_engine_park_heartbeat(engine);
			GEM_BUG_ON(engine->heartbeat.systole);
			engine->serial++; /* pretend we are not idle! */
			intel_engine_unpark_heartbeat(engine);

			flush_delayed_work(&engine->heartbeat.work);
			if (!delayed_work_pending(&engine->heartbeat.work)) {
				pr_err("%s: heartbeat %d did not start\n",
				       engine->name, i);
				err = -EINVAL;
				goto err_pm;
			}

			/* Grab a reference to the in-flight systole, if any. */
			rcu_read_lock();
			rq = READ_ONCE(engine->heartbeat.systole);
			if (rq)
				rq = i915_request_get_rcu(rq);
			rcu_read_unlock();
		} while (!rq);

		/* Time until the heartbeat worker replaces/clears the systole. */
		t0 = ktime_get();
		while (rq == READ_ONCE(engine->heartbeat.systole))
			yield(); /* work is on the local cpu! */
		t1 = ktime_get();

		i915_request_put(rq);
		times[i] = ktime_us_delta(t1, t0);
	}

	sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);

	pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
		engine->name,
		times[ARRAY_SIZE(times) / 2],
		times[0],
		times[ARRAY_SIZE(times) - 1]);

	/*
	 * Ideally, the upper bound on min work delay would be something like
	 * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
	 * are, even with system_wq_highpri, at the mercy of the CPU scheduler
	 * and may be stuck behind some slow work for many millisecond. Such
	 * as our very own display workers.
	 */
	if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
		pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
		       engine->name,
		       times[ARRAY_SIZE(times) / 2],
		       error_threshold);
		err = -EINVAL;
	}

	/* Restore the default heartbeat interval before releasing the engine. */
	intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
err_pm:
	intel_engine_pm_put(engine);
	intel_context_put(ce);
	return err;
}

static int live_heartbeat_fast(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that the heartbeat ticks at the desired rate. */
	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
		return 0; /* heartbeats compiled out */

	for_each_engine(engine, gt, id) {
		err = __live_heartbeat_fast(engine);
		if (err)
			break;
	}

	return err;
}

/*
 * Verify that setting the heartbeat interval to 0 actually stops the
 * heartbeat worker and releases its systole request.
 */
static int __live_heartbeat_off(struct intel_engine_cs *engine)
{
	int err;

	intel_engine_pm_get(engine);

	/* First confirm the heartbeat is running at all. */
	engine->serial++;
	flush_delayed_work(&engine->heartbeat.work);
	if (!delayed_work_pending(&engine->heartbeat.work)) {
		pr_err("%s: heartbeat not running\n",
		       engine->name);
		err = -EINVAL;
		goto err_pm;
	}

	err = intel_engine_set_heartbeat(engine, 0);
	if (err)
		goto err_pm;

	engine->serial++;
	flush_delayed_work(&engine->heartbeat.work);
	if (delayed_work_pending(&engine->heartbeat.work)) {
		pr_err("%s: heartbeat still running\n",
		       engine->name);
		err = -EINVAL;
		goto err_beat;
	}

	if (READ_ONCE(engine->heartbeat.systole)) {
		pr_err("%s: heartbeat still allocated\n",
		       engine->name);
		err = -EINVAL;
		goto err_beat;
	}

	/* Success path falls through: err == 0, restore the interval. */
err_beat:
	intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
err_pm:
	intel_engine_pm_put(engine);
	return err;
}

static int live_heartbeat_off(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that we can turn off heartbeat and not interrupt VIP */
	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
		return 0; /* heartbeats compiled out */

	for_each_engine(engine, gt, id) {
		if (!intel_engine_has_preemption(engine))
			continue;

		err = __live_heartbeat_off(engine);
		if (err)
			break;
	}

	return err;
}

/* Entry point: run the heartbeat selftests against every GT engine. */
int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_idle_flush),
		SUBTEST(live_idle_pulse),
		SUBTEST(live_heartbeat_fast),
		SUBTEST(live_heartbeat_off),
	};
	int saved_hangcheck;
	int err;

	if (intel_gt_is_wedged(&i915->gt))
		return 0; /* GPU already terminally wedged; nothing to test */

	/*
	 * Override enable_hangcheck for the duration of the tests and
	 * restore the user's setting afterwards.
	 */
	saved_hangcheck = i915->params.enable_hangcheck;
	i915->params.enable_hangcheck = INT_MAX;

	err = intel_gt_live_subtests(tests, &i915->gt);

	i915->params.enable_hangcheck = saved_hangcheck;
	return err;
}

/*
 * Selftest helper: stop the engine heartbeat, holding an engine-pm
 * wakeref so the engine stays awake while heartbeats are disabled.
 * Pair with st_engine_heartbeat_enable().
 */
void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
{
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}

/* Selftest helper: drop the wakeref and restore the default heartbeat. */
void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}