// SPDX-License-Identifier: MIT /* * Copyright © 2018 Intel Corporation */ #include #include "i915_drv.h" #include "intel_gt_requests.h" #include "i915_selftest.h" #include "selftest_engine_heartbeat.h" static void reset_heartbeat(struct intel_engine_cs *engine) { intel_engine_set_heartbeat(engine, engine->defaults.heartbeat_interval_ms); } static int timeline_sync(struct intel_timeline *tl) { struct dma_fence *fence; long timeout; fence = i915_active_fence_get(&tl->last_request); if (!fence) return 0; timeout = dma_fence_wait_timeout(fence, true, HZ / 2); dma_fence_put(fence); if (timeout < 0) return timeout; return 0; } static int engine_sync_barrier(struct intel_engine_cs *engine) { return timeline_sync(engine->kernel_context->timeline); } struct pulse { struct i915_active active; struct kref kref; }; static int pulse_active(struct i915_active *active) { kref_get(&container_of(active, struct pulse, active)->kref); return 0; } static void pulse_free(struct kref *kref) { struct pulse *p = container_of(kref, typeof(*p), kref); i915_active_fini(&p->active); kfree(p); } static void pulse_put(struct pulse *p) { kref_put(&p->kref, pulse_free); } static void pulse_retire(struct i915_active *active) { pulse_put(container_of(active, struct pulse, active)); } static struct pulse *pulse_create(void) { struct pulse *p; p = kmalloc(sizeof(*p), GFP_KERNEL); if (!p) return p; kref_init(&p->kref); i915_active_init(&p->active, pulse_active, pulse_retire); return p; } static void pulse_unlock_wait(struct pulse *p) { i915_active_unlock_wait(&p->active); } static int __live_idle_pulse(struct intel_engine_cs *engine, int (*fn)(struct intel_engine_cs *cs)) { struct pulse *p; int err; GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); p = pulse_create(); if (!p) return -ENOMEM; err = i915_active_acquire(&p->active); if (err) goto out; err = i915_active_acquire_preallocate_barrier(&p->active, engine); if (err) { i915_active_release(&p->active); goto out; } i915_active_acquire_barrier(&p->active); i915_active_release(&p->active); GEM_BUG_ON(i915_active_is_idle(&p->active)); GEM_BUG_ON(llist_empty(&engine->barrier_tasks)); err = fn(engine); if (err) goto out; GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); if (engine_sync_barrier(engine)) { struct drm_printer m = drm_err_printer("pulse"); pr_err("%s: no heartbeat pulse?\n", engine->name); intel_engine_dump(engine, &m, "%s", engine->name); err = -ETIME; goto out; } GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial); pulse_unlock_wait(p); /* synchronize with the retirement callback */ if (!i915_active_is_idle(&p->active)) { struct drm_printer m = drm_err_printer("pulse"); pr_err("%s: heartbeat pulse did not flush idle tasks\n", engine->name); i915_active_print(&p->active, &m); err = -EINVAL; goto out; } out: pulse_put(p); return err; } static int live_idle_flush(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; int err = 0; /* Check that we can flush the idle barriers */ for_each_engine(engine, gt, id) { st_engine_heartbeat_disable(engine); err = __live_idle_pulse(engine, intel_engine_flush_barriers); st_engine_heartbeat_enable(engine); if (err) break; } return err; } static int live_idle_pulse(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; int err = 0; /* Check that heartbeat pulses flush the idle barriers */ for_each_engine(engine, gt, id) { st_engine_heartbeat_disable(engine); err = __live_idle_pulse(engine, intel_engine_pulse); st_engine_heartbeat_enable(engine); if (err && err != -ENODEV) break; err = 0; } return err; } static int cmp_u32(const void *_a, const void *_b) { const u32 *a = _a, *b = _b; return *a - *b; } static int __live_heartbeat_fast(struct intel_engine_cs *engine) { const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6)); struct intel_context *ce; struct i915_request *rq; ktime_t t0, t1; u32 times[5]; int err; int i; ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); intel_engine_pm_get(engine); err = intel_engine_set_heartbeat(engine, 1); if (err) goto err_pm; for (i = 0; i < ARRAY_SIZE(times); i++) { do { /* Manufacture a tick */ intel_engine_park_heartbeat(engine); GEM_BUG_ON(engine->heartbeat.systole); engine->serial++; /* pretend we are not idle! */ intel_engine_unpark_heartbeat(engine); flush_delayed_work(&engine->heartbeat.work); if (!delayed_work_pending(&engine->heartbeat.work)) { pr_err("%s: heartbeat %d did not start\n", engine->name, i); err = -EINVAL; goto err_pm; } rcu_read_lock(); rq = READ_ONCE(engine->heartbeat.systole); if (rq) rq = i915_request_get_rcu(rq); rcu_read_unlock(); } while (!rq); t0 = ktime_get(); while (rq == READ_ONCE(engine->heartbeat.systole)) yield(); /* work is on the local cpu! */ t1 = ktime_get(); i915_request_put(rq); times[i] = ktime_us_delta(t1, t0); } sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", engine->name, times[ARRAY_SIZE(times) / 2], times[0], times[ARRAY_SIZE(times) - 1]); /* * Ideally, the upper bound on min work delay would be something like * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we * are, even with system_wq_highpri, at the mercy of the CPU scheduler * and may be stuck behind some slow work for many millisecond. Such * as our very own display workers. */ if (times[ARRAY_SIZE(times) / 2] > error_threshold) { pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", engine->name, times[ARRAY_SIZE(times) / 2], error_threshold); err = -EINVAL; } reset_heartbeat(engine); err_pm: intel_engine_pm_put(engine); intel_context_put(ce); return err; } static int live_heartbeat_fast(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; int err = 0; /* Check that the heartbeat ticks at the desired rate. */ if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) return 0; for_each_engine(engine, gt, id) { err = __live_heartbeat_fast(engine); if (err) break; } return err; } static int __live_heartbeat_off(struct intel_engine_cs *engine) { int err; intel_engine_pm_get(engine); engine->serial++; flush_delayed_work(&engine->heartbeat.work); if (!delayed_work_pending(&engine->heartbeat.work)) { pr_err("%s: heartbeat not running\n", engine->name); err = -EINVAL; goto err_pm; } err = intel_engine_set_heartbeat(engine, 0); if (err) goto err_pm; engine->serial++; flush_delayed_work(&engine->heartbeat.work); if (delayed_work_pending(&engine->heartbeat.work)) { pr_err("%s: heartbeat still running\n", engine->name); err = -EINVAL; goto err_beat; } if (READ_ONCE(engine->heartbeat.systole)) { pr_err("%s: heartbeat still allocated\n", engine->name); err = -EINVAL; goto err_beat; } err_beat: reset_heartbeat(engine); err_pm: intel_engine_pm_put(engine); return err; } static int live_heartbeat_off(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; int err = 0; /* Check that we can turn off heartbeat and not interrupt VIP */ if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) return 0; for_each_engine(engine, gt, id) { if (!intel_engine_has_preemption(engine)) continue; err = __live_heartbeat_off(engine); if (err) break; } return err; } int intel_heartbeat_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_idle_flush), SUBTEST(live_idle_pulse), SUBTEST(live_heartbeat_fast), SUBTEST(live_heartbeat_off), }; int saved_hangcheck; int err; if (intel_gt_is_wedged(&i915->gt)) return 0; saved_hangcheck = i915->params.enable_hangcheck; i915->params.enable_hangcheck = INT_MAX; err = intel_gt_live_subtests(tests, &i915->gt); i915->params.enable_hangcheck = saved_hangcheck; return err; } void st_engine_heartbeat_disable(struct intel_engine_cs *engine) { engine->props.heartbeat_interval_ms = 0; intel_engine_pm_get(engine); intel_engine_park_heartbeat(engine); } void st_engine_heartbeat_enable(struct intel_engine_cs *engine) { intel_engine_pm_put(engine); engine->props.heartbeat_interval_ms = engine->defaults.heartbeat_interval_ms; }