// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"

#include "gem/selftests/mock_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"

static int request_sync(struct i915_request *rq)
{
	struct intel_timeline *tl = i915_request_timeline(rq);
	long timeout;
	int err = 0;

	intel_timeline_get(tl);
	i915_request_get(rq);

	/* Opencode i915_request_add() so we can keep the timeline locked. */
	__i915_request_commit(rq);
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	__i915_request_queue_bh(rq);

	timeout = i915_request_wait(rq, 0, HZ / 10);
	if (timeout < 0)
		err = timeout;
	else
		i915_request_retire_upto(rq);

	lockdep_unpin_lock(&tl->mutex, rq->cookie);
	mutex_unlock(&tl->mutex);

	i915_request_put(rq);
	intel_timeline_put(tl);

	return err;
}

/* Wait for (and retire) every request outstanding on the context's timeline. */
static int context_sync(struct intel_context *ce)
{
	struct intel_timeline *tl = ce->timeline;
	int err = 0;

	mutex_lock(&tl->mutex);
	do {
		struct i915_request *rq;
		long timeout;

		if (list_empty(&tl->requests))
			break;

		rq = list_last_entry(&tl->requests, typeof(*rq), link);
		i915_request_get(rq);

		timeout = i915_request_wait(rq, 0, HZ / 10);
		if (timeout < 0)
			err = timeout;
		else
			i915_request_retire_upto(rq);

		i915_request_put(rq);
	} while (!err);
	mutex_unlock(&tl->mutex);

	/* Wait for all barriers to complete (remote CPU) before we check */
	i915_active_unlock_wait(&ce->active);
	return err;
}

static int __live_context_size(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	void *vaddr;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err;

	vaddr = i915_gem_object_pin_map_unlocked(ce->state->obj,
						 i915_coherent_map_type(engine->i915));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		intel_context_unpin(ce);
		goto err;
	}

	/*
	 * Note that execlists also applies a redzone which it checks on
	 * context unpin when debugging. We are using the same location
	 * and same poison value so that our checks overlap. Despite the
	 * redundancy, we want to keep this little selftest so that we
	 * get coverage of any and all submission backends, and we can
	 * always extend this test to ensure we trick the HW into a
	 * compromising position wrt the various sections that need
	 * to be written into the context state.
	 *
	 * TLDR; this overlaps with the execlists redzone.
	 */
	vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);

	rq = intel_context_create_request(ce);
	intel_context_unpin(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = request_sync(rq);
	if (err)
		goto err_unpin;

	/* Force the context switch */
	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}
	err = request_sync(rq);
	if (err)
		goto err_unpin;

	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) {
		pr_err("%s context overwrote trailing red-zone!\n", engine->name);
		err = -EINVAL;
	}

err_unpin:
	i915_gem_object_unpin_map(ce->state->obj);
err:
	intel_context_put(ce);
	return err;
}

static int live_context_size(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that our context sizes are correct by seeing if the
	 * HW tries to write past the end of one.
	 */

	for_each_engine(engine, gt, id) {
		struct file *saved;

		if (!engine->context_size)
			continue;

		intel_engine_pm_get(engine);

		/*
		 * Hide the old default state -- we lie about the context size
		 * and get confused when the default state is smaller than
		 * expected. For our do-nothing request, inheriting the
		 * active state is sufficient; we are only checking that we
		 * don't use more than we planned.
		 */
		saved = fetch_and_zero(&engine->default_state);

		/* Overlaps with the execlists redzone */
		engine->context_size += I915_GTT_PAGE_SIZE;

		err = __live_context_size(engine);

		engine->context_size -= I915_GTT_PAGE_SIZE;

		engine->default_state = saved;

		intel_engine_pm_put(engine);

		if (err)
			break;
	}

	return err;
}

static int __live_active_context(struct intel_engine_cs *engine)
{
	unsigned long saved_heartbeat;
	struct intel_context *ce;
	int pass;
	int err;

	/*
	 * We keep active contexts alive until after a subsequent context
	 * switch as the final write from the context-save will be after
	 * we retire the final request. We track when we unpin the context,
	 * under the presumption that the final pin is from the last request,
	 * and instead of immediately unpinning the context, we add a task
	 * to unpin the context from the next idle-barrier.
	 *
	 * This test makes sure that the context is kept alive until a
	 * subsequent idle-barrier (emitted when the engine wakeref hits 0
	 * with no more outstanding requests).
	 */

	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	/* Stop the heartbeat from submitting background requests during the test. */
	saved_heartbeat = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;

	for (pass = 0; pass <= 2; pass++) {
		struct i915_request *rq;

		intel_engine_pm_get(engine);

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_engine;
		}

		err = request_sync(rq);
		if (err)
			goto out_engine;

		/* Context will be kept active until after an idle-barrier. */
		if (i915_active_is_idle(&ce->active)) {
			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			goto out_engine;
		}

		if (!intel_engine_pm_is_awake(engine)) {
			pr_err("%s is asleep before idle-barrier\n",
			       engine->name);
			err = -EINVAL;
			goto out_engine;
		}

out_engine:
		intel_engine_pm_put(engine);
		if (err)
			goto err;
	}

	/* Now make sure our idle-barriers are flushed */
	err = intel_engine_flush_barriers(engine);
	if (err)
		goto err;

	/* Wait for the barrier and in the process wait for engine to park */
	err = context_sync(engine->kernel_context);
	if (err)
		goto err;

	if (!i915_active_is_idle(&ce->active)) {
		pr_err("context is still active!\n");
		err = -EINVAL;
	}

	intel_engine_pm_flush(engine);

	if (intel_engine_pm_is_awake(engine)) {
		struct drm_printer p = drm_debug_printer(__func__);

		intel_engine_dump(engine, &p,
				  "%s is still awake:%d after idle-barriers\n",
				  engine->name,
				  atomic_read(&engine->wakeref.count));
		GEM_TRACE_DUMP();

		err = -EINVAL;
		goto err;
	}

err:
	engine->props.heartbeat_interval_ms = saved_heartbeat;
	intel_context_put(ce);
	return err;
}

static int live_active_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		err = __live_active_context(engine);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	return err;
}

/* Submit a request on @ce that operates on @remote's context image. */
static int __remote_sync(struct intel_context *ce, struct intel_context *remote)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(remote);
	if (err)
		return err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	err = intel_context_prepare_remote_request(remote, rq);
	if (err) {
		i915_request_add(rq);
		goto unpin;
	}

	err = request_sync(rq);

unpin:
	intel_context_unpin(remote);
	return err;
}

static int __live_remote_context(struct intel_engine_cs *engine)
{
	struct intel_context *local, *remote;
	unsigned long saved_heartbeat;
	int pass;
	int err;

	/*
	 * Check that our idle barriers do not interfere with normal
	 * activity tracking. In particular, check that operating
	 * on the context image remotely (intel_context_prepare_remote_request),
	 * which inserts foreign fences into intel_context.active, does not
	 * clobber the idle-barrier.
359 */ 360 361 if (intel_engine_pm_is_awake(engine)) { 362 pr_err("%s is awake before starting %s!\n", 363 engine->name, __func__); 364 return -EINVAL; 365 } 366 367 remote = intel_context_create(engine); 368 if (IS_ERR(remote)) 369 return PTR_ERR(remote); 370 371 local = intel_context_create(engine); 372 if (IS_ERR(local)) { 373 err = PTR_ERR(local); 374 goto err_remote; 375 } 376 377 saved_heartbeat = engine->props.heartbeat_interval_ms; 378 engine->props.heartbeat_interval_ms = 0; 379 intel_engine_pm_get(engine); 380 381 for (pass = 0; pass <= 2; pass++) { 382 err = __remote_sync(local, remote); 383 if (err) 384 break; 385 386 err = __remote_sync(engine->kernel_context, remote); 387 if (err) 388 break; 389 390 if (i915_active_is_idle(&remote->active)) { 391 pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n", 392 engine->name, pass); 393 err = -EINVAL; 394 break; 395 } 396 } 397 398 intel_engine_pm_put(engine); 399 engine->props.heartbeat_interval_ms = saved_heartbeat; 400 401 intel_context_put(local); 402 err_remote: 403 intel_context_put(remote); 404 return err; 405 } 406 407 static int live_remote_context(void *arg) 408 { 409 struct intel_gt *gt = arg; 410 struct intel_engine_cs *engine; 411 enum intel_engine_id id; 412 int err = 0; 413 414 for_each_engine(engine, gt, id) { 415 err = __live_remote_context(engine); 416 if (err) 417 break; 418 419 err = igt_flush_test(gt->i915); 420 if (err) 421 break; 422 } 423 424 return err; 425 } 426 427 int intel_context_live_selftests(struct drm_i915_private *i915) 428 { 429 static const struct i915_subtest tests[] = { 430 SUBTEST(live_context_size), 431 SUBTEST(live_active_context), 432 SUBTEST(live_remote_context), 433 }; 434 struct intel_gt *gt = &i915->gt; 435 436 if (intel_gt_is_wedged(gt)) 437 return 0; 438 439 return intel_gt_live_subtests(tests, gt); 440 } 441