// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"

#include "gem/selftests/mock_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"

static int request_sync(struct i915_request *rq)
{
	struct intel_timeline *tl = i915_request_timeline(rq);
	long timeout;
	int err = 0;

	intel_timeline_get(tl);
	i915_request_get(rq);

	/* Opencode i915_request_add() so we can keep the timeline locked. */
	__i915_request_commit(rq);
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	__i915_request_queue_bh(rq);

	timeout = i915_request_wait(rq, 0, HZ / 10);
	if (timeout < 0)
		err = timeout;
	else
		i915_request_retire_upto(rq);

	lockdep_unpin_lock(&tl->mutex, rq->cookie);
	mutex_unlock(&tl->mutex);

	i915_request_put(rq);
	intel_timeline_put(tl);

	return err;
}

static int context_sync(struct intel_context *ce)
{
	struct intel_timeline *tl = ce->timeline;
	int err = 0;

	mutex_lock(&tl->mutex);
	do {
		struct i915_request *rq;
		long timeout;

		if (list_empty(&tl->requests))
			break;

		/* Wait on the most recent request, retiring everything up to it */
		rq = list_last_entry(&tl->requests, typeof(*rq), link);
		i915_request_get(rq);

		timeout = i915_request_wait(rq, 0, HZ / 10);
		if (timeout < 0)
			err = timeout;
		else
			i915_request_retire_upto(rq);

		i915_request_put(rq);
	} while (!err);
	mutex_unlock(&tl->mutex);

	/* Wait for all barriers to complete (remote CPU) before we check */
	i915_active_unlock_wait(&ce->active);
	return err;
}

static int __live_context_size(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	void *vaddr;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err;

	vaddr = i915_gem_object_pin_map_unlocked(ce->state->obj,
						 intel_gt_coherent_map_type(engine->gt,
									    ce->state->obj,
									    false));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		intel_context_unpin(ce);
		goto err;
	}

	/*
	 * Note that execlists also applies a redzone which it checks on
	 * context unpin when debugging. We are using the same location
	 * and same poison value so that our checks overlap. Despite the
	 * redundancy, we want to keep this little selftest so that we
	 * get coverage of any and all submission backends, and we can
	 * always extend this test to ensure we trick the HW into a
	 * compromising position wrt the various sections that need
	 * to be written into the context state.
	 *
	 * TLDR; this overlaps with the execlists redzone.
	 */
	vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);

	rq = intel_context_create_request(ce);
	intel_context_unpin(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = request_sync(rq);
	if (err)
		goto err_unpin;

	/* Force the context switch */
	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}
	err = request_sync(rq);
	if (err)
		goto err_unpin;

	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) {
		pr_err("%s context overwrote trailing red-zone!\n", engine->name);
		err = -EINVAL;
	}

err_unpin:
	i915_gem_object_unpin_map(ce->state->obj);
err:
	intel_context_put(ce);
	return err;
}

static int live_context_size(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that our context sizes are correct by seeing if the
	 * HW tries to write past the end of one.
	 */

	for_each_engine(engine, gt, id) {
		struct file *saved;

		if (!engine->context_size)
			continue;

		intel_engine_pm_get(engine);

		/*
		 * Hide the old default state -- we lie about the context size
		 * and get confused when the default state is smaller than
		 * expected. For our do-nothing request, inheriting the
		 * active state is sufficient; we are only checking that we
		 * don't use more than we planned.
		 */
		saved = fetch_and_zero(&engine->default_state);

		/* Overlaps with the execlists redzone */
		engine->context_size += I915_GTT_PAGE_SIZE;

		err = __live_context_size(engine);

		engine->context_size -= I915_GTT_PAGE_SIZE;

		engine->default_state = saved;

		intel_engine_pm_put(engine);

		if (err)
			break;
	}

	return err;
}

static int __live_active_context(struct intel_engine_cs *engine)
{
	unsigned long saved_heartbeat;
	struct intel_context *ce;
	int pass;
	int err;

	/*
	 * We keep active contexts alive until after a subsequent context
	 * switch, as the final write from the context-save will be after
	 * we retire the final request. We track when we unpin the context,
	 * under the presumption that the final pin is from the last request,
	 * and instead of immediately unpinning the context, we add a task
	 * to unpin the context from the next idle-barrier.
	 *
	 * This test makes sure that the context is kept alive until a
	 * subsequent idle-barrier (emitted when the engine wakeref hits 0
	 * with no more outstanding requests).
	 *
	 * In GuC submission mode we don't use idle barriers; instead we
	 * get a message from the GuC to signal that it is safe to unpin the
	 * context from memory.
	 */
	if (intel_engine_uses_guc(engine))
		return 0;

	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	saved_heartbeat = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;

	for (pass = 0; pass <= 2; pass++) {
		struct i915_request *rq;

		intel_engine_pm_get(engine);

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_engine;
		}

		err = request_sync(rq);
		if (err)
			goto out_engine;

		/* Context will be kept active until after an idle-barrier. */
		if (i915_active_is_idle(&ce->active)) {
			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			goto out_engine;
		}

		if (!intel_engine_pm_is_awake(engine)) {
			pr_err("%s is asleep before idle-barrier\n",
			       engine->name);
			err = -EINVAL;
			goto out_engine;
		}

out_engine:
		intel_engine_pm_put(engine);
		if (err)
			goto err;
	}

	/* Now make sure our idle-barriers are flushed */
	err = intel_engine_flush_barriers(engine);
	if (err)
		goto err;

	/* Wait for the barrier and in the process wait for engine to park */
	err = context_sync(engine->kernel_context);
	if (err)
		goto err;

	if (!i915_active_is_idle(&ce->active)) {
		pr_err("context is still active!\n");
		err = -EINVAL;
	}

	intel_engine_pm_flush(engine);

	if (intel_engine_pm_is_awake(engine)) {
		struct drm_printer p = drm_debug_printer(__func__);

		intel_engine_dump(engine, &p,
				  "%s is still awake:%d after idle-barriers\n",
				  engine->name,
				  atomic_read(&engine->wakeref.count));
		GEM_TRACE_DUMP();

		err = -EINVAL;
		goto err;
	}

err:
	engine->props.heartbeat_interval_ms = saved_heartbeat;
	intel_context_put(ce);
	return err;
}

static int live_active_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		err = __live_active_context(engine);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	return err;
}

static int __remote_sync(struct intel_context *ce, struct intel_context *remote)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(remote);
	if (err)
		return err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	err = intel_context_prepare_remote_request(remote, rq);
	if (err) {
		i915_request_add(rq);
		goto unpin;
	}

	err = request_sync(rq);

unpin:
	intel_context_unpin(remote);
	return err;
}

static int __live_remote_context(struct intel_engine_cs *engine)
{
	struct intel_context *local, *remote;
	unsigned long saved_heartbeat;
	int pass;
	int err;

	/*
	 * Check that our idle barriers do not interfere with normal
	 * activity tracking. In particular, check that operating
	 * on the context image remotely (intel_context_prepare_remote_request),
	 * which inserts foreign fences into intel_context.active, does not
	 * clobber the idle-barrier.
	 *
	 * In GuC submission mode we don't use idle barriers.
	 */
	if (intel_engine_uses_guc(engine))
		return 0;

	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	remote = intel_context_create(engine);
	if (IS_ERR(remote))
		return PTR_ERR(remote);

	local = intel_context_create(engine);
	if (IS_ERR(local)) {
		err = PTR_ERR(local);
		goto err_remote;
	}

	saved_heartbeat = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;
	intel_engine_pm_get(engine);

	for (pass = 0; pass <= 2; pass++) {
		err = __remote_sync(local, remote);
		if (err)
			break;

		err = __remote_sync(engine->kernel_context, remote);
		if (err)
			break;

		if (i915_active_is_idle(&remote->active)) {
			pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			break;
		}
	}

	intel_engine_pm_put(engine);
	engine->props.heartbeat_interval_ms = saved_heartbeat;

	intel_context_put(local);
err_remote:
	intel_context_put(remote);
	return err;
}

static int live_remote_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		err = __live_remote_context(engine);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	return err;
}

int intel_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_context_size),
		SUBTEST(live_active_context),
		SUBTEST(live_remote_context),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	return intel_gt_live_subtests(tests, gt);
}