1 /* 2 * SPDX-License-Identifier: GPL-2.0 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #include "i915_selftest.h" 8 #include "intel_engine_heartbeat.h" 9 #include "intel_engine_pm.h" 10 #include "intel_gt.h" 11 12 #include "gem/selftests/mock_context.h" 13 #include "selftests/igt_flush_test.h" 14 #include "selftests/mock_drm.h" 15 16 static int request_sync(struct i915_request *rq) 17 { 18 struct intel_timeline *tl = i915_request_timeline(rq); 19 long timeout; 20 int err = 0; 21 22 intel_timeline_get(tl); 23 i915_request_get(rq); 24 25 /* Opencode i915_request_add() so we can keep the timeline locked. */ 26 __i915_request_commit(rq); 27 __i915_request_queue(rq, NULL); 28 29 timeout = i915_request_wait(rq, 0, HZ / 10); 30 if (timeout < 0) 31 err = timeout; 32 else 33 i915_request_retire_upto(rq); 34 35 lockdep_unpin_lock(&tl->mutex, rq->cookie); 36 mutex_unlock(&tl->mutex); 37 38 i915_request_put(rq); 39 intel_timeline_put(tl); 40 41 return err; 42 } 43 44 static int context_sync(struct intel_context *ce) 45 { 46 struct intel_timeline *tl = ce->timeline; 47 int err = 0; 48 49 mutex_lock(&tl->mutex); 50 do { 51 struct i915_request *rq; 52 long timeout; 53 54 if (list_empty(&tl->requests)) 55 break; 56 57 rq = list_last_entry(&tl->requests, typeof(*rq), link); 58 i915_request_get(rq); 59 60 timeout = i915_request_wait(rq, 0, HZ / 10); 61 if (timeout < 0) 62 err = timeout; 63 else 64 i915_request_retire_upto(rq); 65 66 i915_request_put(rq); 67 } while (!err); 68 mutex_unlock(&tl->mutex); 69 70 return err; 71 } 72 73 static int __live_context_size(struct intel_engine_cs *engine, 74 struct i915_gem_context *fixme) 75 { 76 struct intel_context *ce; 77 struct i915_request *rq; 78 void *vaddr; 79 int err; 80 81 ce = intel_context_create(fixme, engine); 82 if (IS_ERR(ce)) 83 return PTR_ERR(ce); 84 85 err = intel_context_pin(ce); 86 if (err) 87 goto err; 88 89 vaddr = i915_gem_object_pin_map(ce->state->obj, 90 i915_coherent_map_type(engine->i915)); 91 if (IS_ERR(vaddr)) { 92 err = PTR_ERR(vaddr); 93 intel_context_unpin(ce); 94 goto err; 95 } 96 97 /* 98 * Note that execlists also applies a redzone which it checks on 99 * context unpin when debugging. We are using the same location 100 * and same poison value so that our checks overlap. Despite the 101 * redundancy, we want to keep this little selftest so that we 102 * get coverage of any and all submission backends, and we can 103 * always extend this test to ensure we trick the HW into a 104 * compromising position wrt to the various sections that need 105 * to be written into the context state. 106 * 107 * TLDR; this overlaps with the execlists redzone. 108 */ 109 vaddr += engine->context_size - I915_GTT_PAGE_SIZE; 110 memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); 111 112 rq = intel_context_create_request(ce); 113 intel_context_unpin(ce); 114 if (IS_ERR(rq)) { 115 err = PTR_ERR(rq); 116 goto err_unpin; 117 } 118 119 err = request_sync(rq); 120 if (err) 121 goto err_unpin; 122 123 /* Force the context switch */ 124 rq = i915_request_create(engine->kernel_context); 125 if (IS_ERR(rq)) { 126 err = PTR_ERR(rq); 127 goto err_unpin; 128 } 129 err = request_sync(rq); 130 if (err) 131 goto err_unpin; 132 133 if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) { 134 pr_err("%s context overwrote trailing red-zone!", engine->name); 135 err = -EINVAL; 136 } 137 138 err_unpin: 139 i915_gem_object_unpin_map(ce->state->obj); 140 err: 141 intel_context_put(ce); 142 return err; 143 } 144 145 static int live_context_size(void *arg) 146 { 147 struct intel_gt *gt = arg; 148 struct intel_engine_cs *engine; 149 struct i915_gem_context *fixme; 150 enum intel_engine_id id; 151 int err = 0; 152 153 /* 154 * Check that our context sizes are correct by seeing if the 155 * HW tries to write past the end of one. 156 */ 157 158 fixme = kernel_context(gt->i915); 159 if (IS_ERR(fixme)) 160 return PTR_ERR(fixme); 161 162 for_each_engine(engine, gt, id) { 163 struct { 164 struct drm_i915_gem_object *state; 165 void *pinned; 166 } saved; 167 168 if (!engine->context_size) 169 continue; 170 171 intel_engine_pm_get(engine); 172 173 /* 174 * Hide the old default state -- we lie about the context size 175 * and get confused when the default state is smaller than 176 * expected. For our do nothing request, inheriting the 177 * active state is sufficient, we are only checking that we 178 * don't use more than we planned. 179 */ 180 saved.state = fetch_and_zero(&engine->default_state); 181 saved.pinned = fetch_and_zero(&engine->pinned_default_state); 182 183 /* Overlaps with the execlists redzone */ 184 engine->context_size += I915_GTT_PAGE_SIZE; 185 186 err = __live_context_size(engine, fixme); 187 188 engine->context_size -= I915_GTT_PAGE_SIZE; 189 190 engine->pinned_default_state = saved.pinned; 191 engine->default_state = saved.state; 192 193 intel_engine_pm_put(engine); 194 195 if (err) 196 break; 197 } 198 199 kernel_context_close(fixme); 200 return err; 201 } 202 203 static int __live_active_context(struct intel_engine_cs *engine, 204 struct i915_gem_context *fixme) 205 { 206 unsigned long saved_heartbeat; 207 struct intel_context *ce; 208 int pass; 209 int err; 210 211 /* 212 * We keep active contexts alive until after a subsequent context 213 * switch as the final write from the context-save will be after 214 * we retire the final request. We track when we unpin the context, 215 * under the presumption that the final pin is from the last request, 216 * and instead of immediately unpinning the context, we add a task 217 * to unpin the context from the next idle-barrier. 218 * 219 * This test makes sure that the context is kept alive until a 220 * subsequent idle-barrier (emitted when the engine wakeref hits 0 221 * with no more outstanding requests). 222 */ 223 224 if (intel_engine_pm_is_awake(engine)) { 225 pr_err("%s is awake before starting %s!\n", 226 engine->name, __func__); 227 return -EINVAL; 228 } 229 230 ce = intel_context_create(fixme, engine); 231 if (IS_ERR(ce)) 232 return PTR_ERR(ce); 233 234 saved_heartbeat = engine->props.heartbeat_interval_ms; 235 engine->props.heartbeat_interval_ms = 0; 236 237 for (pass = 0; pass <= 2; pass++) { 238 struct i915_request *rq; 239 240 intel_engine_pm_get(engine); 241 242 rq = intel_context_create_request(ce); 243 if (IS_ERR(rq)) { 244 err = PTR_ERR(rq); 245 goto out_engine; 246 } 247 248 err = request_sync(rq); 249 if (err) 250 goto out_engine; 251 252 /* Context will be kept active until after an idle-barrier. */ 253 if (i915_active_is_idle(&ce->active)) { 254 pr_err("context is not active; expected idle-barrier (%s pass %d)\n", 255 engine->name, pass); 256 err = -EINVAL; 257 goto out_engine; 258 } 259 260 if (!intel_engine_pm_is_awake(engine)) { 261 pr_err("%s is asleep before idle-barrier\n", 262 engine->name); 263 err = -EINVAL; 264 goto out_engine; 265 } 266 267 out_engine: 268 intel_engine_pm_put(engine); 269 if (err) 270 goto err; 271 } 272 273 /* Now make sure our idle-barriers are flushed */ 274 err = intel_engine_flush_barriers(engine); 275 if (err) 276 goto err; 277 278 /* Wait for the barrier and in the process wait for engine to park */ 279 err = context_sync(engine->kernel_context); 280 if (err) 281 goto err; 282 283 if (!i915_active_is_idle(&ce->active)) { 284 pr_err("context is still active!"); 285 err = -EINVAL; 286 } 287 288 intel_engine_pm_flush(engine); 289 290 if (intel_engine_pm_is_awake(engine)) { 291 struct drm_printer p = drm_debug_printer(__func__); 292 293 intel_engine_dump(engine, &p, 294 "%s is still awake:%d after idle-barriers\n", 295 engine->name, 296 atomic_read(&engine->wakeref.count)); 297 GEM_TRACE_DUMP(); 298 299 err = -EINVAL; 300 goto err; 301 } 302 303 err: 304 engine->props.heartbeat_interval_ms = saved_heartbeat; 305 intel_context_put(ce); 306 return err; 307 } 308 309 static int live_active_context(void *arg) 310 { 311 struct intel_gt *gt = arg; 312 struct intel_engine_cs *engine; 313 struct i915_gem_context *fixme; 314 enum intel_engine_id id; 315 struct file *file; 316 int err = 0; 317 318 file = mock_file(gt->i915); 319 if (IS_ERR(file)) 320 return PTR_ERR(file); 321 322 fixme = live_context(gt->i915, file); 323 if (IS_ERR(fixme)) { 324 err = PTR_ERR(fixme); 325 goto out_file; 326 } 327 328 for_each_engine(engine, gt, id) { 329 err = __live_active_context(engine, fixme); 330 if (err) 331 break; 332 333 err = igt_flush_test(gt->i915); 334 if (err) 335 break; 336 } 337 338 out_file: 339 fput(file); 340 return err; 341 } 342 343 static int __remote_sync(struct intel_context *ce, struct intel_context *remote) 344 { 345 struct i915_request *rq; 346 int err; 347 348 err = intel_context_pin(remote); 349 if (err) 350 return err; 351 352 rq = intel_context_create_request(ce); 353 if (IS_ERR(rq)) { 354 err = PTR_ERR(rq); 355 goto unpin; 356 } 357 358 err = intel_context_prepare_remote_request(remote, rq); 359 if (err) { 360 i915_request_add(rq); 361 goto unpin; 362 } 363 364 err = request_sync(rq); 365 366 unpin: 367 intel_context_unpin(remote); 368 return err; 369 } 370 371 static int __live_remote_context(struct intel_engine_cs *engine, 372 struct i915_gem_context *fixme) 373 { 374 struct intel_context *local, *remote; 375 unsigned long saved_heartbeat; 376 int pass; 377 int err; 378 379 /* 380 * Check that our idle barriers do not interfere with normal 381 * activity tracking. In particular, check that operating 382 * on the context image remotely (intel_context_prepare_remote_request), 383 * which inserts foreign fences into intel_context.active, does not 384 * clobber the idle-barrier. 385 */ 386 387 if (intel_engine_pm_is_awake(engine)) { 388 pr_err("%s is awake before starting %s!\n", 389 engine->name, __func__); 390 return -EINVAL; 391 } 392 393 remote = intel_context_create(fixme, engine); 394 if (IS_ERR(remote)) 395 return PTR_ERR(remote); 396 397 local = intel_context_create(fixme, engine); 398 if (IS_ERR(local)) { 399 err = PTR_ERR(local); 400 goto err_remote; 401 } 402 403 saved_heartbeat = engine->props.heartbeat_interval_ms; 404 engine->props.heartbeat_interval_ms = 0; 405 intel_engine_pm_get(engine); 406 407 for (pass = 0; pass <= 2; pass++) { 408 err = __remote_sync(local, remote); 409 if (err) 410 break; 411 412 err = __remote_sync(engine->kernel_context, remote); 413 if (err) 414 break; 415 416 if (i915_active_is_idle(&remote->active)) { 417 pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n", 418 engine->name, pass); 419 err = -EINVAL; 420 break; 421 } 422 } 423 424 intel_engine_pm_put(engine); 425 engine->props.heartbeat_interval_ms = saved_heartbeat; 426 427 intel_context_put(local); 428 err_remote: 429 intel_context_put(remote); 430 return err; 431 } 432 433 static int live_remote_context(void *arg) 434 { 435 struct intel_gt *gt = arg; 436 struct intel_engine_cs *engine; 437 struct i915_gem_context *fixme; 438 enum intel_engine_id id; 439 struct file *file; 440 int err = 0; 441 442 file = mock_file(gt->i915); 443 if (IS_ERR(file)) 444 return PTR_ERR(file); 445 446 fixme = live_context(gt->i915, file); 447 if (IS_ERR(fixme)) { 448 err = PTR_ERR(fixme); 449 goto out_file; 450 } 451 452 for_each_engine(engine, gt, id) { 453 err = __live_remote_context(engine, fixme); 454 if (err) 455 break; 456 457 err = igt_flush_test(gt->i915); 458 if (err) 459 break; 460 } 461 462 out_file: 463 fput(file); 464 return err; 465 } 466 467 int intel_context_live_selftests(struct drm_i915_private *i915) 468 { 469 static const struct i915_subtest tests[] = { 470 SUBTEST(live_context_size), 471 SUBTEST(live_active_context), 472 SUBTEST(live_remote_context), 473 }; 474 struct intel_gt *gt = &i915->gt; 475 476 if (intel_gt_is_wedged(gt)) 477 return 0; 478 479 return intel_gt_live_subtests(tests, gt); 480 } 481