1 /* 2 * SPDX-License-Identifier: GPL-2.0 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #include "i915_selftest.h" 8 #include "intel_engine_heartbeat.h" 9 #include "intel_engine_pm.h" 10 #include "intel_gt.h" 11 12 #include "gem/selftests/mock_context.h" 13 #include "selftests/igt_flush_test.h" 14 #include "selftests/mock_drm.h" 15 16 static int request_sync(struct i915_request *rq) 17 { 18 struct intel_timeline *tl = i915_request_timeline(rq); 19 long timeout; 20 int err = 0; 21 22 intel_timeline_get(tl); 23 i915_request_get(rq); 24 25 /* Opencode i915_request_add() so we can keep the timeline locked. */ 26 __i915_request_commit(rq); 27 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 28 __i915_request_queue_bh(rq); 29 30 timeout = i915_request_wait(rq, 0, HZ / 10); 31 if (timeout < 0) 32 err = timeout; 33 else 34 i915_request_retire_upto(rq); 35 36 lockdep_unpin_lock(&tl->mutex, rq->cookie); 37 mutex_unlock(&tl->mutex); 38 39 i915_request_put(rq); 40 intel_timeline_put(tl); 41 42 return err; 43 } 44 45 static int context_sync(struct intel_context *ce) 46 { 47 struct intel_timeline *tl = ce->timeline; 48 int err = 0; 49 50 mutex_lock(&tl->mutex); 51 do { 52 struct i915_request *rq; 53 long timeout; 54 55 if (list_empty(&tl->requests)) 56 break; 57 58 rq = list_last_entry(&tl->requests, typeof(*rq), link); 59 i915_request_get(rq); 60 61 timeout = i915_request_wait(rq, 0, HZ / 10); 62 if (timeout < 0) 63 err = timeout; 64 else 65 i915_request_retire_upto(rq); 66 67 i915_request_put(rq); 68 } while (!err); 69 mutex_unlock(&tl->mutex); 70 71 /* Wait for all barriers to complete (remote CPU) before we check */ 72 i915_active_unlock_wait(&ce->active); 73 return err; 74 } 75 76 static int __live_context_size(struct intel_engine_cs *engine) 77 { 78 struct intel_context *ce; 79 struct i915_request *rq; 80 void *vaddr; 81 int err; 82 83 ce = intel_context_create(engine); 84 if (IS_ERR(ce)) 85 return PTR_ERR(ce); 86 87 err = intel_context_pin(ce); 88 if (err) 89 goto err; 90 91 vaddr = i915_gem_object_pin_map(ce->state->obj, 92 i915_coherent_map_type(engine->i915)); 93 if (IS_ERR(vaddr)) { 94 err = PTR_ERR(vaddr); 95 intel_context_unpin(ce); 96 goto err; 97 } 98 99 /* 100 * Note that execlists also applies a redzone which it checks on 101 * context unpin when debugging. We are using the same location 102 * and same poison value so that our checks overlap. Despite the 103 * redundancy, we want to keep this little selftest so that we 104 * get coverage of any and all submission backends, and we can 105 * always extend this test to ensure we trick the HW into a 106 * compromising position wrt to the various sections that need 107 * to be written into the context state. 108 * 109 * TLDR; this overlaps with the execlists redzone. 110 */ 111 vaddr += engine->context_size - I915_GTT_PAGE_SIZE; 112 memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); 113 114 rq = intel_context_create_request(ce); 115 intel_context_unpin(ce); 116 if (IS_ERR(rq)) { 117 err = PTR_ERR(rq); 118 goto err_unpin; 119 } 120 121 err = request_sync(rq); 122 if (err) 123 goto err_unpin; 124 125 /* Force the context switch */ 126 rq = intel_engine_create_kernel_request(engine); 127 if (IS_ERR(rq)) { 128 err = PTR_ERR(rq); 129 goto err_unpin; 130 } 131 err = request_sync(rq); 132 if (err) 133 goto err_unpin; 134 135 if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) { 136 pr_err("%s context overwrote trailing red-zone!", engine->name); 137 err = -EINVAL; 138 } 139 140 err_unpin: 141 i915_gem_object_unpin_map(ce->state->obj); 142 err: 143 intel_context_put(ce); 144 return err; 145 } 146 147 static int live_context_size(void *arg) 148 { 149 struct intel_gt *gt = arg; 150 struct intel_engine_cs *engine; 151 enum intel_engine_id id; 152 int err = 0; 153 154 /* 155 * Check that our context sizes are correct by seeing if the 156 * HW tries to write past the end of one. 157 */ 158 159 for_each_engine(engine, gt, id) { 160 struct file *saved; 161 162 if (!engine->context_size) 163 continue; 164 165 intel_engine_pm_get(engine); 166 167 /* 168 * Hide the old default state -- we lie about the context size 169 * and get confused when the default state is smaller than 170 * expected. For our do nothing request, inheriting the 171 * active state is sufficient, we are only checking that we 172 * don't use more than we planned. 173 */ 174 saved = fetch_and_zero(&engine->default_state); 175 176 /* Overlaps with the execlists redzone */ 177 engine->context_size += I915_GTT_PAGE_SIZE; 178 179 err = __live_context_size(engine); 180 181 engine->context_size -= I915_GTT_PAGE_SIZE; 182 183 engine->default_state = saved; 184 185 intel_engine_pm_put(engine); 186 187 if (err) 188 break; 189 } 190 191 return err; 192 } 193 194 static int __live_active_context(struct intel_engine_cs *engine) 195 { 196 unsigned long saved_heartbeat; 197 struct intel_context *ce; 198 int pass; 199 int err; 200 201 /* 202 * We keep active contexts alive until after a subsequent context 203 * switch as the final write from the context-save will be after 204 * we retire the final request. We track when we unpin the context, 205 * under the presumption that the final pin is from the last request, 206 * and instead of immediately unpinning the context, we add a task 207 * to unpin the context from the next idle-barrier. 208 * 209 * This test makes sure that the context is kept alive until a 210 * subsequent idle-barrier (emitted when the engine wakeref hits 0 211 * with no more outstanding requests). 212 */ 213 214 if (intel_engine_pm_is_awake(engine)) { 215 pr_err("%s is awake before starting %s!\n", 216 engine->name, __func__); 217 return -EINVAL; 218 } 219 220 ce = intel_context_create(engine); 221 if (IS_ERR(ce)) 222 return PTR_ERR(ce); 223 224 saved_heartbeat = engine->props.heartbeat_interval_ms; 225 engine->props.heartbeat_interval_ms = 0; 226 227 for (pass = 0; pass <= 2; pass++) { 228 struct i915_request *rq; 229 230 intel_engine_pm_get(engine); 231 232 rq = intel_context_create_request(ce); 233 if (IS_ERR(rq)) { 234 err = PTR_ERR(rq); 235 goto out_engine; 236 } 237 238 err = request_sync(rq); 239 if (err) 240 goto out_engine; 241 242 /* Context will be kept active until after an idle-barrier. */ 243 if (i915_active_is_idle(&ce->active)) { 244 pr_err("context is not active; expected idle-barrier (%s pass %d)\n", 245 engine->name, pass); 246 err = -EINVAL; 247 goto out_engine; 248 } 249 250 if (!intel_engine_pm_is_awake(engine)) { 251 pr_err("%s is asleep before idle-barrier\n", 252 engine->name); 253 err = -EINVAL; 254 goto out_engine; 255 } 256 257 out_engine: 258 intel_engine_pm_put(engine); 259 if (err) 260 goto err; 261 } 262 263 /* Now make sure our idle-barriers are flushed */ 264 err = intel_engine_flush_barriers(engine); 265 if (err) 266 goto err; 267 268 /* Wait for the barrier and in the process wait for engine to park */ 269 err = context_sync(engine->kernel_context); 270 if (err) 271 goto err; 272 273 if (!i915_active_is_idle(&ce->active)) { 274 pr_err("context is still active!"); 275 err = -EINVAL; 276 } 277 278 intel_engine_pm_flush(engine); 279 280 if (intel_engine_pm_is_awake(engine)) { 281 struct drm_printer p = drm_debug_printer(__func__); 282 283 intel_engine_dump(engine, &p, 284 "%s is still awake:%d after idle-barriers\n", 285 engine->name, 286 atomic_read(&engine->wakeref.count)); 287 GEM_TRACE_DUMP(); 288 289 err = -EINVAL; 290 goto err; 291 } 292 293 err: 294 engine->props.heartbeat_interval_ms = saved_heartbeat; 295 intel_context_put(ce); 296 return err; 297 } 298 299 static int live_active_context(void *arg) 300 { 301 struct intel_gt *gt = arg; 302 struct intel_engine_cs *engine; 303 enum intel_engine_id id; 304 int err = 0; 305 306 for_each_engine(engine, gt, id) { 307 err = __live_active_context(engine); 308 if (err) 309 break; 310 311 err = igt_flush_test(gt->i915); 312 if (err) 313 break; 314 } 315 316 return err; 317 } 318 319 static int __remote_sync(struct intel_context *ce, struct intel_context *remote) 320 { 321 struct i915_request *rq; 322 int err; 323 324 err = intel_context_pin(remote); 325 if (err) 326 return err; 327 328 rq = intel_context_create_request(ce); 329 if (IS_ERR(rq)) { 330 err = PTR_ERR(rq); 331 goto unpin; 332 } 333 334 err = intel_context_prepare_remote_request(remote, rq); 335 if (err) { 336 i915_request_add(rq); 337 goto unpin; 338 } 339 340 err = request_sync(rq); 341 342 unpin: 343 intel_context_unpin(remote); 344 return err; 345 } 346 347 static int __live_remote_context(struct intel_engine_cs *engine) 348 { 349 struct intel_context *local, *remote; 350 unsigned long saved_heartbeat; 351 int pass; 352 int err; 353 354 /* 355 * Check that our idle barriers do not interfere with normal 356 * activity tracking. In particular, check that operating 357 * on the context image remotely (intel_context_prepare_remote_request), 358 * which inserts foreign fences into intel_context.active, does not 359 * clobber the idle-barrier. 360 */ 361 362 if (intel_engine_pm_is_awake(engine)) { 363 pr_err("%s is awake before starting %s!\n", 364 engine->name, __func__); 365 return -EINVAL; 366 } 367 368 remote = intel_context_create(engine); 369 if (IS_ERR(remote)) 370 return PTR_ERR(remote); 371 372 local = intel_context_create(engine); 373 if (IS_ERR(local)) { 374 err = PTR_ERR(local); 375 goto err_remote; 376 } 377 378 saved_heartbeat = engine->props.heartbeat_interval_ms; 379 engine->props.heartbeat_interval_ms = 0; 380 intel_engine_pm_get(engine); 381 382 for (pass = 0; pass <= 2; pass++) { 383 err = __remote_sync(local, remote); 384 if (err) 385 break; 386 387 err = __remote_sync(engine->kernel_context, remote); 388 if (err) 389 break; 390 391 if (i915_active_is_idle(&remote->active)) { 392 pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n", 393 engine->name, pass); 394 err = -EINVAL; 395 break; 396 } 397 } 398 399 intel_engine_pm_put(engine); 400 engine->props.heartbeat_interval_ms = saved_heartbeat; 401 402 intel_context_put(local); 403 err_remote: 404 intel_context_put(remote); 405 return err; 406 } 407 408 static int live_remote_context(void *arg) 409 { 410 struct intel_gt *gt = arg; 411 struct intel_engine_cs *engine; 412 enum intel_engine_id id; 413 int err = 0; 414 415 for_each_engine(engine, gt, id) { 416 err = __live_remote_context(engine); 417 if (err) 418 break; 419 420 err = igt_flush_test(gt->i915); 421 if (err) 422 break; 423 } 424 425 return err; 426 } 427 428 int intel_context_live_selftests(struct drm_i915_private *i915) 429 { 430 static const struct i915_subtest tests[] = { 431 SUBTEST(live_context_size), 432 SUBTEST(live_active_context), 433 SUBTEST(live_remote_context), 434 }; 435 struct intel_gt *gt = &i915->gt; 436 437 if (intel_gt_is_wedged(gt)) 438 return 0; 439 440 return intel_gt_live_subtests(tests, gt); 441 } 442