1 /* 2 * SPDX-License-Identifier: GPL-2.0 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #include "i915_selftest.h" 8 #include "intel_engine_heartbeat.h" 9 #include "intel_engine_pm.h" 10 #include "intel_gt.h" 11 12 #include "gem/selftests/mock_context.h" 13 #include "selftests/igt_flush_test.h" 14 #include "selftests/mock_drm.h" 15 16 static int request_sync(struct i915_request *rq) 17 { 18 struct intel_timeline *tl = i915_request_timeline(rq); 19 long timeout; 20 int err = 0; 21 22 intel_timeline_get(tl); 23 i915_request_get(rq); 24 25 /* Opencode i915_request_add() so we can keep the timeline locked. */ 26 __i915_request_commit(rq); 27 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 28 __i915_request_queue(rq, NULL); 29 30 timeout = i915_request_wait(rq, 0, HZ / 10); 31 if (timeout < 0) 32 err = timeout; 33 else 34 i915_request_retire_upto(rq); 35 36 lockdep_unpin_lock(&tl->mutex, rq->cookie); 37 mutex_unlock(&tl->mutex); 38 39 i915_request_put(rq); 40 intel_timeline_put(tl); 41 42 return err; 43 } 44 45 static int context_sync(struct intel_context *ce) 46 { 47 struct intel_timeline *tl = ce->timeline; 48 int err = 0; 49 50 mutex_lock(&tl->mutex); 51 do { 52 struct i915_request *rq; 53 long timeout; 54 55 if (list_empty(&tl->requests)) 56 break; 57 58 rq = list_last_entry(&tl->requests, typeof(*rq), link); 59 i915_request_get(rq); 60 61 timeout = i915_request_wait(rq, 0, HZ / 10); 62 if (timeout < 0) 63 err = timeout; 64 else 65 i915_request_retire_upto(rq); 66 67 i915_request_put(rq); 68 } while (!err); 69 mutex_unlock(&tl->mutex); 70 71 return err; 72 } 73 74 static int __live_context_size(struct intel_engine_cs *engine) 75 { 76 struct intel_context *ce; 77 struct i915_request *rq; 78 void *vaddr; 79 int err; 80 81 ce = intel_context_create(engine); 82 if (IS_ERR(ce)) 83 return PTR_ERR(ce); 84 85 err = intel_context_pin(ce); 86 if (err) 87 goto err; 88 89 vaddr = i915_gem_object_pin_map(ce->state->obj, 90 i915_coherent_map_type(engine->i915)); 91 if (IS_ERR(vaddr)) { 92 err = PTR_ERR(vaddr); 93 intel_context_unpin(ce); 94 goto err; 95 } 96 97 /* 98 * Note that execlists also applies a redzone which it checks on 99 * context unpin when debugging. We are using the same location 100 * and same poison value so that our checks overlap. Despite the 101 * redundancy, we want to keep this little selftest so that we 102 * get coverage of any and all submission backends, and we can 103 * always extend this test to ensure we trick the HW into a 104 * compromising position wrt to the various sections that need 105 * to be written into the context state. 106 * 107 * TLDR; this overlaps with the execlists redzone. 108 */ 109 vaddr += engine->context_size - I915_GTT_PAGE_SIZE; 110 memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE); 111 112 rq = intel_context_create_request(ce); 113 intel_context_unpin(ce); 114 if (IS_ERR(rq)) { 115 err = PTR_ERR(rq); 116 goto err_unpin; 117 } 118 119 err = request_sync(rq); 120 if (err) 121 goto err_unpin; 122 123 /* Force the context switch */ 124 rq = intel_engine_create_kernel_request(engine); 125 if (IS_ERR(rq)) { 126 err = PTR_ERR(rq); 127 goto err_unpin; 128 } 129 err = request_sync(rq); 130 if (err) 131 goto err_unpin; 132 133 if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) { 134 pr_err("%s context overwrote trailing red-zone!", engine->name); 135 err = -EINVAL; 136 } 137 138 err_unpin: 139 i915_gem_object_unpin_map(ce->state->obj); 140 err: 141 intel_context_put(ce); 142 return err; 143 } 144 145 static int live_context_size(void *arg) 146 { 147 struct intel_gt *gt = arg; 148 struct intel_engine_cs *engine; 149 enum intel_engine_id id; 150 int err = 0; 151 152 /* 153 * Check that our context sizes are correct by seeing if the 154 * HW tries to write past the end of one. 155 */ 156 157 for_each_engine(engine, gt, id) { 158 struct file *saved; 159 160 if (!engine->context_size) 161 continue; 162 163 intel_engine_pm_get(engine); 164 165 /* 166 * Hide the old default state -- we lie about the context size 167 * and get confused when the default state is smaller than 168 * expected. For our do nothing request, inheriting the 169 * active state is sufficient, we are only checking that we 170 * don't use more than we planned. 171 */ 172 saved = fetch_and_zero(&engine->default_state); 173 174 /* Overlaps with the execlists redzone */ 175 engine->context_size += I915_GTT_PAGE_SIZE; 176 177 err = __live_context_size(engine); 178 179 engine->context_size -= I915_GTT_PAGE_SIZE; 180 181 engine->default_state = saved; 182 183 intel_engine_pm_put(engine); 184 185 if (err) 186 break; 187 } 188 189 return err; 190 } 191 192 static int __live_active_context(struct intel_engine_cs *engine) 193 { 194 unsigned long saved_heartbeat; 195 struct intel_context *ce; 196 int pass; 197 int err; 198 199 /* 200 * We keep active contexts alive until after a subsequent context 201 * switch as the final write from the context-save will be after 202 * we retire the final request. We track when we unpin the context, 203 * under the presumption that the final pin is from the last request, 204 * and instead of immediately unpinning the context, we add a task 205 * to unpin the context from the next idle-barrier. 206 * 207 * This test makes sure that the context is kept alive until a 208 * subsequent idle-barrier (emitted when the engine wakeref hits 0 209 * with no more outstanding requests). 210 */ 211 212 if (intel_engine_pm_is_awake(engine)) { 213 pr_err("%s is awake before starting %s!\n", 214 engine->name, __func__); 215 return -EINVAL; 216 } 217 218 ce = intel_context_create(engine); 219 if (IS_ERR(ce)) 220 return PTR_ERR(ce); 221 222 saved_heartbeat = engine->props.heartbeat_interval_ms; 223 engine->props.heartbeat_interval_ms = 0; 224 225 for (pass = 0; pass <= 2; pass++) { 226 struct i915_request *rq; 227 228 intel_engine_pm_get(engine); 229 230 rq = intel_context_create_request(ce); 231 if (IS_ERR(rq)) { 232 err = PTR_ERR(rq); 233 goto out_engine; 234 } 235 236 err = request_sync(rq); 237 if (err) 238 goto out_engine; 239 240 /* Context will be kept active until after an idle-barrier. */ 241 if (i915_active_is_idle(&ce->active)) { 242 pr_err("context is not active; expected idle-barrier (%s pass %d)\n", 243 engine->name, pass); 244 err = -EINVAL; 245 goto out_engine; 246 } 247 248 if (!intel_engine_pm_is_awake(engine)) { 249 pr_err("%s is asleep before idle-barrier\n", 250 engine->name); 251 err = -EINVAL; 252 goto out_engine; 253 } 254 255 out_engine: 256 intel_engine_pm_put(engine); 257 if (err) 258 goto err; 259 } 260 261 /* Now make sure our idle-barriers are flushed */ 262 err = intel_engine_flush_barriers(engine); 263 if (err) 264 goto err; 265 266 /* Wait for the barrier and in the process wait for engine to park */ 267 err = context_sync(engine->kernel_context); 268 if (err) 269 goto err; 270 271 if (!i915_active_is_idle(&ce->active)) { 272 pr_err("context is still active!"); 273 err = -EINVAL; 274 } 275 276 intel_engine_pm_flush(engine); 277 278 if (intel_engine_pm_is_awake(engine)) { 279 struct drm_printer p = drm_debug_printer(__func__); 280 281 intel_engine_dump(engine, &p, 282 "%s is still awake:%d after idle-barriers\n", 283 engine->name, 284 atomic_read(&engine->wakeref.count)); 285 GEM_TRACE_DUMP(); 286 287 err = -EINVAL; 288 goto err; 289 } 290 291 err: 292 engine->props.heartbeat_interval_ms = saved_heartbeat; 293 intel_context_put(ce); 294 return err; 295 } 296 297 static int live_active_context(void *arg) 298 { 299 struct intel_gt *gt = arg; 300 struct intel_engine_cs *engine; 301 enum intel_engine_id id; 302 int err = 0; 303 304 for_each_engine(engine, gt, id) { 305 err = __live_active_context(engine); 306 if (err) 307 break; 308 309 err = igt_flush_test(gt->i915); 310 if (err) 311 break; 312 } 313 314 return err; 315 } 316 317 static int __remote_sync(struct intel_context *ce, struct intel_context *remote) 318 { 319 struct i915_request *rq; 320 int err; 321 322 err = intel_context_pin(remote); 323 if (err) 324 return err; 325 326 rq = intel_context_create_request(ce); 327 if (IS_ERR(rq)) { 328 err = PTR_ERR(rq); 329 goto unpin; 330 } 331 332 err = intel_context_prepare_remote_request(remote, rq); 333 if (err) { 334 i915_request_add(rq); 335 goto unpin; 336 } 337 338 err = request_sync(rq); 339 340 unpin: 341 intel_context_unpin(remote); 342 return err; 343 } 344 345 static int __live_remote_context(struct intel_engine_cs *engine) 346 { 347 struct intel_context *local, *remote; 348 unsigned long saved_heartbeat; 349 int pass; 350 int err; 351 352 /* 353 * Check that our idle barriers do not interfere with normal 354 * activity tracking. In particular, check that operating 355 * on the context image remotely (intel_context_prepare_remote_request), 356 * which inserts foreign fences into intel_context.active, does not 357 * clobber the idle-barrier. 358 */ 359 360 if (intel_engine_pm_is_awake(engine)) { 361 pr_err("%s is awake before starting %s!\n", 362 engine->name, __func__); 363 return -EINVAL; 364 } 365 366 remote = intel_context_create(engine); 367 if (IS_ERR(remote)) 368 return PTR_ERR(remote); 369 370 local = intel_context_create(engine); 371 if (IS_ERR(local)) { 372 err = PTR_ERR(local); 373 goto err_remote; 374 } 375 376 saved_heartbeat = engine->props.heartbeat_interval_ms; 377 engine->props.heartbeat_interval_ms = 0; 378 intel_engine_pm_get(engine); 379 380 for (pass = 0; pass <= 2; pass++) { 381 err = __remote_sync(local, remote); 382 if (err) 383 break; 384 385 err = __remote_sync(engine->kernel_context, remote); 386 if (err) 387 break; 388 389 if (i915_active_is_idle(&remote->active)) { 390 pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n", 391 engine->name, pass); 392 err = -EINVAL; 393 break; 394 } 395 } 396 397 intel_engine_pm_put(engine); 398 engine->props.heartbeat_interval_ms = saved_heartbeat; 399 400 intel_context_put(local); 401 err_remote: 402 intel_context_put(remote); 403 return err; 404 } 405 406 static int live_remote_context(void *arg) 407 { 408 struct intel_gt *gt = arg; 409 struct intel_engine_cs *engine; 410 enum intel_engine_id id; 411 int err = 0; 412 413 for_each_engine(engine, gt, id) { 414 err = __live_remote_context(engine); 415 if (err) 416 break; 417 418 err = igt_flush_test(gt->i915); 419 if (err) 420 break; 421 } 422 423 return err; 424 } 425 426 int intel_context_live_selftests(struct drm_i915_private *i915) 427 { 428 static const struct i915_subtest tests[] = { 429 SUBTEST(live_context_size), 430 SUBTEST(live_active_context), 431 SUBTEST(live_remote_context), 432 }; 433 struct intel_gt *gt = &i915->gt; 434 435 if (intel_gt_is_wedged(gt)) 436 return 0; 437 438 return intel_gt_live_subtests(tests, gt); 439 } 440