1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #include <linux/kref.h> 8 9 #include "gem/i915_gem_pm.h" 10 #include "gt/intel_gt.h" 11 12 #include "i915_selftest.h" 13 14 #include "igt_flush_test.h" 15 #include "lib_sw_fence.h" 16 17 #define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab" 18 19 static int 20 alloc_empty_config(struct i915_perf *perf) 21 { 22 struct i915_oa_config *oa_config; 23 24 oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); 25 if (!oa_config) 26 return -ENOMEM; 27 28 oa_config->perf = perf; 29 kref_init(&oa_config->ref); 30 31 strlcpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid)); 32 33 mutex_lock(&perf->metrics_lock); 34 35 oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL); 36 if (oa_config->id < 0) { 37 mutex_unlock(&perf->metrics_lock); 38 i915_oa_config_put(oa_config); 39 return -ENOMEM; 40 } 41 42 mutex_unlock(&perf->metrics_lock); 43 44 return 0; 45 } 46 47 static void 48 destroy_empty_config(struct i915_perf *perf) 49 { 50 struct i915_oa_config *oa_config = NULL, *tmp; 51 int id; 52 53 mutex_lock(&perf->metrics_lock); 54 55 idr_for_each_entry(&perf->metrics_idr, tmp, id) { 56 if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) { 57 oa_config = tmp; 58 break; 59 } 60 } 61 62 if (oa_config) 63 idr_remove(&perf->metrics_idr, oa_config->id); 64 65 mutex_unlock(&perf->metrics_lock); 66 67 if (oa_config) 68 i915_oa_config_put(oa_config); 69 } 70 71 static struct i915_oa_config * 72 get_empty_config(struct i915_perf *perf) 73 { 74 struct i915_oa_config *oa_config = NULL, *tmp; 75 int id; 76 77 mutex_lock(&perf->metrics_lock); 78 79 idr_for_each_entry(&perf->metrics_idr, tmp, id) { 80 if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) { 81 oa_config = i915_oa_config_get(tmp); 82 break; 83 } 84 } 85 86 mutex_unlock(&perf->metrics_lock); 87 88 return oa_config; 89 } 90 91 static struct i915_perf_stream * 92 test_stream(struct i915_perf *perf) 93 { 94 struct drm_i915_perf_open_param param = {}; 95 struct i915_oa_config *oa_config = get_empty_config(perf); 96 struct perf_open_properties props = { 97 .engine = intel_engine_lookup_user(perf->i915, 98 I915_ENGINE_CLASS_RENDER, 99 0), 100 .sample_flags = SAMPLE_OA_REPORT, 101 .oa_format = GRAPHICS_VER(perf->i915) == 12 ? 102 I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8, 103 }; 104 struct i915_perf_stream *stream; 105 struct intel_gt *gt; 106 107 if (!props.engine) 108 return NULL; 109 110 gt = props.engine->gt; 111 112 if (!oa_config) 113 return NULL; 114 115 props.metrics_set = oa_config->id; 116 117 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 118 if (!stream) { 119 i915_oa_config_put(oa_config); 120 return NULL; 121 } 122 123 stream->perf = perf; 124 125 mutex_lock(>->perf.lock); 126 if (i915_oa_stream_init(stream, ¶m, &props)) { 127 kfree(stream); 128 stream = NULL; 129 } 130 mutex_unlock(>->perf.lock); 131 132 i915_oa_config_put(oa_config); 133 134 return stream; 135 } 136 137 static void stream_destroy(struct i915_perf_stream *stream) 138 { 139 struct intel_gt *gt = stream->engine->gt; 140 141 mutex_lock(>->perf.lock); 142 i915_perf_destroy_locked(stream); 143 mutex_unlock(>->perf.lock); 144 } 145 146 static int live_sanitycheck(void *arg) 147 { 148 struct drm_i915_private *i915 = arg; 149 struct i915_perf_stream *stream; 150 151 /* Quick check we can create a perf stream */ 152 153 stream = test_stream(&i915->perf); 154 if (!stream) 155 return -EINVAL; 156 157 stream_destroy(stream); 158 return 0; 159 } 160 161 static int write_timestamp(struct i915_request *rq, int slot) 162 { 163 u32 *cs; 164 int len; 165 166 cs = intel_ring_begin(rq, 6); 167 if (IS_ERR(cs)) 168 return PTR_ERR(cs); 169 170 len = 5; 171 if (GRAPHICS_VER(rq->engine->i915) >= 8) 172 len++; 173 174 *cs++ = GFX_OP_PIPE_CONTROL(len); 175 *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | 176 PIPE_CONTROL_STORE_DATA_INDEX | 177 PIPE_CONTROL_WRITE_TIMESTAMP; 178 *cs++ = slot * sizeof(u32); 179 *cs++ = 0; 180 *cs++ = 0; 181 *cs++ = 0; 182 183 intel_ring_advance(rq, cs); 184 185 return 0; 186 } 187 188 static ktime_t poll_status(struct i915_request *rq, int slot) 189 { 190 while (!intel_read_status_page(rq->engine, slot) && 191 !i915_request_completed(rq)) 192 cpu_relax(); 193 194 return ktime_get(); 195 } 196 197 static int live_noa_delay(void *arg) 198 { 199 struct drm_i915_private *i915 = arg; 200 struct i915_perf_stream *stream; 201 struct i915_request *rq; 202 ktime_t t0, t1; 203 u64 expected; 204 u32 delay; 205 int err; 206 int i; 207 208 /* Check that the GPU delays matches expectations */ 209 210 stream = test_stream(&i915->perf); 211 if (!stream) 212 return -ENOMEM; 213 214 expected = atomic64_read(&stream->perf->noa_programming_delay); 215 216 if (stream->engine->class != RENDER_CLASS) { 217 err = -ENODEV; 218 goto out; 219 } 220 221 for (i = 0; i < 4; i++) 222 intel_write_status_page(stream->engine, 0x100 + i, 0); 223 224 rq = intel_engine_create_kernel_request(stream->engine); 225 if (IS_ERR(rq)) { 226 err = PTR_ERR(rq); 227 goto out; 228 } 229 230 if (rq->engine->emit_init_breadcrumb) { 231 err = rq->engine->emit_init_breadcrumb(rq); 232 if (err) { 233 i915_request_add(rq); 234 goto out; 235 } 236 } 237 238 err = write_timestamp(rq, 0x100); 239 if (err) { 240 i915_request_add(rq); 241 goto out; 242 } 243 244 err = rq->engine->emit_bb_start(rq, 245 i915_ggtt_offset(stream->noa_wait), 0, 246 I915_DISPATCH_SECURE); 247 if (err) { 248 i915_request_add(rq); 249 goto out; 250 } 251 252 err = write_timestamp(rq, 0x102); 253 if (err) { 254 i915_request_add(rq); 255 goto out; 256 } 257 258 i915_request_get(rq); 259 i915_request_add(rq); 260 261 preempt_disable(); 262 t0 = poll_status(rq, 0x100); 263 t1 = poll_status(rq, 0x102); 264 preempt_enable(); 265 266 pr_info("CPU delay: %lluns, expected %lluns\n", 267 ktime_sub(t1, t0), expected); 268 269 delay = intel_read_status_page(stream->engine, 0x102); 270 delay -= intel_read_status_page(stream->engine, 0x100); 271 delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay); 272 pr_info("GPU delay: %uns, expected %lluns\n", 273 delay, expected); 274 275 if (4 * delay < 3 * expected || 2 * delay > 3 * expected) { 276 pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n", 277 delay / 1000, 278 div_u64(3 * expected, 4000), 279 div_u64(3 * expected, 2000)); 280 err = -EINVAL; 281 } 282 283 i915_request_put(rq); 284 out: 285 stream_destroy(stream); 286 return err; 287 } 288 289 static int live_noa_gpr(void *arg) 290 { 291 struct drm_i915_private *i915 = arg; 292 struct i915_perf_stream *stream; 293 struct intel_context *ce; 294 struct i915_request *rq; 295 u32 *cs, *store; 296 void *scratch; 297 u32 gpr0; 298 int err; 299 int i; 300 301 /* Check that the delay does not clobber user context state (GPR) */ 302 303 stream = test_stream(&i915->perf); 304 if (!stream) 305 return -ENOMEM; 306 307 gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0)); 308 309 ce = intel_context_create(stream->engine); 310 if (IS_ERR(ce)) { 311 err = PTR_ERR(ce); 312 goto out; 313 } 314 315 /* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */ 316 scratch = __px_vaddr(ce->vm->scratch[0]); 317 memset(scratch, POISON_FREE, PAGE_SIZE); 318 319 rq = intel_context_create_request(ce); 320 if (IS_ERR(rq)) { 321 err = PTR_ERR(rq); 322 goto out_ce; 323 } 324 i915_request_get(rq); 325 326 if (rq->engine->emit_init_breadcrumb) { 327 err = rq->engine->emit_init_breadcrumb(rq); 328 if (err) { 329 i915_request_add(rq); 330 goto out_rq; 331 } 332 } 333 334 /* Fill the 16 qword [32 dword] GPR with a known unlikely value */ 335 cs = intel_ring_begin(rq, 2 * 32 + 2); 336 if (IS_ERR(cs)) { 337 err = PTR_ERR(cs); 338 i915_request_add(rq); 339 goto out_rq; 340 } 341 342 *cs++ = MI_LOAD_REGISTER_IMM(32); 343 for (i = 0; i < 32; i++) { 344 *cs++ = gpr0 + i * sizeof(u32); 345 *cs++ = STACK_MAGIC; 346 } 347 *cs++ = MI_NOOP; 348 intel_ring_advance(rq, cs); 349 350 /* Execute the GPU delay */ 351 err = rq->engine->emit_bb_start(rq, 352 i915_ggtt_offset(stream->noa_wait), 0, 353 I915_DISPATCH_SECURE); 354 if (err) { 355 i915_request_add(rq); 356 goto out_rq; 357 } 358 359 /* Read the GPR back, using the pinned global HWSP for convenience */ 360 store = memset32(rq->engine->status_page.addr + 512, 0, 32); 361 for (i = 0; i < 32; i++) { 362 u32 cmd; 363 364 cs = intel_ring_begin(rq, 4); 365 if (IS_ERR(cs)) { 366 err = PTR_ERR(cs); 367 i915_request_add(rq); 368 goto out_rq; 369 } 370 371 cmd = MI_STORE_REGISTER_MEM; 372 if (GRAPHICS_VER(i915) >= 8) 373 cmd++; 374 cmd |= MI_USE_GGTT; 375 376 *cs++ = cmd; 377 *cs++ = gpr0 + i * sizeof(u32); 378 *cs++ = i915_ggtt_offset(rq->engine->status_page.vma) + 379 offset_in_page(store) + 380 i * sizeof(u32); 381 *cs++ = 0; 382 intel_ring_advance(rq, cs); 383 } 384 385 i915_request_add(rq); 386 387 if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) { 388 pr_err("noa_wait timed out\n"); 389 intel_gt_set_wedged(stream->engine->gt); 390 err = -EIO; 391 goto out_rq; 392 } 393 394 /* Verify that the GPR contain our expected values */ 395 for (i = 0; i < 32; i++) { 396 if (store[i] == STACK_MAGIC) 397 continue; 398 399 pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n", 400 i, store[i], STACK_MAGIC); 401 err = -EINVAL; 402 } 403 404 /* Verify that the user's scratch page was not used for GPR storage */ 405 if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) { 406 pr_err("Scratch page overwritten!\n"); 407 igt_hexdump(scratch, 4096); 408 err = -EINVAL; 409 } 410 411 out_rq: 412 i915_request_put(rq); 413 out_ce: 414 intel_context_put(ce); 415 out: 416 stream_destroy(stream); 417 return err; 418 } 419 420 int i915_perf_live_selftests(struct drm_i915_private *i915) 421 { 422 static const struct i915_subtest tests[] = { 423 SUBTEST(live_sanitycheck), 424 SUBTEST(live_noa_delay), 425 SUBTEST(live_noa_gpr), 426 }; 427 struct i915_perf *perf = &i915->perf; 428 int err; 429 430 if (!perf->metrics_kobj || !perf->ops.enable_metric_set) 431 return 0; 432 433 if (intel_gt_is_wedged(to_gt(i915))) 434 return 0; 435 436 err = alloc_empty_config(&i915->perf); 437 if (err) 438 return err; 439 440 err = i915_live_subtests(tests, i915); 441 442 destroy_empty_config(&i915->perf); 443 444 return err; 445 } 446