1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright © 2018 Intel Corporation 4 */ 5 6 #include <linux/sort.h> 7 8 #include "intel_gpu_commands.h" 9 #include "intel_gt_pm.h" 10 #include "intel_rps.h" 11 12 #include "i915_selftest.h" 13 #include "selftests/igt_flush_test.h" 14 15 #define COUNT 5 16 17 static int cmp_u32(const void *A, const void *B) 18 { 19 const u32 *a = A, *b = B; 20 21 return *a - *b; 22 } 23 24 static void perf_begin(struct intel_gt *gt) 25 { 26 intel_gt_pm_get(gt); 27 28 /* Boost gpufreq to max [waitboost] and keep it fixed */ 29 atomic_inc(>->rps.num_waiters); 30 schedule_work(>->rps.work); 31 flush_work(>->rps.work); 32 } 33 34 static int perf_end(struct intel_gt *gt) 35 { 36 atomic_dec(>->rps.num_waiters); 37 intel_gt_pm_put(gt); 38 39 return igt_flush_test(gt->i915); 40 } 41 42 static int write_timestamp(struct i915_request *rq, int slot) 43 { 44 struct intel_timeline *tl = 45 rcu_dereference_protected(rq->timeline, 46 !i915_request_signaled(rq)); 47 u32 cmd; 48 u32 *cs; 49 50 cs = intel_ring_begin(rq, 4); 51 if (IS_ERR(cs)) 52 return PTR_ERR(cs); 53 54 cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; 55 if (INTEL_GEN(rq->engine->i915) >= 8) 56 cmd++; 57 *cs++ = cmd; 58 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); 59 *cs++ = tl->hwsp_offset + slot * sizeof(u32); 60 *cs++ = 0; 61 62 intel_ring_advance(rq, cs); 63 64 return 0; 65 } 66 67 static struct i915_vma *create_empty_batch(struct intel_context *ce) 68 { 69 struct drm_i915_gem_object *obj; 70 struct i915_vma *vma; 71 u32 *cs; 72 int err; 73 74 obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE); 75 if (IS_ERR(obj)) 76 return ERR_CAST(obj); 77 78 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 79 if (IS_ERR(cs)) { 80 err = PTR_ERR(cs); 81 goto err_put; 82 } 83 84 cs[0] = MI_BATCH_BUFFER_END; 85 86 i915_gem_object_flush_map(obj); 87 88 vma = i915_vma_instance(obj, ce->vm, NULL); 89 if (IS_ERR(vma)) { 90 err = PTR_ERR(vma); 91 goto err_unpin; 92 } 93 94 err = i915_vma_pin(vma, 0, 0, PIN_USER); 95 if (err) 96 goto err_unpin; 97 98 i915_gem_object_unpin_map(obj); 99 return vma; 100 101 err_unpin: 102 i915_gem_object_unpin_map(obj); 103 err_put: 104 i915_gem_object_put(obj); 105 return ERR_PTR(err); 106 } 107 108 static u32 trifilter(u32 *a) 109 { 110 u64 sum; 111 112 sort(a, COUNT, sizeof(*a), cmp_u32, NULL); 113 114 sum = mul_u32_u32(a[2], 2); 115 sum += a[1]; 116 sum += a[3]; 117 118 return sum >> 2; 119 } 120 121 static int perf_mi_bb_start(void *arg) 122 { 123 struct intel_gt *gt = arg; 124 struct intel_engine_cs *engine; 125 enum intel_engine_id id; 126 int err = 0; 127 128 if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ 129 return 0; 130 131 perf_begin(gt); 132 for_each_engine(engine, gt, id) { 133 struct intel_context *ce = engine->kernel_context; 134 struct i915_vma *batch; 135 u32 cycles[COUNT]; 136 int i; 137 138 intel_engine_pm_get(engine); 139 140 batch = create_empty_batch(ce); 141 if (IS_ERR(batch)) { 142 err = PTR_ERR(batch); 143 intel_engine_pm_put(engine); 144 break; 145 } 146 147 err = i915_vma_sync(batch); 148 if (err) { 149 intel_engine_pm_put(engine); 150 i915_vma_put(batch); 151 break; 152 } 153 154 for (i = 0; i < ARRAY_SIZE(cycles); i++) { 155 struct i915_request *rq; 156 157 rq = i915_request_create(ce); 158 if (IS_ERR(rq)) { 159 err = PTR_ERR(rq); 160 break; 161 } 162 163 err = write_timestamp(rq, 2); 164 if (err) 165 goto out; 166 167 err = rq->engine->emit_bb_start(rq, 168 batch->node.start, 8, 169 0); 170 if (err) 171 goto out; 172 173 err = write_timestamp(rq, 3); 174 if (err) 175 goto out; 176 177 out: 178 i915_request_get(rq); 179 i915_request_add(rq); 180 181 if (i915_request_wait(rq, 0, HZ / 5) < 0) 182 err = -EIO; 183 i915_request_put(rq); 184 if (err) 185 break; 186 187 cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2]; 188 } 189 i915_vma_put(batch); 190 intel_engine_pm_put(engine); 191 if (err) 192 break; 193 194 pr_info("%s: MI_BB_START cycles: %u\n", 195 engine->name, trifilter(cycles)); 196 } 197 if (perf_end(gt)) 198 err = -EIO; 199 200 return err; 201 } 202 203 static struct i915_vma *create_nop_batch(struct intel_context *ce) 204 { 205 struct drm_i915_gem_object *obj; 206 struct i915_vma *vma; 207 u32 *cs; 208 int err; 209 210 obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K); 211 if (IS_ERR(obj)) 212 return ERR_CAST(obj); 213 214 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 215 if (IS_ERR(cs)) { 216 err = PTR_ERR(cs); 217 goto err_put; 218 } 219 220 memset(cs, 0, SZ_64K); 221 cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END; 222 223 i915_gem_object_flush_map(obj); 224 225 vma = i915_vma_instance(obj, ce->vm, NULL); 226 if (IS_ERR(vma)) { 227 err = PTR_ERR(vma); 228 goto err_unpin; 229 } 230 231 err = i915_vma_pin(vma, 0, 0, PIN_USER); 232 if (err) 233 goto err_unpin; 234 235 i915_gem_object_unpin_map(obj); 236 return vma; 237 238 err_unpin: 239 i915_gem_object_unpin_map(obj); 240 err_put: 241 i915_gem_object_put(obj); 242 return ERR_PTR(err); 243 } 244 245 static int perf_mi_noop(void *arg) 246 { 247 struct intel_gt *gt = arg; 248 struct intel_engine_cs *engine; 249 enum intel_engine_id id; 250 int err = 0; 251 252 if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ 253 return 0; 254 255 perf_begin(gt); 256 for_each_engine(engine, gt, id) { 257 struct intel_context *ce = engine->kernel_context; 258 struct i915_vma *base, *nop; 259 u32 cycles[COUNT]; 260 int i; 261 262 intel_engine_pm_get(engine); 263 264 base = create_empty_batch(ce); 265 if (IS_ERR(base)) { 266 err = PTR_ERR(base); 267 intel_engine_pm_put(engine); 268 break; 269 } 270 271 err = i915_vma_sync(base); 272 if (err) { 273 i915_vma_put(base); 274 intel_engine_pm_put(engine); 275 break; 276 } 277 278 nop = create_nop_batch(ce); 279 if (IS_ERR(nop)) { 280 err = PTR_ERR(nop); 281 i915_vma_put(base); 282 intel_engine_pm_put(engine); 283 break; 284 } 285 286 err = i915_vma_sync(nop); 287 if (err) { 288 i915_vma_put(nop); 289 i915_vma_put(base); 290 intel_engine_pm_put(engine); 291 break; 292 } 293 294 for (i = 0; i < ARRAY_SIZE(cycles); i++) { 295 struct i915_request *rq; 296 297 rq = i915_request_create(ce); 298 if (IS_ERR(rq)) { 299 err = PTR_ERR(rq); 300 break; 301 } 302 303 err = write_timestamp(rq, 2); 304 if (err) 305 goto out; 306 307 err = rq->engine->emit_bb_start(rq, 308 base->node.start, 8, 309 0); 310 if (err) 311 goto out; 312 313 err = write_timestamp(rq, 3); 314 if (err) 315 goto out; 316 317 err = rq->engine->emit_bb_start(rq, 318 nop->node.start, 319 nop->node.size, 320 0); 321 if (err) 322 goto out; 323 324 err = write_timestamp(rq, 4); 325 if (err) 326 goto out; 327 328 out: 329 i915_request_get(rq); 330 i915_request_add(rq); 331 332 if (i915_request_wait(rq, 0, HZ / 5) < 0) 333 err = -EIO; 334 i915_request_put(rq); 335 if (err) 336 break; 337 338 cycles[i] = 339 (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) - 340 (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]); 341 } 342 i915_vma_put(nop); 343 i915_vma_put(base); 344 intel_engine_pm_put(engine); 345 if (err) 346 break; 347 348 pr_info("%s: 16K MI_NOOP cycles: %u\n", 349 engine->name, trifilter(cycles)); 350 } 351 if (perf_end(gt)) 352 err = -EIO; 353 354 return err; 355 } 356 357 int intel_engine_cs_perf_selftests(struct drm_i915_private *i915) 358 { 359 static const struct i915_subtest tests[] = { 360 SUBTEST(perf_mi_bb_start), 361 SUBTEST(perf_mi_noop), 362 }; 363 364 if (intel_gt_is_wedged(&i915->gt)) 365 return 0; 366 367 return intel_gt_live_subtests(tests, &i915->gt); 368 } 369 370 static int intel_mmio_bases_check(void *arg) 371 { 372 int i, j; 373 374 for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { 375 const struct engine_info *info = &intel_engines[i]; 376 u8 prev = U8_MAX; 377 378 for (j = 0; j < MAX_MMIO_BASES; j++) { 379 u8 gen = info->mmio_bases[j].gen; 380 u32 base = info->mmio_bases[j].base; 381 382 if (gen >= prev) { 383 pr_err("%s(%s, class:%d, instance:%d): mmio base for gen %x is before the one for gen %x\n", 384 __func__, 385 intel_engine_class_repr(info->class), 386 info->class, info->instance, 387 prev, gen); 388 return -EINVAL; 389 } 390 391 if (gen == 0) 392 break; 393 394 if (!base) { 395 pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for gen %x at entry %u\n", 396 __func__, 397 intel_engine_class_repr(info->class), 398 info->class, info->instance, 399 base, gen, j); 400 return -EINVAL; 401 } 402 403 prev = gen; 404 } 405 406 pr_debug("%s: min gen supported for %s%d is %d\n", 407 __func__, 408 intel_engine_class_repr(info->class), 409 info->instance, 410 prev); 411 } 412 413 return 0; 414 } 415 416 int intel_engine_cs_mock_selftests(void) 417 { 418 static const struct i915_subtest tests[] = { 419 SUBTEST(intel_mmio_bases_check), 420 }; 421 422 return i915_subtests(tests, NULL); 423 } 424