/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "intel_gt_pm.h"
#include "intel_rps.h"

#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"

#define COUNT 5

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	return *a - *b;
}

static void perf_begin(struct intel_gt *gt)
{
	intel_gt_pm_get(gt);

	/* Boost gpufreq to max [waitboost] and keep it fixed */
	atomic_inc(&gt->rps.num_waiters);
	schedule_work(&gt->rps.work);
	flush_work(&gt->rps.work);
}

static int perf_end(struct intel_gt *gt)
{
	atomic_dec(&gt->rps.num_waiters);
	intel_gt_pm_put(gt);

	return igt_flush_test(gt->i915);
}

/*
 * Store the engine's current CS_TIMESTAMP into the given dword slot of the
 * request's HWSP. On gen8+ the GGTT address is 64b wide, so the command
 * length field is bumped by one and the trailing zero dword carries the
 * upper address bits; on gen7 that dword is just padding (MI_NOOP).
 */
static int write_timestamp(struct i915_request *rq, int slot)
{
	u32 cmd;
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	if (INTEL_GEN(rq->engine->i915) >= 8)
		cmd++;
	*cs++ = cmd;
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}

static struct i915_vma *create_empty_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	cs[0] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

/*
 * Reject outliers: sort the COUNT samples and return the weighted average
 * (a[1] + 2*a[2] + a[3]) / 4 of the three central values.
 */
static u32 trifilter(u32 *a)
{
	u64 sum;

	sort(a, COUNT, sizeof(*a), cmp_u32, NULL);

	sum = mul_u32_u32(a[2], 2);
	sum += a[1];
	sum += a[3];

	return sum >> 2;
}

/*
 * Measure how many CS_TIMESTAMP ticks it takes to execute MI_BB_START into
 * an (almost) empty batch, i.e. one containing only MI_BATCH_BUFFER_END.
 */
static int perf_mi_bb_start(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *batch;
		u32 cycles[COUNT];
		int i;

		intel_engine_pm_get(engine);

		batch = create_empty_batch(ce);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(batch);
		if (err) {
			intel_engine_pm_put(engine);
			i915_vma_put(batch);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							batch->node.start, 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
		}
		i915_vma_put(batch);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: MI_BB_START cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}

static struct i915_vma *create_nop_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	/* A 64KiB batch of zeroes is a long run of MI_NOOP */
	memset(cs, 0, SZ_64K);
	cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

/*
 * Measure the cost of executing ~16k MI_NOOP by timing both an empty batch
 * and a 64KiB batch of MI_NOOP, then subtracting the empty-batch baseline.
 */
static int perf_mi_noop(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *base, *nop;
		u32 cycles[COUNT];
		int i;

		intel_engine_pm_get(engine);

		base = create_empty_batch(ce);
		if (IS_ERR(base)) {
			err = PTR_ERR(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(base);
		if (err) {
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		nop = create_nop_batch(ce);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(nop);
		if (err) {
			i915_vma_put(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							base->node.start, 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							nop->node.start,
							nop->node.size,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 4);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			cycles[i] =
				(rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
				(rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
		}
		i915_vma_put(nop);
		i915_vma_put(base);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: 16K MI_NOOP cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}

int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_mi_bb_start),
		SUBTEST(perf_mi_noop),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}

/*
 * Check that the per-engine mmio_bases[] tables are ordered by descending
 * gen and that every populated entry has a non-zero base address.
 */
static int intel_mmio_bases_check(void *arg)
{
	int i, j;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		const struct engine_info *info = &intel_engines[i];
		u8 prev = U8_MAX;

		for (j = 0; j < MAX_MMIO_BASES; j++) {
			u8 gen = info->mmio_bases[j].gen;
			u32 base = info->mmio_bases[j].base;

			if (gen >= prev) {
				pr_err("%s(%s, class:%d, instance:%d): mmio base for gen %x is before the one for gen %x\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       prev, gen);
				return -EINVAL;
			}

			if (gen == 0)
				break;

			if (!base) {
				pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for gen %x at entry %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       base, gen, j);
				return -EINVAL;
			}

			prev = gen;
		}

		pr_debug("%s: min gen supported for %s%d is %d\n",
			 __func__,
			 intel_engine_class_repr(info->class),
			 info->instance,
			 prev);
	}

	return 0;
}

int intel_engine_cs_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(intel_mmio_bases_check),
	};

	return i915_subtests(tests, NULL);
}