/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "intel_gpu_commands.h"
#include "intel_gt_pm.h"
#include "intel_rps.h"

#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"

#define COUNT 5

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	return *a - *b;
}

static void perf_begin(struct intel_gt *gt)
{
	intel_gt_pm_get(gt);

	/* Boost gpufreq to max [waitboost] and keep it fixed */
	atomic_inc(&gt->rps.num_waiters);
	schedule_work(&gt->rps.work);
	flush_work(&gt->rps.work);
}

static int perf_end(struct intel_gt *gt)
{
	atomic_dec(&gt->rps.num_waiters);
	intel_gt_pm_put(gt);

	return igt_flush_test(gt->i915);
}

static int write_timestamp(struct i915_request *rq, int slot)
{
	u32 cmd;
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	if (INTEL_GEN(rq->engine->i915) >= 8)
		cmd++;
	*cs++ = cmd;
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}

static struct i915_vma *create_empty_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	cs[0] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static u32 trifilter(u32 *a)
{
	u64 sum;

	sort(a, COUNT, sizeof(*a), cmp_u32, NULL);

	sum = mul_u32_u32(a[2], 2);
	sum += a[1];
	sum += a[3];

	return sum >> 2;
}

static int perf_mi_bb_start(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *batch;
		u32 cycles[COUNT];
		int i;

		intel_engine_pm_get(engine);

		batch = create_empty_batch(ce);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(batch);
		if (err) {
			intel_engine_pm_put(engine);
			i915_vma_put(batch);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							batch->node.start, 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

out:
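			/*
			 * Submit the request even if an emit step failed,
			 * so that whatever was written to the ring is
			 * executed and retired before we bail out.
			 */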
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
		}
		i915_vma_put(batch);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: MI_BB_START cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}

static struct i915_vma *create_nop_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	memset(cs, 0, SZ_64K);
	cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int perf_mi_noop(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *base, *nop;
		u32 cycles[COUNT];
		int i;

		intel_engine_pm_get(engine);

		base = create_empty_batch(ce);
		if (IS_ERR(base)) {
			err = PTR_ERR(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(base);
		if (err) {
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		nop = create_nop_batch(ce);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(nop);
		if (err) {
			i915_vma_put(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							base->node.start, 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							nop->node.start,
							nop->node.size,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 4);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			cycles[i] =
				(rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
				(rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
		}
		i915_vma_put(nop);
		i915_vma_put(base);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: 16K MI_NOOP cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}

int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_mi_bb_start),
		SUBTEST(perf_mi_noop),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}

static int intel_mmio_bases_check(void *arg)
{
	int i, j;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		const struct engine_info *info = &intel_engines[i];
		u8 prev = U8_MAX;

		for (j = 0; j < MAX_MMIO_BASES; j++) {
			u8 gen = info->mmio_bases[j].gen;
			u32 base = info->mmio_bases[j].base;

			if (gen >= prev) {
				pr_err("%s(%s, class:%d, instance:%d): mmio base for gen %x is before the one for gen %x\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       prev, gen);
				return -EINVAL;
			}

			if (gen == 0)
				break;

			if (!base) {
				pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for gen %x at entry %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       base, gen, j);
				return -EINVAL;
			}

			prev = gen;
		}

		pr_debug("%s: min gen supported for %s%d is %d\n",
			 __func__,
			 intel_engine_class_repr(info->class),
			 info->instance,
			 prev);
	}

	return 0;
}

int intel_engine_cs_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(intel_mmio_bases_check),
	};

	return i915_subtests(tests, NULL);
}