1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright © 2018 Intel Corporation 4 */ 5 6 #include <linux/sort.h> 7 8 #include "intel_gpu_commands.h" 9 #include "intel_gt_pm.h" 10 #include "intel_rps.h" 11 12 #include "i915_selftest.h" 13 #include "selftests/igt_flush_test.h" 14 15 #define COUNT 5 16 17 static int cmp_u32(const void *A, const void *B) 18 { 19 const u32 *a = A, *b = B; 20 21 return *a - *b; 22 } 23 24 static void perf_begin(struct intel_gt *gt) 25 { 26 intel_gt_pm_get(gt); 27 28 /* Boost gpufreq to max [waitboost] and keep it fixed */ 29 atomic_inc(>->rps.num_waiters); 30 queue_work(gt->i915->unordered_wq, >->rps.work); 31 flush_work(>->rps.work); 32 } 33 34 static int perf_end(struct intel_gt *gt) 35 { 36 atomic_dec(>->rps.num_waiters); 37 intel_gt_pm_put(gt); 38 39 return igt_flush_test(gt->i915); 40 } 41 42 static i915_reg_t timestamp_reg(struct intel_engine_cs *engine) 43 { 44 struct drm_i915_private *i915 = engine->i915; 45 46 if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915)) 47 return RING_TIMESTAMP_UDW(engine->mmio_base); 48 else 49 return RING_TIMESTAMP(engine->mmio_base); 50 } 51 52 static int write_timestamp(struct i915_request *rq, int slot) 53 { 54 struct intel_timeline *tl = 55 rcu_dereference_protected(rq->timeline, 56 !i915_request_signaled(rq)); 57 u32 cmd; 58 u32 *cs; 59 60 cs = intel_ring_begin(rq, 4); 61 if (IS_ERR(cs)) 62 return PTR_ERR(cs); 63 64 cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; 65 if (GRAPHICS_VER(rq->i915) >= 8) 66 cmd++; 67 *cs++ = cmd; 68 *cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine)); 69 *cs++ = tl->hwsp_offset + slot * sizeof(u32); 70 *cs++ = 0; 71 72 intel_ring_advance(rq, cs); 73 74 return 0; 75 } 76 77 static struct i915_vma *create_empty_batch(struct intel_context *ce) 78 { 79 struct drm_i915_gem_object *obj; 80 struct i915_vma *vma; 81 u32 *cs; 82 int err; 83 84 obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE); 85 if (IS_ERR(obj)) 86 return ERR_CAST(obj); 87 88 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 89 if (IS_ERR(cs)) { 90 err = PTR_ERR(cs); 91 goto err_put; 92 } 93 94 cs[0] = MI_BATCH_BUFFER_END; 95 96 i915_gem_object_flush_map(obj); 97 98 vma = i915_vma_instance(obj, ce->vm, NULL); 99 if (IS_ERR(vma)) { 100 err = PTR_ERR(vma); 101 goto err_unpin; 102 } 103 104 err = i915_vma_pin(vma, 0, 0, PIN_USER); 105 if (err) 106 goto err_unpin; 107 108 i915_gem_object_unpin_map(obj); 109 return vma; 110 111 err_unpin: 112 i915_gem_object_unpin_map(obj); 113 err_put: 114 i915_gem_object_put(obj); 115 return ERR_PTR(err); 116 } 117 118 static u32 trifilter(u32 *a) 119 { 120 u64 sum; 121 122 sort(a, COUNT, sizeof(*a), cmp_u32, NULL); 123 124 sum = mul_u32_u32(a[2], 2); 125 sum += a[1]; 126 sum += a[3]; 127 128 return sum >> 2; 129 } 130 131 static int perf_mi_bb_start(void *arg) 132 { 133 struct intel_gt *gt = arg; 134 struct intel_engine_cs *engine; 135 enum intel_engine_id id; 136 int err = 0; 137 138 if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ 139 return 0; 140 141 perf_begin(gt); 142 for_each_engine(engine, gt, id) { 143 struct intel_context *ce = engine->kernel_context; 144 struct i915_vma *batch; 145 u32 cycles[COUNT]; 146 int i; 147 148 if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0) 149 continue; 150 151 intel_engine_pm_get(engine); 152 153 batch = create_empty_batch(ce); 154 if (IS_ERR(batch)) { 155 err = PTR_ERR(batch); 156 intel_engine_pm_put(engine); 157 break; 158 } 159 160 err = i915_vma_sync(batch); 161 if (err) { 162 intel_engine_pm_put(engine); 163 i915_vma_put(batch); 164 break; 165 } 166 167 for (i = 0; i < ARRAY_SIZE(cycles); i++) { 168 struct i915_request *rq; 169 170 rq = i915_request_create(ce); 171 if (IS_ERR(rq)) { 172 err = PTR_ERR(rq); 173 break; 174 } 175 176 err = write_timestamp(rq, 2); 177 if (err) 178 goto out; 179 180 err = rq->engine->emit_bb_start(rq, 181 i915_vma_offset(batch), 8, 182 0); 183 if (err) 184 goto out; 185 186 err = write_timestamp(rq, 3); 187 if (err) 188 goto out; 189 190 out: 191 i915_request_get(rq); 192 i915_request_add(rq); 193 194 if (i915_request_wait(rq, 0, HZ / 5) < 0) 195 err = -EIO; 196 i915_request_put(rq); 197 if (err) 198 break; 199 200 cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2]; 201 } 202 i915_vma_put(batch); 203 intel_engine_pm_put(engine); 204 if (err) 205 break; 206 207 pr_info("%s: MI_BB_START cycles: %u\n", 208 engine->name, trifilter(cycles)); 209 } 210 if (perf_end(gt)) 211 err = -EIO; 212 213 return err; 214 } 215 216 static struct i915_vma *create_nop_batch(struct intel_context *ce) 217 { 218 struct drm_i915_gem_object *obj; 219 struct i915_vma *vma; 220 u32 *cs; 221 int err; 222 223 obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K); 224 if (IS_ERR(obj)) 225 return ERR_CAST(obj); 226 227 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 228 if (IS_ERR(cs)) { 229 err = PTR_ERR(cs); 230 goto err_put; 231 } 232 233 memset(cs, 0, SZ_64K); 234 cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END; 235 236 i915_gem_object_flush_map(obj); 237 238 vma = i915_vma_instance(obj, ce->vm, NULL); 239 if (IS_ERR(vma)) { 240 err = PTR_ERR(vma); 241 goto err_unpin; 242 } 243 244 err = i915_vma_pin(vma, 0, 0, PIN_USER); 245 if (err) 246 goto err_unpin; 247 248 i915_gem_object_unpin_map(obj); 249 return vma; 250 251 err_unpin: 252 i915_gem_object_unpin_map(obj); 253 err_put: 254 i915_gem_object_put(obj); 255 return ERR_PTR(err); 256 } 257 258 static int perf_mi_noop(void *arg) 259 { 260 struct intel_gt *gt = arg; 261 struct intel_engine_cs *engine; 262 enum intel_engine_id id; 263 int err = 0; 264 265 if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ 266 return 0; 267 268 perf_begin(gt); 269 for_each_engine(engine, gt, id) { 270 struct intel_context *ce = engine->kernel_context; 271 struct i915_vma *base, *nop; 272 u32 cycles[COUNT]; 273 int i; 274 275 if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0) 276 continue; 277 278 intel_engine_pm_get(engine); 279 280 base = create_empty_batch(ce); 281 if (IS_ERR(base)) { 282 err = PTR_ERR(base); 283 intel_engine_pm_put(engine); 284 break; 285 } 286 287 err = i915_vma_sync(base); 288 if (err) { 289 i915_vma_put(base); 290 intel_engine_pm_put(engine); 291 break; 292 } 293 294 nop = create_nop_batch(ce); 295 if (IS_ERR(nop)) { 296 err = PTR_ERR(nop); 297 i915_vma_put(base); 298 intel_engine_pm_put(engine); 299 break; 300 } 301 302 err = i915_vma_sync(nop); 303 if (err) { 304 i915_vma_put(nop); 305 i915_vma_put(base); 306 intel_engine_pm_put(engine); 307 break; 308 } 309 310 for (i = 0; i < ARRAY_SIZE(cycles); i++) { 311 struct i915_request *rq; 312 313 rq = i915_request_create(ce); 314 if (IS_ERR(rq)) { 315 err = PTR_ERR(rq); 316 break; 317 } 318 319 err = write_timestamp(rq, 2); 320 if (err) 321 goto out; 322 323 err = rq->engine->emit_bb_start(rq, 324 i915_vma_offset(base), 8, 325 0); 326 if (err) 327 goto out; 328 329 err = write_timestamp(rq, 3); 330 if (err) 331 goto out; 332 333 err = rq->engine->emit_bb_start(rq, 334 i915_vma_offset(nop), 335 i915_vma_size(nop), 336 0); 337 if (err) 338 goto out; 339 340 err = write_timestamp(rq, 4); 341 if (err) 342 goto out; 343 344 out: 345 i915_request_get(rq); 346 i915_request_add(rq); 347 348 if (i915_request_wait(rq, 0, HZ / 5) < 0) 349 err = -EIO; 350 i915_request_put(rq); 351 if (err) 352 break; 353 354 cycles[i] = 355 (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) - 356 (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]); 357 } 358 i915_vma_put(nop); 359 i915_vma_put(base); 360 intel_engine_pm_put(engine); 361 if (err) 362 break; 363 364 pr_info("%s: 16K MI_NOOP cycles: %u\n", 365 engine->name, trifilter(cycles)); 366 } 367 if (perf_end(gt)) 368 err = -EIO; 369 370 return err; 371 } 372 373 int intel_engine_cs_perf_selftests(struct drm_i915_private *i915) 374 { 375 static const struct i915_subtest tests[] = { 376 SUBTEST(perf_mi_bb_start), 377 SUBTEST(perf_mi_noop), 378 }; 379 380 if (intel_gt_is_wedged(to_gt(i915))) 381 return 0; 382 383 return intel_gt_live_subtests(tests, to_gt(i915)); 384 } 385 386 static int intel_mmio_bases_check(void *arg) 387 { 388 int i, j; 389 390 for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { 391 const struct engine_info *info = &intel_engines[i]; 392 u8 prev = U8_MAX; 393 394 for (j = 0; j < MAX_MMIO_BASES; j++) { 395 u8 ver = info->mmio_bases[j].graphics_ver; 396 u32 base = info->mmio_bases[j].base; 397 398 if (ver >= prev) { 399 pr_err("%s(%s, class:%d, instance:%d): mmio base for graphics ver %u is before the one for ver %u\n", 400 __func__, 401 intel_engine_class_repr(info->class), 402 info->class, info->instance, 403 prev, ver); 404 return -EINVAL; 405 } 406 407 if (ver == 0) 408 break; 409 410 if (!base) { 411 pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for graphics ver %u at entry %u\n", 412 __func__, 413 intel_engine_class_repr(info->class), 414 info->class, info->instance, 415 base, ver, j); 416 return -EINVAL; 417 } 418 419 prev = ver; 420 } 421 422 pr_debug("%s: min graphics version supported for %s%d is %u\n", 423 __func__, 424 intel_engine_class_repr(info->class), 425 info->instance, 426 prev); 427 } 428 429 return 0; 430 } 431 432 int intel_engine_cs_mock_selftests(void) 433 { 434 static const struct i915_subtest tests[] = { 435 SUBTEST(intel_mmio_bases_check), 436 }; 437 438 return i915_subtests(tests, NULL); 439 } 440