// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unlock;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
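
/*
 * Rough layout of the batch built above (not authoritative, just a reading
 * of create_spin_counter()): an MI_LOAD_REGISTER_IMM zeroes the COUNT/INC
 * GPRs, a second LRI sets INC to 1, and then 1024 unrolled MI_MATH blocks
 * do COUNT += INC (optionally SRM'ing COUNT into the last dword of the
 * object) before MI_BATCH_BUFFER_START jumps back to the first MI_MATH.
 * *cancel points at that first MI_MATH dword, so poking
 * MI_BATCH_BUFFER_END into it terminates the otherwise infinite loop, and
 * *counter (srm only) points at the dword the loop keeps updating.
 */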

static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}
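
/*
 * Note on the helpers above: wait_for_freq() polls read_cagf() with an
 * exponential backoff (20us, 40us, ... capped at timeout_ms * 20us) and
 * also bails out early if the last 64 samples were all identical, on the
 * assumption that the PCU has settled even if it never hit the exact
 * target. rps_set_check() therefore returns the frequency we settled at,
 * which need not be the one requested.
 */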

int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}
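
/*
 * The pass criteria above are integer-only approximations of a +/-20%
 * band: "10 * time < 8 * dt" rejects time < 0.8 * dt, and
 * "8 * time > 10 * dt" rejects time > 1.25 * dt, so the GT clock interval
 * must land within [0.8, 1.25] of the measured walltime (and the same
 * check is applied in the other direction for the cycle count).
 */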

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
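
/*
 * For reference, show_pcu_config() below walks the pcode min-frequency
 * table on LLC platforms: gpu_freq iterates in 50MHz units (gen9+ RPS
 * values are divided by GEN9_FREQ_SCALER first to get there), and each
 * GEN6_PCODE_READ_MIN_FREQ_TABLE reply packs the effective CPU and ring
 * frequencies into bytes 0 and 1, in 100MHz units.
 */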

static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		snb_pcode_read(i915, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		pr_info("%5d %5d %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}
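
/*
 * The measure_*_at() helpers above share the same smoothing scheme: take
 * five samples, sort them, and return the weighted median
 * (x[1] + 2 * x[2] + x[3]) / 4, discarding the two extremes.
 * scaled_within(x, y, f_n, f_d) then checks that x/y lies strictly between
 * f_n/f_d and f_d/f_n, e.g. scaled_within(x, y, 2, 3) accepts ratios
 * between 2/3 and 3/2.
 */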

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
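
/*
 * live_rps_frequency_srm() below repeats the experiment, but instead of
 * sampling CS_GPR(0) over mmio it passes srm=true to create_spin_counter()
 * so the batch itself stores the counter into memory, and it reads that
 * dword instead. The extra stores presumably explain the looser 1:2
 * scaling tolerance compared to the 2:3 used above.
 */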

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
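
/*
 * For the interrupt checks below, the RPS worker has been replaced with a
 * no-op (dummy_rps_work), so UP/DOWN events should latch in rps->pm_iir
 * for inspection instead of triggering reclocking. sleep_for_ei() first
 * sleeps away any evaluation interval already in flight, clears the
 * interrupt state, and then sleeps 2-3x the EI so the hardware sees at
 * least one complete interval.
 */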

static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}
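
/*
 * Note that GEN6_RP_UP_EI/GEN6_RP_DOWN_EI hold the evaluation interval in
 * GT PM timestamp ticks, not time: both helpers convert the value to
 * nanoseconds with intel_gt_pm_interval_to_ns() and round up to
 * microseconds before handing it to sleep_for_ei().
 */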

static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
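
/*
 * __measure_power() below works out to milliwatts: the librapl_energy_uJ()
 * delta is in microjoules and the ktime delta in nanoseconds, so
 * 1000 * 1000 * dE[uJ] / dt[ns] equals dE/dt in mW, which is what
 * live_rps_power() reports with "%llumW".
 */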

static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}
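
/*
 * live_rps_power() below again uses integer-only thresholds:
 * "10 * min.freq >= 9 * max.freq" skips the engine when the two operating
 * points ended up within ~10% of each other (we never really changed
 * frequency, so there is nothing to compare), and
 * "11 * min.power > 10 * max.power" fails the test if running at the lower
 * frequency did not save at least ~9% power.
 */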

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement supports
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
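
/*
 * Finally, live_rps_dynamic() below leaves the real RPS worker in place
 * (it is the only test here that does not substitute dummy_rps_work): it
 * queues a spinner and waits up to 500ms for the actual frequency to climb
 * towards rps->max_freq, then ends the spinner and waits up to 2s for it
 * to relax towards rps->min_freq, failing only if the busy frequency never
 * rose above the idle one.
 */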

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}