// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "i915_reg.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

/*
 * Build a batch that loops forever, incrementing a counter held in a CS_GPR
 * register (and, for the srm variant, copying it out to memory), so that the
 * rate at which the counter advances tracks the CS clock.
 */
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unlock;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
			*cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	*cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

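	/* The caller stops the loop by writing MI_BATCH_BUFFER_END to *cancel */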
	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_get();

				udelay(1000);

				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_sub(ktime_get(), dt_[i]);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 HZ units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		pr_info("%5d %5d %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dc = READ_ONCE(*cntr);
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dE = librapl_energy_uJ();
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);

	*freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	*freq = rps_set_check(rps, *freq);
	return measure_power(rps, freq);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement supports
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}