1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2020 Intel Corporation 4 */ 5 6 #include <linux/pm_qos.h> 7 #include <linux/sort.h> 8 9 #include "gem/i915_gem_internal.h" 10 11 #include "intel_engine_heartbeat.h" 12 #include "intel_engine_pm.h" 13 #include "intel_engine_regs.h" 14 #include "intel_gpu_commands.h" 15 #include "intel_gt_clock_utils.h" 16 #include "intel_gt_pm.h" 17 #include "intel_rc6.h" 18 #include "selftest_engine_heartbeat.h" 19 #include "selftest_rps.h" 20 #include "selftests/igt_flush_test.h" 21 #include "selftests/igt_spinner.h" 22 #include "selftests/librapl.h" 23 24 /* Try to isolate the impact of cstates from determing frequency response */ 25 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ 26 27 static void dummy_rps_work(struct work_struct *wrk) 28 { 29 } 30 31 static int cmp_u64(const void *A, const void *B) 32 { 33 const u64 *a = A, *b = B; 34 35 if (*a < *b) 36 return -1; 37 else if (*a > *b) 38 return 1; 39 else 40 return 0; 41 } 42 43 static int cmp_u32(const void *A, const void *B) 44 { 45 const u32 *a = A, *b = B; 46 47 if (*a < *b) 48 return -1; 49 else if (*a > *b) 50 return 1; 51 else 52 return 0; 53 } 54 55 static struct i915_vma * 56 create_spin_counter(struct intel_engine_cs *engine, 57 struct i915_address_space *vm, 58 bool srm, 59 u32 **cancel, 60 u32 **counter) 61 { 62 enum { 63 COUNT, 64 INC, 65 __NGPR__, 66 }; 67 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x) 68 struct drm_i915_gem_object *obj; 69 struct i915_vma *vma; 70 unsigned long end; 71 u32 *base, *cs; 72 int loop, i; 73 int err; 74 75 obj = i915_gem_object_create_internal(vm->i915, 64 << 10); 76 if (IS_ERR(obj)) 77 return ERR_CAST(obj); 78 79 end = obj->base.size / sizeof(u32) - 1; 80 81 vma = i915_vma_instance(obj, vm, NULL); 82 if (IS_ERR(vma)) { 83 err = PTR_ERR(vma); 84 goto err_put; 85 } 86 87 err = i915_vma_pin(vma, 0, 0, PIN_USER); 88 if (err) 89 goto err_unlock; 90 91 i915_vma_lock(vma); 92 93 base = i915_gem_object_pin_map(obj, I915_MAP_WC); 94 if (IS_ERR(base)) { 95 err = PTR_ERR(base); 96 goto err_unpin; 97 } 98 cs = base; 99 100 *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2); 101 for (i = 0; i < __NGPR__; i++) { 102 *cs++ = i915_mmio_reg_offset(CS_GPR(i)); 103 *cs++ = 0; 104 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4; 105 *cs++ = 0; 106 } 107 108 *cs++ = MI_LOAD_REGISTER_IMM(1); 109 *cs++ = i915_mmio_reg_offset(CS_GPR(INC)); 110 *cs++ = 1; 111 112 loop = cs - base; 113 114 /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */ 115 for (i = 0; i < 1024; i++) { 116 *cs++ = MI_MATH(4); 117 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT)); 118 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC)); 119 *cs++ = MI_MATH_ADD; 120 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU); 121 122 if (srm) { 123 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 124 *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT)); 125 *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs)); 126 *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs)); 127 } 128 } 129 130 *cs++ = MI_BATCH_BUFFER_START_GEN8; 131 *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs)); 132 *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs)); 133 GEM_BUG_ON(cs - base > end); 134 135 i915_gem_object_flush_map(obj); 136 137 *cancel = base + loop; 138 *counter = srm ? memset32(base + end, 0, 1) : NULL; 139 return vma; 140 141 err_unpin: 142 i915_vma_unpin(vma); 143 err_unlock: 144 i915_vma_unlock(vma); 145 err_put: 146 i915_gem_object_put(obj); 147 return ERR_PTR(err); 148 } 149 150 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms) 151 { 152 u8 history[64], i; 153 unsigned long end; 154 int sleep; 155 156 i = 0; 157 memset(history, freq, sizeof(history)); 158 sleep = 20; 159 160 /* The PCU does not change instantly, but drifts towards the goal? */ 161 end = jiffies + msecs_to_jiffies(timeout_ms); 162 do { 163 u8 act; 164 165 act = read_cagf(rps); 166 if (time_after(jiffies, end)) 167 return act; 168 169 /* Target acquired */ 170 if (act == freq) 171 return act; 172 173 /* Any change within the last N samples? */ 174 if (!memchr_inv(history, act, sizeof(history))) 175 return act; 176 177 history[i] = act; 178 i = (i + 1) % ARRAY_SIZE(history); 179 180 usleep_range(sleep, 2 * sleep); 181 sleep *= 2; 182 if (sleep > timeout_ms * 20) 183 sleep = timeout_ms * 20; 184 } while (1); 185 } 186 187 static u8 rps_set_check(struct intel_rps *rps, u8 freq) 188 { 189 mutex_lock(&rps->lock); 190 GEM_BUG_ON(!intel_rps_is_active(rps)); 191 if (wait_for(!intel_rps_set(rps, freq), 50)) { 192 mutex_unlock(&rps->lock); 193 return 0; 194 } 195 GEM_BUG_ON(rps->last_freq != freq); 196 mutex_unlock(&rps->lock); 197 198 return wait_for_freq(rps, freq, 50); 199 } 200 201 static void show_pstate_limits(struct intel_rps *rps) 202 { 203 struct drm_i915_private *i915 = rps_to_i915(rps); 204 205 if (IS_BROXTON(i915)) { 206 pr_info("P_STATE_CAP[%x]: 0x%08x\n", 207 i915_mmio_reg_offset(BXT_RP_STATE_CAP), 208 intel_uncore_read(rps_to_uncore(rps), 209 BXT_RP_STATE_CAP)); 210 } else if (GRAPHICS_VER(i915) == 9) { 211 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n", 212 i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS), 213 intel_uncore_read(rps_to_uncore(rps), 214 GEN9_RP_STATE_LIMITS)); 215 } 216 } 217 218 int live_rps_clock_interval(void *arg) 219 { 220 struct intel_gt *gt = arg; 221 struct intel_rps *rps = >->rps; 222 void (*saved_work)(struct work_struct *wrk); 223 struct intel_engine_cs *engine; 224 enum intel_engine_id id; 225 struct igt_spinner spin; 226 int err = 0; 227 228 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) 229 return 0; 230 231 if (igt_spinner_init(&spin, gt)) 232 return -ENOMEM; 233 234 intel_gt_pm_wait_for_idle(gt); 235 saved_work = rps->work.func; 236 rps->work.func = dummy_rps_work; 237 238 intel_gt_pm_get(gt); 239 intel_rps_disable(>->rps); 240 241 intel_gt_check_clock_frequency(gt); 242 243 for_each_engine(engine, gt, id) { 244 struct i915_request *rq; 245 u32 cycles; 246 u64 dt; 247 248 if (!intel_engine_can_store_dword(engine)) 249 continue; 250 251 st_engine_heartbeat_disable(engine); 252 253 rq = igt_spinner_create_request(&spin, 254 engine->kernel_context, 255 MI_NOOP); 256 if (IS_ERR(rq)) { 257 st_engine_heartbeat_enable(engine); 258 err = PTR_ERR(rq); 259 break; 260 } 261 262 i915_request_add(rq); 263 264 if (!igt_wait_for_spinner(&spin, rq)) { 265 pr_err("%s: RPS spinner did not start\n", 266 engine->name); 267 igt_spinner_end(&spin); 268 st_engine_heartbeat_enable(engine); 269 intel_gt_set_wedged(engine->gt); 270 err = -EIO; 271 break; 272 } 273 274 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); 275 276 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0); 277 278 /* Set the evaluation interval to infinity! */ 279 intel_uncore_write_fw(gt->uncore, 280 GEN6_RP_UP_EI, 0xffffffff); 281 intel_uncore_write_fw(gt->uncore, 282 GEN6_RP_UP_THRESHOLD, 0xffffffff); 283 284 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 285 GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG); 286 287 if (wait_for(intel_uncore_read_fw(gt->uncore, 288 GEN6_RP_CUR_UP_EI), 289 10)) { 290 /* Just skip the test; assume lack of HW support */ 291 pr_notice("%s: rps evaluation interval not ticking\n", 292 engine->name); 293 err = -ENODEV; 294 } else { 295 ktime_t dt_[5]; 296 u32 cycles_[5]; 297 int i; 298 299 for (i = 0; i < 5; i++) { 300 preempt_disable(); 301 302 dt_[i] = ktime_get(); 303 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI); 304 305 udelay(1000); 306 307 dt_[i] = ktime_sub(ktime_get(), dt_[i]); 308 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI); 309 310 preempt_enable(); 311 } 312 313 /* Use the median of both cycle/dt; close enough */ 314 sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL); 315 cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4; 316 sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL); 317 dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4); 318 } 319 320 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0); 321 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); 322 323 igt_spinner_end(&spin); 324 st_engine_heartbeat_enable(engine); 325 326 if (err == 0) { 327 u64 time = intel_gt_pm_interval_to_ns(gt, cycles); 328 u32 expected = 329 intel_gt_ns_to_pm_interval(gt, dt); 330 331 pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n", 332 engine->name, cycles, time, dt, expected, 333 gt->clock_frequency / 1000); 334 335 if (10 * time < 8 * dt || 336 8 * time > 10 * dt) { 337 pr_err("%s: rps clock time does not match walltime!\n", 338 engine->name); 339 err = -EINVAL; 340 } 341 342 if (10 * expected < 8 * cycles || 343 8 * expected > 10 * cycles) { 344 pr_err("%s: walltime does not match rps clock ticks!\n", 345 engine->name); 346 err = -EINVAL; 347 } 348 } 349 350 if (igt_flush_test(gt->i915)) 351 err = -EIO; 352 353 break; /* once is enough */ 354 } 355 356 intel_rps_enable(>->rps); 357 intel_gt_pm_put(gt); 358 359 igt_spinner_fini(&spin); 360 361 intel_gt_pm_wait_for_idle(gt); 362 rps->work.func = saved_work; 363 364 if (err == -ENODEV) /* skipped, don't report a fail */ 365 err = 0; 366 367 return err; 368 } 369 370 int live_rps_control(void *arg) 371 { 372 struct intel_gt *gt = arg; 373 struct intel_rps *rps = >->rps; 374 void (*saved_work)(struct work_struct *wrk); 375 struct intel_engine_cs *engine; 376 enum intel_engine_id id; 377 struct igt_spinner spin; 378 int err = 0; 379 380 /* 381 * Check that the actual frequency matches our requested frequency, 382 * to verify our control mechanism. We have to be careful that the 383 * PCU may throttle the GPU in which case the actual frequency used 384 * will be lowered than requested. 385 */ 386 387 if (!intel_rps_is_enabled(rps)) 388 return 0; 389 390 if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */ 391 return 0; 392 393 if (igt_spinner_init(&spin, gt)) 394 return -ENOMEM; 395 396 intel_gt_pm_wait_for_idle(gt); 397 saved_work = rps->work.func; 398 rps->work.func = dummy_rps_work; 399 400 intel_gt_pm_get(gt); 401 for_each_engine(engine, gt, id) { 402 struct i915_request *rq; 403 ktime_t min_dt, max_dt; 404 int f, limit; 405 int min, max; 406 407 if (!intel_engine_can_store_dword(engine)) 408 continue; 409 410 st_engine_heartbeat_disable(engine); 411 412 rq = igt_spinner_create_request(&spin, 413 engine->kernel_context, 414 MI_NOOP); 415 if (IS_ERR(rq)) { 416 err = PTR_ERR(rq); 417 break; 418 } 419 420 i915_request_add(rq); 421 422 if (!igt_wait_for_spinner(&spin, rq)) { 423 pr_err("%s: RPS spinner did not start\n", 424 engine->name); 425 igt_spinner_end(&spin); 426 st_engine_heartbeat_enable(engine); 427 intel_gt_set_wedged(engine->gt); 428 err = -EIO; 429 break; 430 } 431 432 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) { 433 pr_err("%s: could not set minimum frequency [%x], only %x!\n", 434 engine->name, rps->min_freq, read_cagf(rps)); 435 igt_spinner_end(&spin); 436 st_engine_heartbeat_enable(engine); 437 show_pstate_limits(rps); 438 err = -EINVAL; 439 break; 440 } 441 442 for (f = rps->min_freq + 1; f < rps->max_freq; f++) { 443 if (rps_set_check(rps, f) < f) 444 break; 445 } 446 447 limit = rps_set_check(rps, f); 448 449 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) { 450 pr_err("%s: could not restore minimum frequency [%x], only %x!\n", 451 engine->name, rps->min_freq, read_cagf(rps)); 452 igt_spinner_end(&spin); 453 st_engine_heartbeat_enable(engine); 454 show_pstate_limits(rps); 455 err = -EINVAL; 456 break; 457 } 458 459 max_dt = ktime_get(); 460 max = rps_set_check(rps, limit); 461 max_dt = ktime_sub(ktime_get(), max_dt); 462 463 min_dt = ktime_get(); 464 min = rps_set_check(rps, rps->min_freq); 465 min_dt = ktime_sub(ktime_get(), min_dt); 466 467 igt_spinner_end(&spin); 468 st_engine_heartbeat_enable(engine); 469 470 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", 471 engine->name, 472 rps->min_freq, intel_gpu_freq(rps, rps->min_freq), 473 rps->max_freq, intel_gpu_freq(rps, rps->max_freq), 474 limit, intel_gpu_freq(rps, limit), 475 min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt)); 476 477 if (limit == rps->min_freq) { 478 pr_err("%s: GPU throttled to minimum!\n", 479 engine->name); 480 show_pstate_limits(rps); 481 err = -ENODEV; 482 break; 483 } 484 485 if (igt_flush_test(gt->i915)) { 486 err = -EIO; 487 break; 488 } 489 } 490 intel_gt_pm_put(gt); 491 492 igt_spinner_fini(&spin); 493 494 intel_gt_pm_wait_for_idle(gt); 495 rps->work.func = saved_work; 496 497 return err; 498 } 499 500 static void show_pcu_config(struct intel_rps *rps) 501 { 502 struct drm_i915_private *i915 = rps_to_i915(rps); 503 unsigned int max_gpu_freq, min_gpu_freq; 504 intel_wakeref_t wakeref; 505 int gpu_freq; 506 507 if (!HAS_LLC(i915)) 508 return; 509 510 min_gpu_freq = rps->min_freq; 511 max_gpu_freq = rps->max_freq; 512 if (GRAPHICS_VER(i915) >= 9) { 513 /* Convert GT frequency to 50 HZ units */ 514 min_gpu_freq /= GEN9_FREQ_SCALER; 515 max_gpu_freq /= GEN9_FREQ_SCALER; 516 } 517 518 wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm); 519 520 pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing"); 521 for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { 522 int ia_freq = gpu_freq; 523 524 snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE, 525 &ia_freq, NULL); 526 527 pr_info("%5d %5d %5d\n", 528 gpu_freq * 50, 529 ((ia_freq >> 0) & 0xff) * 100, 530 ((ia_freq >> 8) & 0xff) * 100); 531 } 532 533 intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref); 534 } 535 536 static u64 __measure_frequency(u32 *cntr, int duration_ms) 537 { 538 u64 dc, dt; 539 540 dt = ktime_get(); 541 dc = READ_ONCE(*cntr); 542 usleep_range(1000 * duration_ms, 2000 * duration_ms); 543 dc = READ_ONCE(*cntr) - dc; 544 dt = ktime_get() - dt; 545 546 return div64_u64(1000 * 1000 * dc, dt); 547 } 548 549 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq) 550 { 551 u64 x[5]; 552 int i; 553 554 *freq = rps_set_check(rps, *freq); 555 for (i = 0; i < 5; i++) 556 x[i] = __measure_frequency(cntr, 2); 557 *freq = (*freq + read_cagf(rps)) / 2; 558 559 /* A simple triangle filter for better result stability */ 560 sort(x, 5, sizeof(*x), cmp_u64, NULL); 561 return div_u64(x[1] + 2 * x[2] + x[3], 4); 562 } 563 564 static u64 __measure_cs_frequency(struct intel_engine_cs *engine, 565 int duration_ms) 566 { 567 u64 dc, dt; 568 569 dt = ktime_get(); 570 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)); 571 usleep_range(1000 * duration_ms, 2000 * duration_ms); 572 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc; 573 dt = ktime_get() - dt; 574 575 return div64_u64(1000 * 1000 * dc, dt); 576 } 577 578 static u64 measure_cs_frequency_at(struct intel_rps *rps, 579 struct intel_engine_cs *engine, 580 int *freq) 581 { 582 u64 x[5]; 583 int i; 584 585 *freq = rps_set_check(rps, *freq); 586 for (i = 0; i < 5; i++) 587 x[i] = __measure_cs_frequency(engine, 2); 588 *freq = (*freq + read_cagf(rps)) / 2; 589 590 /* A simple triangle filter for better result stability */ 591 sort(x, 5, sizeof(*x), cmp_u64, NULL); 592 return div_u64(x[1] + 2 * x[2] + x[3], 4); 593 } 594 595 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d) 596 { 597 return f_d * x > f_n * y && f_n * x < f_d * y; 598 } 599 600 int live_rps_frequency_cs(void *arg) 601 { 602 void (*saved_work)(struct work_struct *wrk); 603 struct intel_gt *gt = arg; 604 struct intel_rps *rps = >->rps; 605 struct intel_engine_cs *engine; 606 struct pm_qos_request qos; 607 enum intel_engine_id id; 608 int err = 0; 609 610 /* 611 * The premise is that the GPU does change frequency at our behest. 612 * Let's check there is a correspondence between the requested 613 * frequency, the actual frequency, and the observed clock rate. 614 */ 615 616 if (!intel_rps_is_enabled(rps)) 617 return 0; 618 619 if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */ 620 return 0; 621 622 if (CPU_LATENCY >= 0) 623 cpu_latency_qos_add_request(&qos, CPU_LATENCY); 624 625 intel_gt_pm_wait_for_idle(gt); 626 saved_work = rps->work.func; 627 rps->work.func = dummy_rps_work; 628 629 for_each_engine(engine, gt, id) { 630 struct i915_request *rq; 631 struct i915_vma *vma; 632 u32 *cancel, *cntr; 633 struct { 634 u64 count; 635 int freq; 636 } min, max; 637 638 st_engine_heartbeat_disable(engine); 639 640 vma = create_spin_counter(engine, 641 engine->kernel_context->vm, false, 642 &cancel, &cntr); 643 if (IS_ERR(vma)) { 644 err = PTR_ERR(vma); 645 st_engine_heartbeat_enable(engine); 646 break; 647 } 648 649 rq = intel_engine_create_kernel_request(engine); 650 if (IS_ERR(rq)) { 651 err = PTR_ERR(rq); 652 goto err_vma; 653 } 654 655 err = i915_request_await_object(rq, vma->obj, false); 656 if (!err) 657 err = i915_vma_move_to_active(vma, rq, 0); 658 if (!err) 659 err = rq->engine->emit_bb_start(rq, 660 vma->node.start, 661 PAGE_SIZE, 0); 662 i915_request_add(rq); 663 if (err) 664 goto err_vma; 665 666 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)), 667 10)) { 668 pr_err("%s: timed loop did not start\n", 669 engine->name); 670 goto err_vma; 671 } 672 673 min.freq = rps->min_freq; 674 min.count = measure_cs_frequency_at(rps, engine, &min.freq); 675 676 max.freq = rps->max_freq; 677 max.count = measure_cs_frequency_at(rps, engine, &max.freq); 678 679 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n", 680 engine->name, 681 min.count, intel_gpu_freq(rps, min.freq), 682 max.count, intel_gpu_freq(rps, max.freq), 683 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, 684 max.freq * min.count)); 685 686 if (!scaled_within(max.freq * min.count, 687 min.freq * max.count, 688 2, 3)) { 689 int f; 690 691 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n", 692 engine->name, 693 max.freq * min.count, 694 min.freq * max.count); 695 show_pcu_config(rps); 696 697 for (f = min.freq + 1; f <= rps->max_freq; f++) { 698 int act = f; 699 u64 count; 700 701 count = measure_cs_frequency_at(rps, engine, &act); 702 if (act < f) 703 break; 704 705 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n", 706 engine->name, 707 act, intel_gpu_freq(rps, act), count, 708 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count, 709 act * min.count)); 710 711 f = act; /* may skip ahead [pcu granularity] */ 712 } 713 714 err = -EINTR; /* ignore error, continue on with test */ 715 } 716 717 err_vma: 718 *cancel = MI_BATCH_BUFFER_END; 719 i915_gem_object_flush_map(vma->obj); 720 i915_gem_object_unpin_map(vma->obj); 721 i915_vma_unpin(vma); 722 i915_vma_unlock(vma); 723 i915_vma_put(vma); 724 725 st_engine_heartbeat_enable(engine); 726 if (igt_flush_test(gt->i915)) 727 err = -EIO; 728 if (err) 729 break; 730 } 731 732 intel_gt_pm_wait_for_idle(gt); 733 rps->work.func = saved_work; 734 735 if (CPU_LATENCY >= 0) 736 cpu_latency_qos_remove_request(&qos); 737 738 return err; 739 } 740 741 int live_rps_frequency_srm(void *arg) 742 { 743 void (*saved_work)(struct work_struct *wrk); 744 struct intel_gt *gt = arg; 745 struct intel_rps *rps = >->rps; 746 struct intel_engine_cs *engine; 747 struct pm_qos_request qos; 748 enum intel_engine_id id; 749 int err = 0; 750 751 /* 752 * The premise is that the GPU does change frequency at our behest. 753 * Let's check there is a correspondence between the requested 754 * frequency, the actual frequency, and the observed clock rate. 755 */ 756 757 if (!intel_rps_is_enabled(rps)) 758 return 0; 759 760 if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */ 761 return 0; 762 763 if (CPU_LATENCY >= 0) 764 cpu_latency_qos_add_request(&qos, CPU_LATENCY); 765 766 intel_gt_pm_wait_for_idle(gt); 767 saved_work = rps->work.func; 768 rps->work.func = dummy_rps_work; 769 770 for_each_engine(engine, gt, id) { 771 struct i915_request *rq; 772 struct i915_vma *vma; 773 u32 *cancel, *cntr; 774 struct { 775 u64 count; 776 int freq; 777 } min, max; 778 779 st_engine_heartbeat_disable(engine); 780 781 vma = create_spin_counter(engine, 782 engine->kernel_context->vm, true, 783 &cancel, &cntr); 784 if (IS_ERR(vma)) { 785 err = PTR_ERR(vma); 786 st_engine_heartbeat_enable(engine); 787 break; 788 } 789 790 rq = intel_engine_create_kernel_request(engine); 791 if (IS_ERR(rq)) { 792 err = PTR_ERR(rq); 793 goto err_vma; 794 } 795 796 err = i915_request_await_object(rq, vma->obj, false); 797 if (!err) 798 err = i915_vma_move_to_active(vma, rq, 0); 799 if (!err) 800 err = rq->engine->emit_bb_start(rq, 801 vma->node.start, 802 PAGE_SIZE, 0); 803 i915_request_add(rq); 804 if (err) 805 goto err_vma; 806 807 if (wait_for(READ_ONCE(*cntr), 10)) { 808 pr_err("%s: timed loop did not start\n", 809 engine->name); 810 goto err_vma; 811 } 812 813 min.freq = rps->min_freq; 814 min.count = measure_frequency_at(rps, cntr, &min.freq); 815 816 max.freq = rps->max_freq; 817 max.count = measure_frequency_at(rps, cntr, &max.freq); 818 819 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n", 820 engine->name, 821 min.count, intel_gpu_freq(rps, min.freq), 822 max.count, intel_gpu_freq(rps, max.freq), 823 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, 824 max.freq * min.count)); 825 826 if (!scaled_within(max.freq * min.count, 827 min.freq * max.count, 828 1, 2)) { 829 int f; 830 831 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n", 832 engine->name, 833 max.freq * min.count, 834 min.freq * max.count); 835 show_pcu_config(rps); 836 837 for (f = min.freq + 1; f <= rps->max_freq; f++) { 838 int act = f; 839 u64 count; 840 841 count = measure_frequency_at(rps, cntr, &act); 842 if (act < f) 843 break; 844 845 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n", 846 engine->name, 847 act, intel_gpu_freq(rps, act), count, 848 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count, 849 act * min.count)); 850 851 f = act; /* may skip ahead [pcu granularity] */ 852 } 853 854 err = -EINTR; /* ignore error, continue on with test */ 855 } 856 857 err_vma: 858 *cancel = MI_BATCH_BUFFER_END; 859 i915_gem_object_flush_map(vma->obj); 860 i915_gem_object_unpin_map(vma->obj); 861 i915_vma_unpin(vma); 862 i915_vma_unlock(vma); 863 i915_vma_put(vma); 864 865 st_engine_heartbeat_enable(engine); 866 if (igt_flush_test(gt->i915)) 867 err = -EIO; 868 if (err) 869 break; 870 } 871 872 intel_gt_pm_wait_for_idle(gt); 873 rps->work.func = saved_work; 874 875 if (CPU_LATENCY >= 0) 876 cpu_latency_qos_remove_request(&qos); 877 878 return err; 879 } 880 881 static void sleep_for_ei(struct intel_rps *rps, int timeout_us) 882 { 883 /* Flush any previous EI */ 884 usleep_range(timeout_us, 2 * timeout_us); 885 886 /* Reset the interrupt status */ 887 rps_disable_interrupts(rps); 888 GEM_BUG_ON(rps->pm_iir); 889 rps_enable_interrupts(rps); 890 891 /* And then wait for the timeout, for real this time */ 892 usleep_range(2 * timeout_us, 3 * timeout_us); 893 } 894 895 static int __rps_up_interrupt(struct intel_rps *rps, 896 struct intel_engine_cs *engine, 897 struct igt_spinner *spin) 898 { 899 struct intel_uncore *uncore = engine->uncore; 900 struct i915_request *rq; 901 u32 timeout; 902 903 if (!intel_engine_can_store_dword(engine)) 904 return 0; 905 906 rps_set_check(rps, rps->min_freq); 907 908 rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP); 909 if (IS_ERR(rq)) 910 return PTR_ERR(rq); 911 912 i915_request_get(rq); 913 i915_request_add(rq); 914 915 if (!igt_wait_for_spinner(spin, rq)) { 916 pr_err("%s: RPS spinner did not start\n", 917 engine->name); 918 i915_request_put(rq); 919 intel_gt_set_wedged(engine->gt); 920 return -EIO; 921 } 922 923 if (!intel_rps_is_active(rps)) { 924 pr_err("%s: RPS not enabled on starting spinner\n", 925 engine->name); 926 igt_spinner_end(spin); 927 i915_request_put(rq); 928 return -EINVAL; 929 } 930 931 if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) { 932 pr_err("%s: RPS did not register UP interrupt\n", 933 engine->name); 934 i915_request_put(rq); 935 return -EINVAL; 936 } 937 938 if (rps->last_freq != rps->min_freq) { 939 pr_err("%s: RPS did not program min frequency\n", 940 engine->name); 941 i915_request_put(rq); 942 return -EINVAL; 943 } 944 945 timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI); 946 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout); 947 timeout = DIV_ROUND_UP(timeout, 1000); 948 949 sleep_for_ei(rps, timeout); 950 GEM_BUG_ON(i915_request_completed(rq)); 951 952 igt_spinner_end(spin); 953 i915_request_put(rq); 954 955 if (rps->cur_freq != rps->min_freq) { 956 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n", 957 engine->name, intel_rps_read_actual_frequency(rps)); 958 return -EINVAL; 959 } 960 961 if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) { 962 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n", 963 engine->name, rps->pm_iir, 964 intel_uncore_read(uncore, GEN6_RP_PREV_UP), 965 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD), 966 intel_uncore_read(uncore, GEN6_RP_UP_EI)); 967 return -EINVAL; 968 } 969 970 return 0; 971 } 972 973 static int __rps_down_interrupt(struct intel_rps *rps, 974 struct intel_engine_cs *engine) 975 { 976 struct intel_uncore *uncore = engine->uncore; 977 u32 timeout; 978 979 rps_set_check(rps, rps->max_freq); 980 981 if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) { 982 pr_err("%s: RPS did not register DOWN interrupt\n", 983 engine->name); 984 return -EINVAL; 985 } 986 987 if (rps->last_freq != rps->max_freq) { 988 pr_err("%s: RPS did not program max frequency\n", 989 engine->name); 990 return -EINVAL; 991 } 992 993 timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); 994 timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout); 995 timeout = DIV_ROUND_UP(timeout, 1000); 996 997 sleep_for_ei(rps, timeout); 998 999 if (rps->cur_freq != rps->max_freq) { 1000 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n", 1001 engine->name, 1002 intel_rps_read_actual_frequency(rps)); 1003 return -EINVAL; 1004 } 1005 1006 if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) { 1007 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n", 1008 engine->name, rps->pm_iir, 1009 intel_uncore_read(uncore, GEN6_RP_PREV_DOWN), 1010 intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD), 1011 intel_uncore_read(uncore, GEN6_RP_DOWN_EI), 1012 intel_uncore_read(uncore, GEN6_RP_PREV_UP), 1013 intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD), 1014 intel_uncore_read(uncore, GEN6_RP_UP_EI)); 1015 return -EINVAL; 1016 } 1017 1018 return 0; 1019 } 1020 1021 int live_rps_interrupt(void *arg) 1022 { 1023 struct intel_gt *gt = arg; 1024 struct intel_rps *rps = >->rps; 1025 void (*saved_work)(struct work_struct *wrk); 1026 struct intel_engine_cs *engine; 1027 enum intel_engine_id id; 1028 struct igt_spinner spin; 1029 u32 pm_events; 1030 int err = 0; 1031 1032 /* 1033 * First, let's check whether or not we are receiving interrupts. 1034 */ 1035 1036 if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6) 1037 return 0; 1038 1039 intel_gt_pm_get(gt); 1040 pm_events = rps->pm_events; 1041 intel_gt_pm_put(gt); 1042 if (!pm_events) { 1043 pr_err("No RPS PM events registered, but RPS is enabled?\n"); 1044 return -ENODEV; 1045 } 1046 1047 if (igt_spinner_init(&spin, gt)) 1048 return -ENOMEM; 1049 1050 intel_gt_pm_wait_for_idle(gt); 1051 saved_work = rps->work.func; 1052 rps->work.func = dummy_rps_work; 1053 1054 for_each_engine(engine, gt, id) { 1055 /* Keep the engine busy with a spinner; expect an UP! */ 1056 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) { 1057 intel_gt_pm_wait_for_idle(engine->gt); 1058 GEM_BUG_ON(intel_rps_is_active(rps)); 1059 1060 st_engine_heartbeat_disable(engine); 1061 1062 err = __rps_up_interrupt(rps, engine, &spin); 1063 1064 st_engine_heartbeat_enable(engine); 1065 if (err) 1066 goto out; 1067 1068 intel_gt_pm_wait_for_idle(engine->gt); 1069 } 1070 1071 /* Keep the engine awake but idle and check for DOWN */ 1072 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) { 1073 st_engine_heartbeat_disable(engine); 1074 intel_rc6_disable(>->rc6); 1075 1076 err = __rps_down_interrupt(rps, engine); 1077 1078 intel_rc6_enable(>->rc6); 1079 st_engine_heartbeat_enable(engine); 1080 if (err) 1081 goto out; 1082 } 1083 } 1084 1085 out: 1086 if (igt_flush_test(gt->i915)) 1087 err = -EIO; 1088 1089 igt_spinner_fini(&spin); 1090 1091 intel_gt_pm_wait_for_idle(gt); 1092 rps->work.func = saved_work; 1093 1094 return err; 1095 } 1096 1097 static u64 __measure_power(int duration_ms) 1098 { 1099 u64 dE, dt; 1100 1101 dt = ktime_get(); 1102 dE = librapl_energy_uJ(); 1103 usleep_range(1000 * duration_ms, 2000 * duration_ms); 1104 dE = librapl_energy_uJ() - dE; 1105 dt = ktime_get() - dt; 1106 1107 return div64_u64(1000 * 1000 * dE, dt); 1108 } 1109 1110 static u64 measure_power_at(struct intel_rps *rps, int *freq) 1111 { 1112 u64 x[5]; 1113 int i; 1114 1115 *freq = rps_set_check(rps, *freq); 1116 for (i = 0; i < 5; i++) 1117 x[i] = __measure_power(5); 1118 *freq = (*freq + read_cagf(rps)) / 2; 1119 1120 /* A simple triangle filter for better result stability */ 1121 sort(x, 5, sizeof(*x), cmp_u64, NULL); 1122 return div_u64(x[1] + 2 * x[2] + x[3], 4); 1123 } 1124 1125 int live_rps_power(void *arg) 1126 { 1127 struct intel_gt *gt = arg; 1128 struct intel_rps *rps = >->rps; 1129 void (*saved_work)(struct work_struct *wrk); 1130 struct intel_engine_cs *engine; 1131 enum intel_engine_id id; 1132 struct igt_spinner spin; 1133 int err = 0; 1134 1135 /* 1136 * Our fundamental assumption is that running at lower frequency 1137 * actually saves power. Let's see if our RAPL measurement support 1138 * that theory. 1139 */ 1140 1141 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) 1142 return 0; 1143 1144 if (!librapl_supported(gt->i915)) 1145 return 0; 1146 1147 if (igt_spinner_init(&spin, gt)) 1148 return -ENOMEM; 1149 1150 intel_gt_pm_wait_for_idle(gt); 1151 saved_work = rps->work.func; 1152 rps->work.func = dummy_rps_work; 1153 1154 for_each_engine(engine, gt, id) { 1155 struct i915_request *rq; 1156 struct { 1157 u64 power; 1158 int freq; 1159 } min, max; 1160 1161 if (!intel_engine_can_store_dword(engine)) 1162 continue; 1163 1164 st_engine_heartbeat_disable(engine); 1165 1166 rq = igt_spinner_create_request(&spin, 1167 engine->kernel_context, 1168 MI_NOOP); 1169 if (IS_ERR(rq)) { 1170 st_engine_heartbeat_enable(engine); 1171 err = PTR_ERR(rq); 1172 break; 1173 } 1174 1175 i915_request_add(rq); 1176 1177 if (!igt_wait_for_spinner(&spin, rq)) { 1178 pr_err("%s: RPS spinner did not start\n", 1179 engine->name); 1180 igt_spinner_end(&spin); 1181 st_engine_heartbeat_enable(engine); 1182 intel_gt_set_wedged(engine->gt); 1183 err = -EIO; 1184 break; 1185 } 1186 1187 max.freq = rps->max_freq; 1188 max.power = measure_power_at(rps, &max.freq); 1189 1190 min.freq = rps->min_freq; 1191 min.power = measure_power_at(rps, &min.freq); 1192 1193 igt_spinner_end(&spin); 1194 st_engine_heartbeat_enable(engine); 1195 1196 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", 1197 engine->name, 1198 min.power, intel_gpu_freq(rps, min.freq), 1199 max.power, intel_gpu_freq(rps, max.freq)); 1200 1201 if (10 * min.freq >= 9 * max.freq) { 1202 pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n", 1203 min.freq, intel_gpu_freq(rps, min.freq), 1204 max.freq, intel_gpu_freq(rps, max.freq)); 1205 continue; 1206 } 1207 1208 if (11 * min.power > 10 * max.power) { 1209 pr_err("%s: did not conserve power when setting lower frequency!\n", 1210 engine->name); 1211 err = -EINVAL; 1212 break; 1213 } 1214 1215 if (igt_flush_test(gt->i915)) { 1216 err = -EIO; 1217 break; 1218 } 1219 } 1220 1221 igt_spinner_fini(&spin); 1222 1223 intel_gt_pm_wait_for_idle(gt); 1224 rps->work.func = saved_work; 1225 1226 return err; 1227 } 1228 1229 int live_rps_dynamic(void *arg) 1230 { 1231 struct intel_gt *gt = arg; 1232 struct intel_rps *rps = >->rps; 1233 struct intel_engine_cs *engine; 1234 enum intel_engine_id id; 1235 struct igt_spinner spin; 1236 int err = 0; 1237 1238 /* 1239 * We've looked at the bascs, and have established that we 1240 * can change the clock frequency and that the HW will generate 1241 * interrupts based on load. Now we check how we integrate those 1242 * moving parts into dynamic reclocking based on load. 1243 */ 1244 1245 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) 1246 return 0; 1247 1248 if (igt_spinner_init(&spin, gt)) 1249 return -ENOMEM; 1250 1251 if (intel_rps_has_interrupts(rps)) 1252 pr_info("RPS has interrupt support\n"); 1253 if (intel_rps_uses_timer(rps)) 1254 pr_info("RPS has timer support\n"); 1255 1256 for_each_engine(engine, gt, id) { 1257 struct i915_request *rq; 1258 struct { 1259 ktime_t dt; 1260 u8 freq; 1261 } min, max; 1262 1263 if (!intel_engine_can_store_dword(engine)) 1264 continue; 1265 1266 intel_gt_pm_wait_for_idle(gt); 1267 GEM_BUG_ON(intel_rps_is_active(rps)); 1268 rps->cur_freq = rps->min_freq; 1269 1270 intel_engine_pm_get(engine); 1271 intel_rc6_disable(>->rc6); 1272 GEM_BUG_ON(rps->last_freq != rps->min_freq); 1273 1274 rq = igt_spinner_create_request(&spin, 1275 engine->kernel_context, 1276 MI_NOOP); 1277 if (IS_ERR(rq)) { 1278 err = PTR_ERR(rq); 1279 goto err; 1280 } 1281 1282 i915_request_add(rq); 1283 1284 max.dt = ktime_get(); 1285 max.freq = wait_for_freq(rps, rps->max_freq, 500); 1286 max.dt = ktime_sub(ktime_get(), max.dt); 1287 1288 igt_spinner_end(&spin); 1289 1290 min.dt = ktime_get(); 1291 min.freq = wait_for_freq(rps, rps->min_freq, 2000); 1292 min.dt = ktime_sub(ktime_get(), min.dt); 1293 1294 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n", 1295 engine->name, 1296 max.freq, intel_gpu_freq(rps, max.freq), 1297 ktime_to_ns(max.dt), 1298 min.freq, intel_gpu_freq(rps, min.freq), 1299 ktime_to_ns(min.dt)); 1300 if (min.freq >= max.freq) { 1301 pr_err("%s: dynamic reclocking of spinner failed\n!", 1302 engine->name); 1303 err = -EINVAL; 1304 } 1305 1306 err: 1307 intel_rc6_enable(>->rc6); 1308 intel_engine_pm_put(engine); 1309 1310 if (igt_flush_test(gt->i915)) 1311 err = -EIO; 1312 if (err) 1313 break; 1314 } 1315 1316 igt_spinner_fini(&spin); 1317 1318 return err; 1319 } 1320