// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

static void engine_heartbeat_disable(struct intel_engine_cs *engine)
{
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}

static void engine_heartbeat_enable(struct intel_engine_cs *engine)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms =
		engine->defaults.heartbeat_interval_ms;
}

static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		i915_gem_object_put(obj);
		return ERR_CAST(base);
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
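
	/*
	 * The batch tail jumps back to the start of the unrolled MI_MATH
	 * loop above, so the request spins forever, incrementing
	 * CS_GPR(COUNT) (and, for the SRM variant, dumping it into the last
	 * dword of the object), until the caller overwrites *cancel with
	 * MI_BATCH_BUFFER_END.
	 */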
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;
}

static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	intel_rps_set(rps, freq);
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (IS_GEN(i915, 9)) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
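		/*
		 * (With the up EI and threshold programmed to ~infinity, the
		 * RP_CUR_UP_EI counter simply free-runs while the spinner
		 * keeps the engine busy, letting us sample its tick rate
		 * against ktime below.)
		 */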
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			/* Accept if the two clocks agree to within 8/10 .. 10/8 */
			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */
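
	/*
	 * (The sweep below walks up from min_freq one bin at a time until
	 * rps_set_check() reports a lower frequency than requested, i.e. the
	 * PCU refused the request; that last achievable point becomes
	 * "limit", and we then time how quickly we can bounce between
	 * min_freq and limit.)
	 */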
386 */ 387 388 if (!intel_rps_is_enabled(rps)) 389 return 0; 390 391 if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */ 392 return 0; 393 394 if (igt_spinner_init(&spin, gt)) 395 return -ENOMEM; 396 397 intel_gt_pm_wait_for_idle(gt); 398 saved_work = rps->work.func; 399 rps->work.func = dummy_rps_work; 400 401 intel_gt_pm_get(gt); 402 for_each_engine(engine, gt, id) { 403 struct i915_request *rq; 404 ktime_t min_dt, max_dt; 405 int f, limit; 406 int min, max; 407 408 if (!intel_engine_can_store_dword(engine)) 409 continue; 410 411 engine_heartbeat_disable(engine); 412 413 rq = igt_spinner_create_request(&spin, 414 engine->kernel_context, 415 MI_NOOP); 416 if (IS_ERR(rq)) { 417 err = PTR_ERR(rq); 418 break; 419 } 420 421 i915_request_add(rq); 422 423 if (!igt_wait_for_spinner(&spin, rq)) { 424 pr_err("%s: RPS spinner did not start\n", 425 engine->name); 426 igt_spinner_end(&spin); 427 engine_heartbeat_enable(engine); 428 intel_gt_set_wedged(engine->gt); 429 err = -EIO; 430 break; 431 } 432 433 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) { 434 pr_err("%s: could not set minimum frequency [%x], only %x!\n", 435 engine->name, rps->min_freq, read_cagf(rps)); 436 igt_spinner_end(&spin); 437 engine_heartbeat_enable(engine); 438 show_pstate_limits(rps); 439 err = -EINVAL; 440 break; 441 } 442 443 for (f = rps->min_freq + 1; f < rps->max_freq; f++) { 444 if (rps_set_check(rps, f) < f) 445 break; 446 } 447 448 limit = rps_set_check(rps, f); 449 450 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) { 451 pr_err("%s: could not restore minimum frequency [%x], only %x!\n", 452 engine->name, rps->min_freq, read_cagf(rps)); 453 igt_spinner_end(&spin); 454 engine_heartbeat_enable(engine); 455 show_pstate_limits(rps); 456 err = -EINVAL; 457 break; 458 } 459 460 max_dt = ktime_get(); 461 max = rps_set_check(rps, limit); 462 max_dt = ktime_sub(ktime_get(), max_dt); 463 464 min_dt = ktime_get(); 465 min = rps_set_check(rps, rps->min_freq); 466 min_dt = ktime_sub(ktime_get(), min_dt); 467 468 igt_spinner_end(&spin); 469 engine_heartbeat_enable(engine); 470 471 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", 472 engine->name, 473 rps->min_freq, intel_gpu_freq(rps, rps->min_freq), 474 rps->max_freq, intel_gpu_freq(rps, rps->max_freq), 475 limit, intel_gpu_freq(rps, limit), 476 min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt)); 477 478 if (limit == rps->min_freq) { 479 pr_err("%s: GPU throttled to minimum!\n", 480 engine->name); 481 show_pstate_limits(rps); 482 err = -ENODEV; 483 break; 484 } 485 486 if (igt_flush_test(gt->i915)) { 487 err = -EIO; 488 break; 489 } 490 } 491 intel_gt_pm_put(gt); 492 493 igt_spinner_fini(&spin); 494 495 intel_gt_pm_wait_for_idle(gt); 496 rps->work.func = saved_work; 497 498 return err; 499 } 500 501 static void show_pcu_config(struct intel_rps *rps) 502 { 503 struct drm_i915_private *i915 = rps_to_i915(rps); 504 unsigned int max_gpu_freq, min_gpu_freq; 505 intel_wakeref_t wakeref; 506 int gpu_freq; 507 508 if (!HAS_LLC(i915)) 509 return; 510 511 min_gpu_freq = rps->min_freq; 512 max_gpu_freq = rps->max_freq; 513 if (INTEL_GEN(i915) >= 9) { 514 /* Convert GT frequency to 50 HZ units */ 515 min_gpu_freq /= GEN9_FREQ_SCALER; 516 max_gpu_freq /= GEN9_FREQ_SCALER; 517 } 518 519 wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm); 520 521 pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing"); 522 for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { 523 int ia_freq = gpu_freq; 524 525 sandybridge_pcode_read(i915, 

		pr_info("%5d %5d %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	/* True iff x/y lies strictly within (f_n/f_d, f_d/f_n) */
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */
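
	/*
	 * (This variant creates the spin counter with srm=false, so the batch
	 * only increments CS_GPR(0) and we sample that register directly over
	 * MMIO; live_rps_frequency_srm below uses the SRM writes to memory
	 * instead.)
	 */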
616 */ 617 618 if (!intel_rps_is_enabled(rps)) 619 return 0; 620 621 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */ 622 return 0; 623 624 if (CPU_LATENCY >= 0) 625 cpu_latency_qos_add_request(&qos, CPU_LATENCY); 626 627 intel_gt_pm_wait_for_idle(gt); 628 saved_work = rps->work.func; 629 rps->work.func = dummy_rps_work; 630 631 for_each_engine(engine, gt, id) { 632 struct i915_request *rq; 633 struct i915_vma *vma; 634 u32 *cancel, *cntr; 635 struct { 636 u64 count; 637 int freq; 638 } min, max; 639 640 engine_heartbeat_disable(engine); 641 642 vma = create_spin_counter(engine, 643 engine->kernel_context->vm, false, 644 &cancel, &cntr); 645 if (IS_ERR(vma)) { 646 err = PTR_ERR(vma); 647 engine_heartbeat_enable(engine); 648 break; 649 } 650 651 rq = intel_engine_create_kernel_request(engine); 652 if (IS_ERR(rq)) { 653 err = PTR_ERR(rq); 654 goto err_vma; 655 } 656 657 i915_vma_lock(vma); 658 err = i915_request_await_object(rq, vma->obj, false); 659 if (!err) 660 err = i915_vma_move_to_active(vma, rq, 0); 661 if (!err) 662 err = rq->engine->emit_bb_start(rq, 663 vma->node.start, 664 PAGE_SIZE, 0); 665 i915_vma_unlock(vma); 666 i915_request_add(rq); 667 if (err) 668 goto err_vma; 669 670 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)), 671 10)) { 672 pr_err("%s: timed loop did not start\n", 673 engine->name); 674 goto err_vma; 675 } 676 677 min.freq = rps->min_freq; 678 min.count = measure_cs_frequency_at(rps, engine, &min.freq); 679 680 max.freq = rps->max_freq; 681 max.count = measure_cs_frequency_at(rps, engine, &max.freq); 682 683 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n", 684 engine->name, 685 min.count, intel_gpu_freq(rps, min.freq), 686 max.count, intel_gpu_freq(rps, max.freq), 687 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, 688 max.freq * min.count)); 689 690 if (!scaled_within(max.freq * min.count, 691 min.freq * max.count, 692 2, 3)) { 693 int f; 694 695 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n", 696 engine->name, 697 max.freq * min.count, 698 min.freq * max.count); 699 show_pcu_config(rps); 700 701 for (f = min.freq + 1; f <= rps->max_freq; f++) { 702 int act = f; 703 u64 count; 704 705 count = measure_cs_frequency_at(rps, engine, &act); 706 if (act < f) 707 break; 708 709 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n", 710 engine->name, 711 act, intel_gpu_freq(rps, act), count, 712 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count, 713 act * min.count)); 714 715 f = act; /* may skip ahead [pcu granularity] */ 716 } 717 718 err = -EINVAL; 719 } 720 721 err_vma: 722 *cancel = MI_BATCH_BUFFER_END; 723 i915_gem_object_flush_map(vma->obj); 724 i915_gem_object_unpin_map(vma->obj); 725 i915_vma_unpin(vma); 726 i915_vma_put(vma); 727 728 engine_heartbeat_enable(engine); 729 if (igt_flush_test(gt->i915)) 730 err = -EIO; 731 if (err) 732 break; 733 } 734 735 intel_gt_pm_wait_for_idle(gt); 736 rps->work.func = saved_work; 737 738 if (CPU_LATENCY >= 0) 739 cpu_latency_qos_remove_request(&qos); 740 741 return err; 742 } 743 744 int live_rps_frequency_srm(void *arg) 745 { 746 void (*saved_work)(struct work_struct *wrk); 747 struct intel_gt *gt = arg; 748 struct intel_rps *rps = >->rps; 749 struct intel_engine_cs *engine; 750 struct pm_qos_request qos; 751 enum intel_engine_id id; 752 int err = 0; 753 754 /* 755 * The premise is that the GPU does change freqency at our behest. 756 * Let's check there is a correspondence between the requested 757 * frequency, the actual frequency, and the observed clock rate. 
758 */ 759 760 if (!intel_rps_is_enabled(rps)) 761 return 0; 762 763 if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */ 764 return 0; 765 766 if (CPU_LATENCY >= 0) 767 cpu_latency_qos_add_request(&qos, CPU_LATENCY); 768 769 intel_gt_pm_wait_for_idle(gt); 770 saved_work = rps->work.func; 771 rps->work.func = dummy_rps_work; 772 773 for_each_engine(engine, gt, id) { 774 struct i915_request *rq; 775 struct i915_vma *vma; 776 u32 *cancel, *cntr; 777 struct { 778 u64 count; 779 int freq; 780 } min, max; 781 782 engine_heartbeat_disable(engine); 783 784 vma = create_spin_counter(engine, 785 engine->kernel_context->vm, true, 786 &cancel, &cntr); 787 if (IS_ERR(vma)) { 788 err = PTR_ERR(vma); 789 engine_heartbeat_enable(engine); 790 break; 791 } 792 793 rq = intel_engine_create_kernel_request(engine); 794 if (IS_ERR(rq)) { 795 err = PTR_ERR(rq); 796 goto err_vma; 797 } 798 799 i915_vma_lock(vma); 800 err = i915_request_await_object(rq, vma->obj, false); 801 if (!err) 802 err = i915_vma_move_to_active(vma, rq, 0); 803 if (!err) 804 err = rq->engine->emit_bb_start(rq, 805 vma->node.start, 806 PAGE_SIZE, 0); 807 i915_vma_unlock(vma); 808 i915_request_add(rq); 809 if (err) 810 goto err_vma; 811 812 if (wait_for(READ_ONCE(*cntr), 10)) { 813 pr_err("%s: timed loop did not start\n", 814 engine->name); 815 goto err_vma; 816 } 817 818 min.freq = rps->min_freq; 819 min.count = measure_frequency_at(rps, cntr, &min.freq); 820 821 max.freq = rps->max_freq; 822 max.count = measure_frequency_at(rps, cntr, &max.freq); 823 824 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n", 825 engine->name, 826 min.count, intel_gpu_freq(rps, min.freq), 827 max.count, intel_gpu_freq(rps, max.freq), 828 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, 829 max.freq * min.count)); 830 831 if (!scaled_within(max.freq * min.count, 832 min.freq * max.count, 833 1, 2)) { 834 int f; 835 836 pr_err("%s: CS did not scale with frequency! 
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINVAL;
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_put(vma);

		engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps))
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
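		/*
		 * (The spinner holds the engine in C0 for a whole evaluation
		 * interval at min_freq, so the PCU should latch an
		 * UP_THRESHOLD interrupt into pm_iir while our dummy worker
		 * prevents any actual reclocking.)
		 */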
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	/* dE is in uJ and dt in ns, so dE * 1e6 / dt yields milliwatts */
	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurements support
	 * that theory.
	 */
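
	/*
	 * (Below we require min.freq to be at least ~10% below max.freq for
	 * the comparison to be meaningful, and then expect the power drawn
	 * at min.freq to come in at least ~9% under the power at max.freq.)
	 */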
1143 */ 1144 1145 if (!intel_rps_is_enabled(rps)) 1146 return 0; 1147 1148 if (!librapl_energy_uJ()) 1149 return 0; 1150 1151 if (igt_spinner_init(&spin, gt)) 1152 return -ENOMEM; 1153 1154 intel_gt_pm_wait_for_idle(gt); 1155 saved_work = rps->work.func; 1156 rps->work.func = dummy_rps_work; 1157 1158 for_each_engine(engine, gt, id) { 1159 struct i915_request *rq; 1160 struct { 1161 u64 power; 1162 int freq; 1163 } min, max; 1164 1165 if (!intel_engine_can_store_dword(engine)) 1166 continue; 1167 1168 engine_heartbeat_disable(engine); 1169 1170 rq = igt_spinner_create_request(&spin, 1171 engine->kernel_context, 1172 MI_NOOP); 1173 if (IS_ERR(rq)) { 1174 engine_heartbeat_enable(engine); 1175 err = PTR_ERR(rq); 1176 break; 1177 } 1178 1179 i915_request_add(rq); 1180 1181 if (!igt_wait_for_spinner(&spin, rq)) { 1182 pr_err("%s: RPS spinner did not start\n", 1183 engine->name); 1184 igt_spinner_end(&spin); 1185 engine_heartbeat_enable(engine); 1186 intel_gt_set_wedged(engine->gt); 1187 err = -EIO; 1188 break; 1189 } 1190 1191 max.freq = rps->max_freq; 1192 max.power = measure_power_at(rps, &max.freq); 1193 1194 min.freq = rps->min_freq; 1195 min.power = measure_power_at(rps, &min.freq); 1196 1197 igt_spinner_end(&spin); 1198 engine_heartbeat_enable(engine); 1199 1200 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", 1201 engine->name, 1202 min.power, intel_gpu_freq(rps, min.freq), 1203 max.power, intel_gpu_freq(rps, max.freq)); 1204 1205 if (10 * min.freq >= 9 * max.freq) { 1206 pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n", 1207 min.freq, intel_gpu_freq(rps, min.freq), 1208 max.freq, intel_gpu_freq(rps, max.freq)); 1209 continue; 1210 } 1211 1212 if (11 * min.power > 10 * max.power) { 1213 pr_err("%s: did not conserve power when setting lower frequency!\n", 1214 engine->name); 1215 err = -EINVAL; 1216 break; 1217 } 1218 1219 if (igt_flush_test(gt->i915)) { 1220 err = -EIO; 1221 break; 1222 } 1223 } 1224 1225 igt_spinner_fini(&spin); 1226 1227 intel_gt_pm_wait_for_idle(gt); 1228 rps->work.func = saved_work; 1229 1230 return err; 1231 } 1232 1233 int live_rps_dynamic(void *arg) 1234 { 1235 struct intel_gt *gt = arg; 1236 struct intel_rps *rps = >->rps; 1237 struct intel_engine_cs *engine; 1238 enum intel_engine_id id; 1239 struct igt_spinner spin; 1240 int err = 0; 1241 1242 /* 1243 * We've looked at the bascs, and have established that we 1244 * can change the clock frequency and that the HW will generate 1245 * interrupts based on load. Now we check how we integrate those 1246 * moving parts into dynamic reclocking based on load. 
1247 */ 1248 1249 if (!intel_rps_is_enabled(rps)) 1250 return 0; 1251 1252 if (igt_spinner_init(&spin, gt)) 1253 return -ENOMEM; 1254 1255 for_each_engine(engine, gt, id) { 1256 struct i915_request *rq; 1257 struct { 1258 ktime_t dt; 1259 u8 freq; 1260 } min, max; 1261 1262 if (!intel_engine_can_store_dword(engine)) 1263 continue; 1264 1265 intel_gt_pm_wait_for_idle(gt); 1266 GEM_BUG_ON(intel_rps_is_active(rps)); 1267 rps->cur_freq = rps->min_freq; 1268 1269 intel_engine_pm_get(engine); 1270 intel_rc6_disable(>->rc6); 1271 GEM_BUG_ON(rps->last_freq != rps->min_freq); 1272 1273 rq = igt_spinner_create_request(&spin, 1274 engine->kernel_context, 1275 MI_NOOP); 1276 if (IS_ERR(rq)) { 1277 err = PTR_ERR(rq); 1278 goto err; 1279 } 1280 1281 i915_request_add(rq); 1282 1283 max.dt = ktime_get(); 1284 max.freq = wait_for_freq(rps, rps->max_freq, 500); 1285 max.dt = ktime_sub(ktime_get(), max.dt); 1286 1287 igt_spinner_end(&spin); 1288 1289 min.dt = ktime_get(); 1290 min.freq = wait_for_freq(rps, rps->min_freq, 2000); 1291 min.dt = ktime_sub(ktime_get(), min.dt); 1292 1293 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n", 1294 engine->name, 1295 max.freq, intel_gpu_freq(rps, max.freq), 1296 ktime_to_ns(max.dt), 1297 min.freq, intel_gpu_freq(rps, min.freq), 1298 ktime_to_ns(min.dt)); 1299 if (min.freq >= max.freq) { 1300 pr_err("%s: dynamic reclocking of spinner failed\n!", 1301 engine->name); 1302 err = -EINVAL; 1303 } 1304 1305 err: 1306 intel_rc6_enable(>->rc6); 1307 intel_engine_pm_put(engine); 1308 1309 if (igt_flush_test(gt->i915)) 1310 err = -EIO; 1311 if (err) 1312 break; 1313 } 1314 1315 igt_spinner_fini(&spin); 1316 1317 return err; 1318 } 1319