// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

/*
 * Build a batch that spins forever, incrementing a counter in CS_GPR(COUNT)
 * on every pass (and, if @srm is set, storing it to the last dword of the
 * object), so that the command streamer's progress can be sampled while it
 * runs at different frequencies. The returned @cancel pointer is the dword
 * to overwrite with MI_BATCH_BUFFER_END to terminate the loop.
 */
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unlock;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
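
/*
 * Poll the actual GPU frequency (CAGF) until it reaches the requested
 * value, stops changing across the last few samples, or the timeout
 * expires. The PCU reclocks asynchronously, so allow it time to settle
 * on (or near) the goal.
 */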
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}
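
/*
 * Sanity check the GT clock: let the RPS up evaluation-interval counter
 * run freely while an engine is busy with a spinner, then compare the
 * number of EI cycles counted against walltime using the reported GT
 * clock frequency. The two should agree to within roughly 20%.
 */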
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycles/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		sandybridge_pcode_read(i915,
				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
				       &ia_freq, NULL);

		pr_info("%5d %5d %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

/* Check that the ratio x/y lies within the tolerance band (f_n/f_d, f_d/f_n) */
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}
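
/*
 * With the engine awake but idle at the maximum frequency, waiting out a
 * full evaluation interval should raise a DOWN (or DOWN_TIMEOUT) interrupt
 * asking us to reduce the GPU frequency.
 */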
static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}
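
/*
 * Exercise both directions of the RPS interrupt mechanism: a busy spinner
 * at the minimum frequency should raise an UP threshold interrupt, and an
 * awake but idle engine held at the maximum frequency should raise a DOWN
 * interrupt. The RPS worker is replaced with a no-op so the raw rps->pm_iir
 * can be inspected without the frequency being adjusted behind our back.
 */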
int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement supports
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}