1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #include <linux/string_helpers.h> 7 8 #include <drm/i915_drm.h> 9 10 #include "i915_drv.h" 11 #include "i915_irq.h" 12 #include "intel_breadcrumbs.h" 13 #include "intel_gt.h" 14 #include "intel_gt_clock_utils.h" 15 #include "intel_gt_irq.h" 16 #include "intel_gt_pm_irq.h" 17 #include "intel_gt_regs.h" 18 #include "intel_mchbar_regs.h" 19 #include "intel_pcode.h" 20 #include "intel_rps.h" 21 #include "vlv_sideband.h" 22 #include "../../../platform/x86/intel_ips.h" 23 24 #define BUSY_MAX_EI 20u /* ms */ 25 26 /* 27 * Lock protecting IPS related data structures 28 */ 29 static DEFINE_SPINLOCK(mchdev_lock); 30 31 static struct intel_gt *rps_to_gt(struct intel_rps *rps) 32 { 33 return container_of(rps, struct intel_gt, rps); 34 } 35 36 static struct drm_i915_private *rps_to_i915(struct intel_rps *rps) 37 { 38 return rps_to_gt(rps)->i915; 39 } 40 41 static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) 42 { 43 return rps_to_gt(rps)->uncore; 44 } 45 46 static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps) 47 { 48 struct intel_gt *gt = rps_to_gt(rps); 49 50 return >->uc.guc.slpc; 51 } 52 53 static bool rps_uses_slpc(struct intel_rps *rps) 54 { 55 struct intel_gt *gt = rps_to_gt(rps); 56 57 return intel_uc_uses_guc_slpc(>->uc); 58 } 59 60 static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) 61 { 62 return mask & ~rps->pm_intrmsk_mbz; 63 } 64 65 static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) 66 { 67 intel_uncore_write_fw(uncore, reg, val); 68 } 69 70 static void rps_timer(struct timer_list *t) 71 { 72 struct intel_rps *rps = from_timer(rps, t, timer); 73 struct intel_engine_cs *engine; 74 ktime_t dt, last, timestamp; 75 enum intel_engine_id id; 76 s64 max_busy[3] = {}; 77 78 timestamp = 0; 79 for_each_engine(engine, rps_to_gt(rps), id) { 80 s64 busy; 81 int i; 82 83 dt = intel_engine_get_busy_time(engine, ×tamp); 84 last = engine->stats.rps; 85 engine->stats.rps = dt; 86 87 busy = ktime_to_ns(ktime_sub(dt, last)); 88 for (i = 0; i < ARRAY_SIZE(max_busy); i++) { 89 if (busy > max_busy[i]) 90 swap(busy, max_busy[i]); 91 } 92 } 93 last = rps->pm_timestamp; 94 rps->pm_timestamp = timestamp; 95 96 if (intel_rps_is_active(rps)) { 97 s64 busy; 98 int i; 99 100 dt = ktime_sub(timestamp, last); 101 102 /* 103 * Our goal is to evaluate each engine independently, so we run 104 * at the lowest clocks required to sustain the heaviest 105 * workload. However, a task may be split into sequential 106 * dependent operations across a set of engines, such that 107 * the independent contributions do not account for high load, 108 * but overall the task is GPU bound. For example, consider 109 * video decode on vcs followed by colour post-processing 110 * on vecs, followed by general post-processing on rcs. 111 * Since multi-engines being active does imply a single 112 * continuous workload across all engines, we hedge our 113 * bets by only contributing a factor of the distributed 114 * load into our busyness calculation. 115 */ 116 busy = max_busy[0]; 117 for (i = 1; i < ARRAY_SIZE(max_busy); i++) { 118 if (!max_busy[i]) 119 break; 120 121 busy += div_u64(max_busy[i], 1 << i); 122 } 123 GT_TRACE(rps_to_gt(rps), 124 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", 125 busy, (int)div64_u64(100 * busy, dt), 126 max_busy[0], max_busy[1], max_busy[2], 127 rps->pm_interval); 128 129 if (100 * busy > rps->power.up_threshold * dt && 130 rps->cur_freq < rps->max_freq_softlimit) { 131 rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; 132 rps->pm_interval = 1; 133 schedule_work(&rps->work); 134 } else if (100 * busy < rps->power.down_threshold * dt && 135 rps->cur_freq > rps->min_freq_softlimit) { 136 rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; 137 rps->pm_interval = 1; 138 schedule_work(&rps->work); 139 } else { 140 rps->last_adj = 0; 141 } 142 143 mod_timer(&rps->timer, 144 jiffies + msecs_to_jiffies(rps->pm_interval)); 145 rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI); 146 } 147 } 148 149 static void rps_start_timer(struct intel_rps *rps) 150 { 151 rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); 152 rps->pm_interval = 1; 153 mod_timer(&rps->timer, jiffies + 1); 154 } 155 156 static void rps_stop_timer(struct intel_rps *rps) 157 { 158 del_timer_sync(&rps->timer); 159 rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); 160 cancel_work_sync(&rps->work); 161 } 162 163 static u32 rps_pm_mask(struct intel_rps *rps, u8 val) 164 { 165 u32 mask = 0; 166 167 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ 168 if (val > rps->min_freq_softlimit) 169 mask |= (GEN6_PM_RP_UP_EI_EXPIRED | 170 GEN6_PM_RP_DOWN_THRESHOLD | 171 GEN6_PM_RP_DOWN_TIMEOUT); 172 173 if (val < rps->max_freq_softlimit) 174 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 175 176 mask &= rps->pm_events; 177 178 return rps_pm_sanitize_mask(rps, ~mask); 179 } 180 181 static void rps_reset_ei(struct intel_rps *rps) 182 { 183 memset(&rps->ei, 0, sizeof(rps->ei)); 184 } 185 186 static void rps_enable_interrupts(struct intel_rps *rps) 187 { 188 struct intel_gt *gt = rps_to_gt(rps); 189 190 GEM_BUG_ON(rps_uses_slpc(rps)); 191 192 GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", 193 rps->pm_events, rps_pm_mask(rps, rps->last_freq)); 194 195 rps_reset_ei(rps); 196 197 spin_lock_irq(gt->irq_lock); 198 gen6_gt_pm_enable_irq(gt, rps->pm_events); 199 spin_unlock_irq(gt->irq_lock); 200 201 intel_uncore_write(gt->uncore, 202 GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq)); 203 } 204 205 static void gen6_rps_reset_interrupts(struct intel_rps *rps) 206 { 207 gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS); 208 } 209 210 static void gen11_rps_reset_interrupts(struct intel_rps *rps) 211 { 212 while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM)) 213 ; 214 } 215 216 static void rps_reset_interrupts(struct intel_rps *rps) 217 { 218 struct intel_gt *gt = rps_to_gt(rps); 219 220 spin_lock_irq(gt->irq_lock); 221 if (GRAPHICS_VER(gt->i915) >= 11) 222 gen11_rps_reset_interrupts(rps); 223 else 224 gen6_rps_reset_interrupts(rps); 225 226 rps->pm_iir = 0; 227 spin_unlock_irq(gt->irq_lock); 228 } 229 230 static void rps_disable_interrupts(struct intel_rps *rps) 231 { 232 struct intel_gt *gt = rps_to_gt(rps); 233 234 intel_uncore_write(gt->uncore, 235 GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); 236 237 spin_lock_irq(gt->irq_lock); 238 gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); 239 spin_unlock_irq(gt->irq_lock); 240 241 intel_synchronize_irq(gt->i915); 242 243 /* 244 * Now that we will not be generating any more work, flush any 245 * outstanding tasks. As we are called on the RPS idle path, 246 * we will reset the GPU to minimum frequencies, so the current 247 * state of the worker can be discarded. 248 */ 249 cancel_work_sync(&rps->work); 250 251 rps_reset_interrupts(rps); 252 GT_TRACE(gt, "interrupts:off\n"); 253 } 254 255 static const struct cparams { 256 u16 i; 257 u16 t; 258 u16 m; 259 u16 c; 260 } cparams[] = { 261 { 1, 1333, 301, 28664 }, 262 { 1, 1066, 294, 24460 }, 263 { 1, 800, 294, 25192 }, 264 { 0, 1333, 276, 27605 }, 265 { 0, 1066, 276, 27605 }, 266 { 0, 800, 231, 23784 }, 267 }; 268 269 static void gen5_rps_init(struct intel_rps *rps) 270 { 271 struct drm_i915_private *i915 = rps_to_i915(rps); 272 struct intel_uncore *uncore = rps_to_uncore(rps); 273 u8 fmax, fmin, fstart; 274 u32 rgvmodectl; 275 int c_m, i; 276 277 if (i915->fsb_freq <= 3200) 278 c_m = 0; 279 else if (i915->fsb_freq <= 4800) 280 c_m = 1; 281 else 282 c_m = 2; 283 284 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 285 if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { 286 rps->ips.m = cparams[i].m; 287 rps->ips.c = cparams[i].c; 288 break; 289 } 290 } 291 292 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 293 294 /* Set up min, max, and cur for interrupt handling */ 295 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 296 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 297 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 298 MEMMODE_FSTART_SHIFT; 299 drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n", 300 fmax, fmin, fstart); 301 302 rps->min_freq = fmax; 303 rps->efficient_freq = fstart; 304 rps->max_freq = fmin; 305 } 306 307 static unsigned long 308 __ips_chipset_val(struct intel_ips *ips) 309 { 310 struct intel_uncore *uncore = 311 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 312 unsigned long now = jiffies_to_msecs(jiffies), dt; 313 unsigned long result; 314 u64 total, delta; 315 316 lockdep_assert_held(&mchdev_lock); 317 318 /* 319 * Prevent division-by-zero if we are asking too fast. 320 * Also, we don't get interesting results if we are polling 321 * faster than once in 10ms, so just return the saved value 322 * in such cases. 323 */ 324 dt = now - ips->last_time1; 325 if (dt <= 10) 326 return ips->chipset_power; 327 328 /* FIXME: handle per-counter overflow */ 329 total = intel_uncore_read(uncore, DMIEC); 330 total += intel_uncore_read(uncore, DDREC); 331 total += intel_uncore_read(uncore, CSIEC); 332 333 delta = total - ips->last_count1; 334 335 result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); 336 337 ips->last_count1 = total; 338 ips->last_time1 = now; 339 340 ips->chipset_power = result; 341 342 return result; 343 } 344 345 static unsigned long ips_mch_val(struct intel_uncore *uncore) 346 { 347 unsigned int m, x, b; 348 u32 tsfs; 349 350 tsfs = intel_uncore_read(uncore, TSFS); 351 x = intel_uncore_read8(uncore, TR1); 352 353 b = tsfs & TSFS_INTR_MASK; 354 m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; 355 356 return m * x / 127 - b; 357 } 358 359 static int _pxvid_to_vd(u8 pxvid) 360 { 361 if (pxvid == 0) 362 return 0; 363 364 if (pxvid >= 8 && pxvid < 31) 365 pxvid = 31; 366 367 return (pxvid + 2) * 125; 368 } 369 370 static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) 371 { 372 const int vd = _pxvid_to_vd(pxvid); 373 374 if (INTEL_INFO(i915)->is_mobile) 375 return max(vd - 1125, 0); 376 377 return vd; 378 } 379 380 static void __gen5_ips_update(struct intel_ips *ips) 381 { 382 struct intel_uncore *uncore = 383 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 384 u64 now, delta, dt; 385 u32 count; 386 387 lockdep_assert_held(&mchdev_lock); 388 389 now = ktime_get_raw_ns(); 390 dt = now - ips->last_time2; 391 do_div(dt, NSEC_PER_MSEC); 392 393 /* Don't divide by 0 */ 394 if (dt <= 10) 395 return; 396 397 count = intel_uncore_read(uncore, GFXEC); 398 delta = count - ips->last_count2; 399 400 ips->last_count2 = count; 401 ips->last_time2 = now; 402 403 /* More magic constants... */ 404 ips->gfx_power = div_u64(delta * 1181, dt * 10); 405 } 406 407 static void gen5_rps_update(struct intel_rps *rps) 408 { 409 spin_lock_irq(&mchdev_lock); 410 __gen5_ips_update(&rps->ips); 411 spin_unlock_irq(&mchdev_lock); 412 } 413 414 static unsigned int gen5_invert_freq(struct intel_rps *rps, 415 unsigned int val) 416 { 417 /* Invert the frequency bin into an ips delay */ 418 val = rps->max_freq - val; 419 val = rps->min_freq + val; 420 421 return val; 422 } 423 424 static int __gen5_rps_set(struct intel_rps *rps, u8 val) 425 { 426 struct intel_uncore *uncore = rps_to_uncore(rps); 427 u16 rgvswctl; 428 429 lockdep_assert_held(&mchdev_lock); 430 431 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 432 if (rgvswctl & MEMCTL_CMD_STS) { 433 DRM_DEBUG("gpu busy, RCS change rejected\n"); 434 return -EBUSY; /* still busy with another command */ 435 } 436 437 /* Invert the frequency bin into an ips delay */ 438 val = gen5_invert_freq(rps, val); 439 440 rgvswctl = 441 (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 442 (val << MEMCTL_FREQ_SHIFT) | 443 MEMCTL_SFCAVM; 444 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 445 intel_uncore_posting_read16(uncore, MEMSWCTL); 446 447 rgvswctl |= MEMCTL_CMD_STS; 448 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 449 450 return 0; 451 } 452 453 static int gen5_rps_set(struct intel_rps *rps, u8 val) 454 { 455 int err; 456 457 spin_lock_irq(&mchdev_lock); 458 err = __gen5_rps_set(rps, val); 459 spin_unlock_irq(&mchdev_lock); 460 461 return err; 462 } 463 464 static unsigned long intel_pxfreq(u32 vidfreq) 465 { 466 int div = (vidfreq & 0x3f0000) >> 16; 467 int post = (vidfreq & 0x3000) >> 12; 468 int pre = (vidfreq & 0x7); 469 470 if (!pre) 471 return 0; 472 473 return div * 133333 / (pre << post); 474 } 475 476 static unsigned int init_emon(struct intel_uncore *uncore) 477 { 478 u8 pxw[16]; 479 int i; 480 481 /* Disable to program */ 482 intel_uncore_write(uncore, ECR, 0); 483 intel_uncore_posting_read(uncore, ECR); 484 485 /* Program energy weights for various events */ 486 intel_uncore_write(uncore, SDEW, 0x15040d00); 487 intel_uncore_write(uncore, CSIEW0, 0x007f0000); 488 intel_uncore_write(uncore, CSIEW1, 0x1e220004); 489 intel_uncore_write(uncore, CSIEW2, 0x04000004); 490 491 for (i = 0; i < 5; i++) 492 intel_uncore_write(uncore, PEW(i), 0); 493 for (i = 0; i < 3; i++) 494 intel_uncore_write(uncore, DEW(i), 0); 495 496 /* Program P-state weights to account for frequency power adjustment */ 497 for (i = 0; i < 16; i++) { 498 u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); 499 unsigned int freq = intel_pxfreq(pxvidfreq); 500 unsigned int vid = 501 (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 502 unsigned int val; 503 504 val = vid * vid * freq / 1000 * 255; 505 val /= 127 * 127 * 900; 506 507 pxw[i] = val; 508 } 509 /* Render standby states get 0 weight */ 510 pxw[14] = 0; 511 pxw[15] = 0; 512 513 for (i = 0; i < 4; i++) { 514 intel_uncore_write(uncore, PXW(i), 515 pxw[i * 4 + 0] << 24 | 516 pxw[i * 4 + 1] << 16 | 517 pxw[i * 4 + 2] << 8 | 518 pxw[i * 4 + 3] << 0); 519 } 520 521 /* Adjust magic regs to magic values (more experimental results) */ 522 intel_uncore_write(uncore, OGW0, 0); 523 intel_uncore_write(uncore, OGW1, 0); 524 intel_uncore_write(uncore, EG0, 0x00007f00); 525 intel_uncore_write(uncore, EG1, 0x0000000e); 526 intel_uncore_write(uncore, EG2, 0x000e0000); 527 intel_uncore_write(uncore, EG3, 0x68000300); 528 intel_uncore_write(uncore, EG4, 0x42000000); 529 intel_uncore_write(uncore, EG5, 0x00140031); 530 intel_uncore_write(uncore, EG6, 0); 531 intel_uncore_write(uncore, EG7, 0); 532 533 for (i = 0; i < 8; i++) 534 intel_uncore_write(uncore, PXWL(i), 0); 535 536 /* Enable PMON + select events */ 537 intel_uncore_write(uncore, ECR, 0x80000019); 538 539 return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; 540 } 541 542 static bool gen5_rps_enable(struct intel_rps *rps) 543 { 544 struct drm_i915_private *i915 = rps_to_i915(rps); 545 struct intel_uncore *uncore = rps_to_uncore(rps); 546 u8 fstart, vstart; 547 u32 rgvmodectl; 548 549 spin_lock_irq(&mchdev_lock); 550 551 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 552 553 /* Enable temp reporting */ 554 intel_uncore_write16(uncore, PMMISC, 555 intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); 556 intel_uncore_write16(uncore, TSC1, 557 intel_uncore_read16(uncore, TSC1) | TSE); 558 559 /* 100ms RC evaluation intervals */ 560 intel_uncore_write(uncore, RCUPEI, 100000); 561 intel_uncore_write(uncore, RCDNEI, 100000); 562 563 /* Set max/min thresholds to 90ms and 80ms respectively */ 564 intel_uncore_write(uncore, RCBMAXAVG, 90000); 565 intel_uncore_write(uncore, RCBMINAVG, 80000); 566 567 intel_uncore_write(uncore, MEMIHYST, 1); 568 569 /* Set up min, max, and cur for interrupt handling */ 570 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 571 MEMMODE_FSTART_SHIFT; 572 573 vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & 574 PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 575 576 intel_uncore_write(uncore, 577 MEMINTREN, 578 MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 579 580 intel_uncore_write(uncore, VIDSTART, vstart); 581 intel_uncore_posting_read(uncore, VIDSTART); 582 583 rgvmodectl |= MEMMODE_SWMODE_EN; 584 intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); 585 586 if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & 587 MEMCTL_CMD_STS) == 0, 10)) 588 drm_err(&uncore->i915->drm, 589 "stuck trying to change perf mode\n"); 590 mdelay(1); 591 592 __gen5_rps_set(rps, rps->cur_freq); 593 594 rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); 595 rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); 596 rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); 597 rps->ips.last_time1 = jiffies_to_msecs(jiffies); 598 599 rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); 600 rps->ips.last_time2 = ktime_get_raw_ns(); 601 602 spin_lock(&i915->irq_lock); 603 ilk_enable_display_irq(i915, DE_PCU_EVENT); 604 spin_unlock(&i915->irq_lock); 605 606 spin_unlock_irq(&mchdev_lock); 607 608 rps->ips.corr = init_emon(uncore); 609 610 return true; 611 } 612 613 static void gen5_rps_disable(struct intel_rps *rps) 614 { 615 struct drm_i915_private *i915 = rps_to_i915(rps); 616 struct intel_uncore *uncore = rps_to_uncore(rps); 617 u16 rgvswctl; 618 619 spin_lock_irq(&mchdev_lock); 620 621 spin_lock(&i915->irq_lock); 622 ilk_disable_display_irq(i915, DE_PCU_EVENT); 623 spin_unlock(&i915->irq_lock); 624 625 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 626 627 /* Ack interrupts, disable EFC interrupt */ 628 intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0); 629 intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); 630 631 /* Go back to the starting frequency */ 632 __gen5_rps_set(rps, rps->idle_freq); 633 mdelay(1); 634 rgvswctl |= MEMCTL_CMD_STS; 635 intel_uncore_write(uncore, MEMSWCTL, rgvswctl); 636 mdelay(1); 637 638 spin_unlock_irq(&mchdev_lock); 639 } 640 641 static u32 rps_limits(struct intel_rps *rps, u8 val) 642 { 643 u32 limits; 644 645 /* 646 * Only set the down limit when we've reached the lowest level to avoid 647 * getting more interrupts, otherwise leave this clear. This prevents a 648 * race in the hw when coming out of rc6: There's a tiny window where 649 * the hw runs at the minimal clock before selecting the desired 650 * frequency, if the down threshold expires in that window we will not 651 * receive a down interrupt. 652 */ 653 if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { 654 limits = rps->max_freq_softlimit << 23; 655 if (val <= rps->min_freq_softlimit) 656 limits |= rps->min_freq_softlimit << 14; 657 } else { 658 limits = rps->max_freq_softlimit << 24; 659 if (val <= rps->min_freq_softlimit) 660 limits |= rps->min_freq_softlimit << 16; 661 } 662 663 return limits; 664 } 665 666 static void rps_set_power(struct intel_rps *rps, int new_power) 667 { 668 struct intel_gt *gt = rps_to_gt(rps); 669 struct intel_uncore *uncore = gt->uncore; 670 u32 threshold_up = 0, threshold_down = 0; /* in % */ 671 u32 ei_up = 0, ei_down = 0; 672 673 lockdep_assert_held(&rps->power.mutex); 674 675 if (new_power == rps->power.mode) 676 return; 677 678 threshold_up = 95; 679 threshold_down = 85; 680 681 /* Note the units here are not exactly 1us, but 1280ns. */ 682 switch (new_power) { 683 case LOW_POWER: 684 ei_up = 16000; 685 ei_down = 32000; 686 break; 687 688 case BETWEEN: 689 ei_up = 13000; 690 ei_down = 32000; 691 break; 692 693 case HIGH_POWER: 694 ei_up = 10000; 695 ei_down = 32000; 696 break; 697 } 698 699 /* When byt can survive without system hang with dynamic 700 * sw freq adjustments, this restriction can be lifted. 701 */ 702 if (IS_VALLEYVIEW(gt->i915)) 703 goto skip_hw_write; 704 705 GT_TRACE(gt, 706 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n", 707 new_power, threshold_up, ei_up, threshold_down, ei_down); 708 709 set(uncore, GEN6_RP_UP_EI, 710 intel_gt_ns_to_pm_interval(gt, ei_up * 1000)); 711 set(uncore, GEN6_RP_UP_THRESHOLD, 712 intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10)); 713 714 set(uncore, GEN6_RP_DOWN_EI, 715 intel_gt_ns_to_pm_interval(gt, ei_down * 1000)); 716 set(uncore, GEN6_RP_DOWN_THRESHOLD, 717 intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10)); 718 719 set(uncore, GEN6_RP_CONTROL, 720 (GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | 721 GEN6_RP_MEDIA_HW_NORMAL_MODE | 722 GEN6_RP_MEDIA_IS_GFX | 723 GEN6_RP_ENABLE | 724 GEN6_RP_UP_BUSY_AVG | 725 GEN6_RP_DOWN_IDLE_AVG); 726 727 skip_hw_write: 728 rps->power.mode = new_power; 729 rps->power.up_threshold = threshold_up; 730 rps->power.down_threshold = threshold_down; 731 } 732 733 static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) 734 { 735 int new_power; 736 737 new_power = rps->power.mode; 738 switch (rps->power.mode) { 739 case LOW_POWER: 740 if (val > rps->efficient_freq + 1 && 741 val > rps->cur_freq) 742 new_power = BETWEEN; 743 break; 744 745 case BETWEEN: 746 if (val <= rps->efficient_freq && 747 val < rps->cur_freq) 748 new_power = LOW_POWER; 749 else if (val >= rps->rp0_freq && 750 val > rps->cur_freq) 751 new_power = HIGH_POWER; 752 break; 753 754 case HIGH_POWER: 755 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && 756 val < rps->cur_freq) 757 new_power = BETWEEN; 758 break; 759 } 760 /* Max/min bins are special */ 761 if (val <= rps->min_freq_softlimit) 762 new_power = LOW_POWER; 763 if (val >= rps->max_freq_softlimit) 764 new_power = HIGH_POWER; 765 766 mutex_lock(&rps->power.mutex); 767 if (rps->power.interactive) 768 new_power = HIGH_POWER; 769 rps_set_power(rps, new_power); 770 mutex_unlock(&rps->power.mutex); 771 } 772 773 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) 774 { 775 GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", 776 str_yes_no(interactive)); 777 778 mutex_lock(&rps->power.mutex); 779 if (interactive) { 780 if (!rps->power.interactive++ && intel_rps_is_active(rps)) 781 rps_set_power(rps, HIGH_POWER); 782 } else { 783 GEM_BUG_ON(!rps->power.interactive); 784 rps->power.interactive--; 785 } 786 mutex_unlock(&rps->power.mutex); 787 } 788 789 static int gen6_rps_set(struct intel_rps *rps, u8 val) 790 { 791 struct intel_uncore *uncore = rps_to_uncore(rps); 792 struct drm_i915_private *i915 = rps_to_i915(rps); 793 u32 swreq; 794 795 GEM_BUG_ON(rps_uses_slpc(rps)); 796 797 if (GRAPHICS_VER(i915) >= 9) 798 swreq = GEN9_FREQUENCY(val); 799 else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 800 swreq = HSW_FREQUENCY(val); 801 else 802 swreq = (GEN6_FREQUENCY(val) | 803 GEN6_OFFSET(0) | 804 GEN6_AGGRESSIVE_TURBO); 805 set(uncore, GEN6_RPNSWREQ, swreq); 806 807 GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n", 808 val, intel_gpu_freq(rps, val), swreq); 809 810 return 0; 811 } 812 813 static int vlv_rps_set(struct intel_rps *rps, u8 val) 814 { 815 struct drm_i915_private *i915 = rps_to_i915(rps); 816 int err; 817 818 vlv_punit_get(i915); 819 err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); 820 vlv_punit_put(i915); 821 822 GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n", 823 val, intel_gpu_freq(rps, val)); 824 825 return err; 826 } 827 828 static int rps_set(struct intel_rps *rps, u8 val, bool update) 829 { 830 struct drm_i915_private *i915 = rps_to_i915(rps); 831 int err; 832 833 if (val == rps->last_freq) 834 return 0; 835 836 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) 837 err = vlv_rps_set(rps, val); 838 else if (GRAPHICS_VER(i915) >= 6) 839 err = gen6_rps_set(rps, val); 840 else 841 err = gen5_rps_set(rps, val); 842 if (err) 843 return err; 844 845 if (update && GRAPHICS_VER(i915) >= 6) 846 gen6_rps_set_thresholds(rps, val); 847 rps->last_freq = val; 848 849 return 0; 850 } 851 852 void intel_rps_unpark(struct intel_rps *rps) 853 { 854 if (!intel_rps_is_enabled(rps)) 855 return; 856 857 GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq); 858 859 /* 860 * Use the user's desired frequency as a guide, but for better 861 * performance, jump directly to RPe as our starting frequency. 862 */ 863 mutex_lock(&rps->lock); 864 865 intel_rps_set_active(rps); 866 intel_rps_set(rps, 867 clamp(rps->cur_freq, 868 rps->min_freq_softlimit, 869 rps->max_freq_softlimit)); 870 871 mutex_unlock(&rps->lock); 872 873 rps->pm_iir = 0; 874 if (intel_rps_has_interrupts(rps)) 875 rps_enable_interrupts(rps); 876 if (intel_rps_uses_timer(rps)) 877 rps_start_timer(rps); 878 879 if (GRAPHICS_VER(rps_to_i915(rps)) == 5) 880 gen5_rps_update(rps); 881 } 882 883 void intel_rps_park(struct intel_rps *rps) 884 { 885 int adj; 886 887 if (!intel_rps_is_enabled(rps)) 888 return; 889 890 if (!intel_rps_clear_active(rps)) 891 return; 892 893 if (intel_rps_uses_timer(rps)) 894 rps_stop_timer(rps); 895 if (intel_rps_has_interrupts(rps)) 896 rps_disable_interrupts(rps); 897 898 if (rps->last_freq <= rps->idle_freq) 899 return; 900 901 /* 902 * The punit delays the write of the frequency and voltage until it 903 * determines the GPU is awake. During normal usage we don't want to 904 * waste power changing the frequency if the GPU is sleeping (rc6). 905 * However, the GPU and driver is now idle and we do not want to delay 906 * switching to minimum voltage (reducing power whilst idle) as we do 907 * not expect to be woken in the near future and so must flush the 908 * change by waking the device. 909 * 910 * We choose to take the media powerwell (either would do to trick the 911 * punit into committing the voltage change) as that takes a lot less 912 * power than the render powerwell. 913 */ 914 intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); 915 rps_set(rps, rps->idle_freq, false); 916 intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); 917 918 /* 919 * Since we will try and restart from the previously requested 920 * frequency on unparking, treat this idle point as a downclock 921 * interrupt and reduce the frequency for resume. If we park/unpark 922 * more frequently than the rps worker can run, we will not respond 923 * to any EI and never see a change in frequency. 924 * 925 * (Note we accommodate Cherryview's limitation of only using an 926 * even bin by applying it to all.) 927 */ 928 adj = rps->last_adj; 929 if (adj < 0) 930 adj *= 2; 931 else /* CHV needs even encode values */ 932 adj = -2; 933 rps->last_adj = adj; 934 rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq); 935 if (rps->cur_freq < rps->efficient_freq) { 936 rps->cur_freq = rps->efficient_freq; 937 rps->last_adj = 0; 938 } 939 940 GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); 941 } 942 943 u32 intel_rps_get_boost_frequency(struct intel_rps *rps) 944 { 945 struct intel_guc_slpc *slpc; 946 947 if (rps_uses_slpc(rps)) { 948 slpc = rps_to_slpc(rps); 949 950 return slpc->boost_freq; 951 } else { 952 return intel_gpu_freq(rps, rps->boost_freq); 953 } 954 } 955 956 static int rps_set_boost_freq(struct intel_rps *rps, u32 val) 957 { 958 bool boost = false; 959 960 /* Validate against (static) hardware limits */ 961 val = intel_freq_opcode(rps, val); 962 if (val < rps->min_freq || val > rps->max_freq) 963 return -EINVAL; 964 965 mutex_lock(&rps->lock); 966 if (val != rps->boost_freq) { 967 rps->boost_freq = val; 968 boost = atomic_read(&rps->num_waiters); 969 } 970 mutex_unlock(&rps->lock); 971 if (boost) 972 schedule_work(&rps->work); 973 974 return 0; 975 } 976 977 int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq) 978 { 979 struct intel_guc_slpc *slpc; 980 981 if (rps_uses_slpc(rps)) { 982 slpc = rps_to_slpc(rps); 983 984 return intel_guc_slpc_set_boost_freq(slpc, freq); 985 } else { 986 return rps_set_boost_freq(rps, freq); 987 } 988 } 989 990 void intel_rps_dec_waiters(struct intel_rps *rps) 991 { 992 struct intel_guc_slpc *slpc; 993 994 if (rps_uses_slpc(rps)) { 995 slpc = rps_to_slpc(rps); 996 997 intel_guc_slpc_dec_waiters(slpc); 998 } else { 999 atomic_dec(&rps->num_waiters); 1000 } 1001 } 1002 1003 void intel_rps_boost(struct i915_request *rq) 1004 { 1005 struct intel_guc_slpc *slpc; 1006 1007 if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) 1008 return; 1009 1010 /* Serializes with i915_request_retire() */ 1011 if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { 1012 struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; 1013 1014 if (rps_uses_slpc(rps)) { 1015 slpc = rps_to_slpc(rps); 1016 1017 if (slpc->min_freq_softlimit >= slpc->boost_freq) 1018 return; 1019 1020 /* Return if old value is non zero */ 1021 if (!atomic_fetch_inc(&slpc->num_waiters)) { 1022 GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", 1023 rq->fence.context, rq->fence.seqno); 1024 schedule_work(&slpc->boost_work); 1025 } 1026 1027 return; 1028 } 1029 1030 if (atomic_fetch_inc(&rps->num_waiters)) 1031 return; 1032 1033 if (!intel_rps_is_active(rps)) 1034 return; 1035 1036 GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", 1037 rq->fence.context, rq->fence.seqno); 1038 1039 if (READ_ONCE(rps->cur_freq) < rps->boost_freq) 1040 schedule_work(&rps->work); 1041 1042 WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */ 1043 } 1044 } 1045 1046 int intel_rps_set(struct intel_rps *rps, u8 val) 1047 { 1048 int err; 1049 1050 lockdep_assert_held(&rps->lock); 1051 GEM_BUG_ON(val > rps->max_freq); 1052 GEM_BUG_ON(val < rps->min_freq); 1053 1054 if (intel_rps_is_active(rps)) { 1055 err = rps_set(rps, val, true); 1056 if (err) 1057 return err; 1058 1059 /* 1060 * Make sure we continue to get interrupts 1061 * until we hit the minimum or maximum frequencies. 1062 */ 1063 if (intel_rps_has_interrupts(rps)) { 1064 struct intel_uncore *uncore = rps_to_uncore(rps); 1065 1066 set(uncore, 1067 GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val)); 1068 1069 set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val)); 1070 } 1071 } 1072 1073 rps->cur_freq = val; 1074 return 0; 1075 } 1076 1077 static u32 intel_rps_read_state_cap(struct intel_rps *rps) 1078 { 1079 struct drm_i915_private *i915 = rps_to_i915(rps); 1080 struct intel_uncore *uncore = rps_to_uncore(rps); 1081 1082 if (IS_PONTEVECCHIO(i915)) 1083 return intel_uncore_read(uncore, PVC_RP_STATE_CAP); 1084 else if (IS_XEHPSDV(i915)) 1085 return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); 1086 else if (IS_GEN9_LP(i915)) 1087 return intel_uncore_read(uncore, BXT_RP_STATE_CAP); 1088 else 1089 return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); 1090 } 1091 1092 static void 1093 mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) 1094 { 1095 struct intel_uncore *uncore = rps_to_uncore(rps); 1096 u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ? 1097 intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) : 1098 intel_uncore_read(uncore, MTL_RP_STATE_CAP); 1099 u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ? 1100 intel_uncore_read(uncore, MTL_MPE_FREQUENCY) : 1101 intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY); 1102 1103 /* MTL values are in units of 16.67 MHz */ 1104 caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap); 1105 caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap); 1106 caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe); 1107 } 1108 1109 static void 1110 __gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) 1111 { 1112 struct drm_i915_private *i915 = rps_to_i915(rps); 1113 u32 rp_state_cap; 1114 1115 rp_state_cap = intel_rps_read_state_cap(rps); 1116 1117 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 1118 if (IS_GEN9_LP(i915)) { 1119 caps->rp0_freq = (rp_state_cap >> 16) & 0xff; 1120 caps->rp1_freq = (rp_state_cap >> 8) & 0xff; 1121 caps->min_freq = (rp_state_cap >> 0) & 0xff; 1122 } else { 1123 caps->rp0_freq = (rp_state_cap >> 0) & 0xff; 1124 if (GRAPHICS_VER(i915) >= 10) 1125 caps->rp1_freq = REG_FIELD_GET(RPE_MASK, 1126 intel_uncore_read(to_gt(i915)->uncore, 1127 GEN10_FREQ_INFO_REC)); 1128 else 1129 caps->rp1_freq = (rp_state_cap >> 8) & 0xff; 1130 caps->min_freq = (rp_state_cap >> 16) & 0xff; 1131 } 1132 1133 if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { 1134 /* 1135 * In this case rp_state_cap register reports frequencies in 1136 * units of 50 MHz. Convert these to the actual "hw unit", i.e. 1137 * units of 16.67 MHz 1138 */ 1139 caps->rp0_freq *= GEN9_FREQ_SCALER; 1140 caps->rp1_freq *= GEN9_FREQ_SCALER; 1141 caps->min_freq *= GEN9_FREQ_SCALER; 1142 } 1143 } 1144 1145 /** 1146 * gen6_rps_get_freq_caps - Get freq caps exposed by HW 1147 * @rps: the intel_rps structure 1148 * @caps: returned freq caps 1149 * 1150 * Returned "caps" frequencies should be converted to MHz using 1151 * intel_gpu_freq() 1152 */ 1153 void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) 1154 { 1155 struct drm_i915_private *i915 = rps_to_i915(rps); 1156 1157 if (IS_METEORLAKE(i915)) 1158 return mtl_get_freq_caps(rps, caps); 1159 else 1160 return __gen6_rps_get_freq_caps(rps, caps); 1161 } 1162 1163 static void gen6_rps_init(struct intel_rps *rps) 1164 { 1165 struct drm_i915_private *i915 = rps_to_i915(rps); 1166 struct intel_rps_freq_caps caps; 1167 1168 gen6_rps_get_freq_caps(rps, &caps); 1169 rps->rp0_freq = caps.rp0_freq; 1170 rps->rp1_freq = caps.rp1_freq; 1171 rps->min_freq = caps.min_freq; 1172 1173 /* hw_max = RP0 until we check for overclocking */ 1174 rps->max_freq = rps->rp0_freq; 1175 1176 rps->efficient_freq = rps->rp1_freq; 1177 if (IS_HASWELL(i915) || IS_BROADWELL(i915) || 1178 IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { 1179 u32 ddcc_status = 0; 1180 u32 mult = 1; 1181 1182 if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) 1183 mult = GEN9_FREQ_SCALER; 1184 if (snb_pcode_read(rps_to_gt(rps)->uncore, 1185 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 1186 &ddcc_status, NULL) == 0) 1187 rps->efficient_freq = 1188 clamp_t(u32, 1189 ((ddcc_status >> 8) & 0xff) * mult, 1190 rps->min_freq, 1191 rps->max_freq); 1192 } 1193 } 1194 1195 static bool rps_reset(struct intel_rps *rps) 1196 { 1197 struct drm_i915_private *i915 = rps_to_i915(rps); 1198 1199 /* force a reset */ 1200 rps->power.mode = -1; 1201 rps->last_freq = -1; 1202 1203 if (rps_set(rps, rps->min_freq, true)) { 1204 drm_err(&i915->drm, "Failed to reset RPS to initial values\n"); 1205 return false; 1206 } 1207 1208 rps->cur_freq = rps->min_freq; 1209 return true; 1210 } 1211 1212 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 1213 static bool gen9_rps_enable(struct intel_rps *rps) 1214 { 1215 struct intel_gt *gt = rps_to_gt(rps); 1216 struct intel_uncore *uncore = gt->uncore; 1217 1218 /* Program defaults and thresholds for RPS */ 1219 if (GRAPHICS_VER(gt->i915) == 9) 1220 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 1221 GEN9_FREQUENCY(rps->rp1_freq)); 1222 1223 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); 1224 1225 rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; 1226 1227 return rps_reset(rps); 1228 } 1229 1230 static bool gen8_rps_enable(struct intel_rps *rps) 1231 { 1232 struct intel_uncore *uncore = rps_to_uncore(rps); 1233 1234 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 1235 HSW_FREQUENCY(rps->rp1_freq)); 1236 1237 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1238 1239 rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; 1240 1241 return rps_reset(rps); 1242 } 1243 1244 static bool gen6_rps_enable(struct intel_rps *rps) 1245 { 1246 struct intel_uncore *uncore = rps_to_uncore(rps); 1247 1248 /* Power down if completely idle for over 50ms */ 1249 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); 1250 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1251 1252 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 1253 GEN6_PM_RP_DOWN_THRESHOLD | 1254 GEN6_PM_RP_DOWN_TIMEOUT); 1255 1256 return rps_reset(rps); 1257 } 1258 1259 static int chv_rps_max_freq(struct intel_rps *rps) 1260 { 1261 struct drm_i915_private *i915 = rps_to_i915(rps); 1262 struct intel_gt *gt = rps_to_gt(rps); 1263 u32 val; 1264 1265 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 1266 1267 switch (gt->info.sseu.eu_total) { 1268 case 8: 1269 /* (2 * 4) config */ 1270 val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; 1271 break; 1272 case 12: 1273 /* (2 * 6) config */ 1274 val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; 1275 break; 1276 case 16: 1277 /* (2 * 8) config */ 1278 default: 1279 /* Setting (2 * 8) Min RP0 for any other combination */ 1280 val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; 1281 break; 1282 } 1283 1284 return val & FB_GFX_FREQ_FUSE_MASK; 1285 } 1286 1287 static int chv_rps_rpe_freq(struct intel_rps *rps) 1288 { 1289 struct drm_i915_private *i915 = rps_to_i915(rps); 1290 u32 val; 1291 1292 val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); 1293 val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; 1294 1295 return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 1296 } 1297 1298 static int chv_rps_guar_freq(struct intel_rps *rps) 1299 { 1300 struct drm_i915_private *i915 = rps_to_i915(rps); 1301 u32 val; 1302 1303 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 1304 1305 return val & FB_GFX_FREQ_FUSE_MASK; 1306 } 1307 1308 static u32 chv_rps_min_freq(struct intel_rps *rps) 1309 { 1310 struct drm_i915_private *i915 = rps_to_i915(rps); 1311 u32 val; 1312 1313 val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); 1314 val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; 1315 1316 return val & FB_GFX_FREQ_FUSE_MASK; 1317 } 1318 1319 static bool chv_rps_enable(struct intel_rps *rps) 1320 { 1321 struct intel_uncore *uncore = rps_to_uncore(rps); 1322 struct drm_i915_private *i915 = rps_to_i915(rps); 1323 u32 val; 1324 1325 /* 1: Program defaults and thresholds for RPS*/ 1326 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1327 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1328 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1329 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1330 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1331 1332 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1333 1334 /* 2: Enable RPS */ 1335 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1336 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1337 GEN6_RP_MEDIA_IS_GFX | 1338 GEN6_RP_ENABLE | 1339 GEN6_RP_UP_BUSY_AVG | 1340 GEN6_RP_DOWN_IDLE_AVG); 1341 1342 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 1343 GEN6_PM_RP_DOWN_THRESHOLD | 1344 GEN6_PM_RP_DOWN_TIMEOUT); 1345 1346 /* Setting Fixed Bias */ 1347 vlv_punit_get(i915); 1348 1349 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; 1350 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1351 1352 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1353 1354 vlv_punit_put(i915); 1355 1356 /* RPS code assumes GPLL is used */ 1357 drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, 1358 "GPLL not enabled\n"); 1359 1360 drm_dbg(&i915->drm, "GPLL enabled? %s\n", 1361 str_yes_no(val & GPLLENABLE)); 1362 drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); 1363 1364 return rps_reset(rps); 1365 } 1366 1367 static int vlv_rps_guar_freq(struct intel_rps *rps) 1368 { 1369 struct drm_i915_private *i915 = rps_to_i915(rps); 1370 u32 val, rp1; 1371 1372 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1373 1374 rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; 1375 rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 1376 1377 return rp1; 1378 } 1379 1380 static int vlv_rps_max_freq(struct intel_rps *rps) 1381 { 1382 struct drm_i915_private *i915 = rps_to_i915(rps); 1383 u32 val, rp0; 1384 1385 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1386 1387 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 1388 /* Clamp to max */ 1389 rp0 = min_t(u32, rp0, 0xea); 1390 1391 return rp0; 1392 } 1393 1394 static int vlv_rps_rpe_freq(struct intel_rps *rps) 1395 { 1396 struct drm_i915_private *i915 = rps_to_i915(rps); 1397 u32 val, rpe; 1398 1399 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 1400 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 1401 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 1402 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 1403 1404 return rpe; 1405 } 1406 1407 static int vlv_rps_min_freq(struct intel_rps *rps) 1408 { 1409 struct drm_i915_private *i915 = rps_to_i915(rps); 1410 u32 val; 1411 1412 val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; 1413 /* 1414 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value 1415 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on 1416 * a BYT-M B0 the above register contains 0xbf. Moreover when setting 1417 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 1418 * to make sure it matches what Punit accepts. 1419 */ 1420 return max_t(u32, val, 0xc0); 1421 } 1422 1423 static bool vlv_rps_enable(struct intel_rps *rps) 1424 { 1425 struct intel_uncore *uncore = rps_to_uncore(rps); 1426 struct drm_i915_private *i915 = rps_to_i915(rps); 1427 u32 val; 1428 1429 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1430 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1431 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1432 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1433 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1434 1435 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1436 1437 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1438 GEN6_RP_MEDIA_TURBO | 1439 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1440 GEN6_RP_MEDIA_IS_GFX | 1441 GEN6_RP_ENABLE | 1442 GEN6_RP_UP_BUSY_AVG | 1443 GEN6_RP_DOWN_IDLE_CONT); 1444 1445 /* WaGsvRC0ResidencyMethod:vlv */ 1446 rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; 1447 1448 vlv_punit_get(i915); 1449 1450 /* Setting Fixed Bias */ 1451 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; 1452 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1453 1454 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1455 1456 vlv_punit_put(i915); 1457 1458 /* RPS code assumes GPLL is used */ 1459 drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, 1460 "GPLL not enabled\n"); 1461 1462 drm_dbg(&i915->drm, "GPLL enabled? %s\n", 1463 str_yes_no(val & GPLLENABLE)); 1464 drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); 1465 1466 return rps_reset(rps); 1467 } 1468 1469 static unsigned long __ips_gfx_val(struct intel_ips *ips) 1470 { 1471 struct intel_rps *rps = container_of(ips, typeof(*rps), ips); 1472 struct intel_uncore *uncore = rps_to_uncore(rps); 1473 unsigned int t, state1, state2; 1474 u32 pxvid, ext_v; 1475 u64 corr, corr2; 1476 1477 lockdep_assert_held(&mchdev_lock); 1478 1479 pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); 1480 pxvid = (pxvid >> 24) & 0x7f; 1481 ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); 1482 1483 state1 = ext_v; 1484 1485 /* Revel in the empirically derived constants */ 1486 1487 /* Correction factor in 1/100000 units */ 1488 t = ips_mch_val(uncore); 1489 if (t > 80) 1490 corr = t * 2349 + 135940; 1491 else if (t >= 50) 1492 corr = t * 964 + 29317; 1493 else /* < 50 */ 1494 corr = t * 301 + 1004; 1495 1496 corr = div_u64(corr * 150142 * state1, 10000) - 78642; 1497 corr2 = div_u64(corr, 100000) * ips->corr; 1498 1499 state2 = div_u64(corr2 * state1, 10000); 1500 state2 /= 100; /* convert to mW */ 1501 1502 __gen5_ips_update(ips); 1503 1504 return ips->gfx_power + state2; 1505 } 1506 1507 static bool has_busy_stats(struct intel_rps *rps) 1508 { 1509 struct intel_engine_cs *engine; 1510 enum intel_engine_id id; 1511 1512 for_each_engine(engine, rps_to_gt(rps), id) { 1513 if (!intel_engine_supports_stats(engine)) 1514 return false; 1515 } 1516 1517 return true; 1518 } 1519 1520 void intel_rps_enable(struct intel_rps *rps) 1521 { 1522 struct drm_i915_private *i915 = rps_to_i915(rps); 1523 struct intel_uncore *uncore = rps_to_uncore(rps); 1524 bool enabled = false; 1525 1526 if (!HAS_RPS(i915)) 1527 return; 1528 1529 if (rps_uses_slpc(rps)) 1530 return; 1531 1532 intel_gt_check_clock_frequency(rps_to_gt(rps)); 1533 1534 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 1535 if (rps->max_freq <= rps->min_freq) 1536 /* leave disabled, no room for dynamic reclocking */; 1537 else if (IS_CHERRYVIEW(i915)) 1538 enabled = chv_rps_enable(rps); 1539 else if (IS_VALLEYVIEW(i915)) 1540 enabled = vlv_rps_enable(rps); 1541 else if (GRAPHICS_VER(i915) >= 9) 1542 enabled = gen9_rps_enable(rps); 1543 else if (GRAPHICS_VER(i915) >= 8) 1544 enabled = gen8_rps_enable(rps); 1545 else if (GRAPHICS_VER(i915) >= 6) 1546 enabled = gen6_rps_enable(rps); 1547 else if (IS_IRONLAKE_M(i915)) 1548 enabled = gen5_rps_enable(rps); 1549 else 1550 MISSING_CASE(GRAPHICS_VER(i915)); 1551 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 1552 if (!enabled) 1553 return; 1554 1555 GT_TRACE(rps_to_gt(rps), 1556 "min:%x, max:%x, freq:[%d, %d]\n", 1557 rps->min_freq, rps->max_freq, 1558 intel_gpu_freq(rps, rps->min_freq), 1559 intel_gpu_freq(rps, rps->max_freq)); 1560 1561 GEM_BUG_ON(rps->max_freq < rps->min_freq); 1562 GEM_BUG_ON(rps->idle_freq > rps->max_freq); 1563 1564 GEM_BUG_ON(rps->efficient_freq < rps->min_freq); 1565 GEM_BUG_ON(rps->efficient_freq > rps->max_freq); 1566 1567 if (has_busy_stats(rps)) 1568 intel_rps_set_timer(rps); 1569 else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11) 1570 intel_rps_set_interrupts(rps); 1571 else 1572 /* Ironlake currently uses intel_ips.ko */ {} 1573 1574 intel_rps_set_enabled(rps); 1575 } 1576 1577 static void gen6_rps_disable(struct intel_rps *rps) 1578 { 1579 set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); 1580 } 1581 1582 void intel_rps_disable(struct intel_rps *rps) 1583 { 1584 struct drm_i915_private *i915 = rps_to_i915(rps); 1585 1586 if (!intel_rps_is_enabled(rps)) 1587 return; 1588 1589 intel_rps_clear_enabled(rps); 1590 intel_rps_clear_interrupts(rps); 1591 intel_rps_clear_timer(rps); 1592 1593 if (GRAPHICS_VER(i915) >= 6) 1594 gen6_rps_disable(rps); 1595 else if (IS_IRONLAKE_M(i915)) 1596 gen5_rps_disable(rps); 1597 } 1598 1599 static int byt_gpu_freq(struct intel_rps *rps, int val) 1600 { 1601 /* 1602 * N = val - 0xb7 1603 * Slow = Fast = GPLL ref * N 1604 */ 1605 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); 1606 } 1607 1608 static int byt_freq_opcode(struct intel_rps *rps, int val) 1609 { 1610 return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; 1611 } 1612 1613 static int chv_gpu_freq(struct intel_rps *rps, int val) 1614 { 1615 /* 1616 * N = val / 2 1617 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 1618 */ 1619 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); 1620 } 1621 1622 static int chv_freq_opcode(struct intel_rps *rps, int val) 1623 { 1624 /* CHV needs even values */ 1625 return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; 1626 } 1627 1628 int intel_gpu_freq(struct intel_rps *rps, int val) 1629 { 1630 struct drm_i915_private *i915 = rps_to_i915(rps); 1631 1632 if (GRAPHICS_VER(i915) >= 9) 1633 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, 1634 GEN9_FREQ_SCALER); 1635 else if (IS_CHERRYVIEW(i915)) 1636 return chv_gpu_freq(rps, val); 1637 else if (IS_VALLEYVIEW(i915)) 1638 return byt_gpu_freq(rps, val); 1639 else if (GRAPHICS_VER(i915) >= 6) 1640 return val * GT_FREQUENCY_MULTIPLIER; 1641 else 1642 return val; 1643 } 1644 1645 int intel_freq_opcode(struct intel_rps *rps, int val) 1646 { 1647 struct drm_i915_private *i915 = rps_to_i915(rps); 1648 1649 if (GRAPHICS_VER(i915) >= 9) 1650 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, 1651 GT_FREQUENCY_MULTIPLIER); 1652 else if (IS_CHERRYVIEW(i915)) 1653 return chv_freq_opcode(rps, val); 1654 else if (IS_VALLEYVIEW(i915)) 1655 return byt_freq_opcode(rps, val); 1656 else if (GRAPHICS_VER(i915) >= 6) 1657 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); 1658 else 1659 return val; 1660 } 1661 1662 static void vlv_init_gpll_ref_freq(struct intel_rps *rps) 1663 { 1664 struct drm_i915_private *i915 = rps_to_i915(rps); 1665 1666 rps->gpll_ref_freq = 1667 vlv_get_cck_clock(i915, "GPLL ref", 1668 CCK_GPLL_CLOCK_CONTROL, 1669 i915->czclk_freq); 1670 1671 drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n", 1672 rps->gpll_ref_freq); 1673 } 1674 1675 static void vlv_rps_init(struct intel_rps *rps) 1676 { 1677 struct drm_i915_private *i915 = rps_to_i915(rps); 1678 u32 val; 1679 1680 vlv_iosf_sb_get(i915, 1681 BIT(VLV_IOSF_SB_PUNIT) | 1682 BIT(VLV_IOSF_SB_NC) | 1683 BIT(VLV_IOSF_SB_CCK)); 1684 1685 vlv_init_gpll_ref_freq(rps); 1686 1687 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1688 switch ((val >> 6) & 3) { 1689 case 0: 1690 case 1: 1691 i915->mem_freq = 800; 1692 break; 1693 case 2: 1694 i915->mem_freq = 1066; 1695 break; 1696 case 3: 1697 i915->mem_freq = 1333; 1698 break; 1699 } 1700 drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq); 1701 1702 rps->max_freq = vlv_rps_max_freq(rps); 1703 rps->rp0_freq = rps->max_freq; 1704 drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", 1705 intel_gpu_freq(rps, rps->max_freq), rps->max_freq); 1706 1707 rps->efficient_freq = vlv_rps_rpe_freq(rps); 1708 drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", 1709 intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); 1710 1711 rps->rp1_freq = vlv_rps_guar_freq(rps); 1712 drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 1713 intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); 1714 1715 rps->min_freq = vlv_rps_min_freq(rps); 1716 drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", 1717 intel_gpu_freq(rps, rps->min_freq), rps->min_freq); 1718 1719 vlv_iosf_sb_put(i915, 1720 BIT(VLV_IOSF_SB_PUNIT) | 1721 BIT(VLV_IOSF_SB_NC) | 1722 BIT(VLV_IOSF_SB_CCK)); 1723 } 1724 1725 static void chv_rps_init(struct intel_rps *rps) 1726 { 1727 struct drm_i915_private *i915 = rps_to_i915(rps); 1728 u32 val; 1729 1730 vlv_iosf_sb_get(i915, 1731 BIT(VLV_IOSF_SB_PUNIT) | 1732 BIT(VLV_IOSF_SB_NC) | 1733 BIT(VLV_IOSF_SB_CCK)); 1734 1735 vlv_init_gpll_ref_freq(rps); 1736 1737 val = vlv_cck_read(i915, CCK_FUSE_REG); 1738 1739 switch ((val >> 2) & 0x7) { 1740 case 3: 1741 i915->mem_freq = 2000; 1742 break; 1743 default: 1744 i915->mem_freq = 1600; 1745 break; 1746 } 1747 drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq); 1748 1749 rps->max_freq = chv_rps_max_freq(rps); 1750 rps->rp0_freq = rps->max_freq; 1751 drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", 1752 intel_gpu_freq(rps, rps->max_freq), rps->max_freq); 1753 1754 rps->efficient_freq = chv_rps_rpe_freq(rps); 1755 drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", 1756 intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); 1757 1758 rps->rp1_freq = chv_rps_guar_freq(rps); 1759 drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n", 1760 intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); 1761 1762 rps->min_freq = chv_rps_min_freq(rps); 1763 drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", 1764 intel_gpu_freq(rps, rps->min_freq), rps->min_freq); 1765 1766 vlv_iosf_sb_put(i915, 1767 BIT(VLV_IOSF_SB_PUNIT) | 1768 BIT(VLV_IOSF_SB_NC) | 1769 BIT(VLV_IOSF_SB_CCK)); 1770 1771 drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq | 1772 rps->rp1_freq | rps->min_freq) & 1, 1773 "Odd GPU freq values\n"); 1774 } 1775 1776 static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) 1777 { 1778 ei->ktime = ktime_get_raw(); 1779 ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); 1780 ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); 1781 } 1782 1783 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) 1784 { 1785 struct intel_uncore *uncore = rps_to_uncore(rps); 1786 const struct intel_rps_ei *prev = &rps->ei; 1787 struct intel_rps_ei now; 1788 u32 events = 0; 1789 1790 if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) 1791 return 0; 1792 1793 vlv_c0_read(uncore, &now); 1794 1795 if (prev->ktime) { 1796 u64 time, c0; 1797 u32 render, media; 1798 1799 time = ktime_us_delta(now.ktime, prev->ktime); 1800 1801 time *= rps_to_i915(rps)->czclk_freq; 1802 1803 /* Workload can be split between render + media, 1804 * e.g. SwapBuffers being blitted in X after being rendered in 1805 * mesa. To account for this we need to combine both engines 1806 * into our activity counter. 1807 */ 1808 render = now.render_c0 - prev->render_c0; 1809 media = now.media_c0 - prev->media_c0; 1810 c0 = max(render, media); 1811 c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ 1812 1813 if (c0 > time * rps->power.up_threshold) 1814 events = GEN6_PM_RP_UP_THRESHOLD; 1815 else if (c0 < time * rps->power.down_threshold) 1816 events = GEN6_PM_RP_DOWN_THRESHOLD; 1817 } 1818 1819 rps->ei = now; 1820 return events; 1821 } 1822 1823 static void rps_work(struct work_struct *work) 1824 { 1825 struct intel_rps *rps = container_of(work, typeof(*rps), work); 1826 struct intel_gt *gt = rps_to_gt(rps); 1827 struct drm_i915_private *i915 = rps_to_i915(rps); 1828 bool client_boost = false; 1829 int new_freq, adj, min, max; 1830 u32 pm_iir = 0; 1831 1832 spin_lock_irq(gt->irq_lock); 1833 pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events; 1834 client_boost = atomic_read(&rps->num_waiters); 1835 spin_unlock_irq(gt->irq_lock); 1836 1837 /* Make sure we didn't queue anything we're not going to process. */ 1838 if (!pm_iir && !client_boost) 1839 goto out; 1840 1841 mutex_lock(&rps->lock); 1842 if (!intel_rps_is_active(rps)) { 1843 mutex_unlock(&rps->lock); 1844 return; 1845 } 1846 1847 pm_iir |= vlv_wa_c0_ei(rps, pm_iir); 1848 1849 adj = rps->last_adj; 1850 new_freq = rps->cur_freq; 1851 min = rps->min_freq_softlimit; 1852 max = rps->max_freq_softlimit; 1853 if (client_boost) 1854 max = rps->max_freq; 1855 1856 GT_TRACE(gt, 1857 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n", 1858 pm_iir, str_yes_no(client_boost), 1859 adj, new_freq, min, max); 1860 1861 if (client_boost && new_freq < rps->boost_freq) { 1862 new_freq = rps->boost_freq; 1863 adj = 0; 1864 } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { 1865 if (adj > 0) 1866 adj *= 2; 1867 else /* CHV needs even encode values */ 1868 adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; 1869 1870 if (new_freq >= rps->max_freq_softlimit) 1871 adj = 0; 1872 } else if (client_boost) { 1873 adj = 0; 1874 } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { 1875 if (rps->cur_freq > rps->efficient_freq) 1876 new_freq = rps->efficient_freq; 1877 else if (rps->cur_freq > rps->min_freq_softlimit) 1878 new_freq = rps->min_freq_softlimit; 1879 adj = 0; 1880 } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { 1881 if (adj < 0) 1882 adj *= 2; 1883 else /* CHV needs even encode values */ 1884 adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1; 1885 1886 if (new_freq <= rps->min_freq_softlimit) 1887 adj = 0; 1888 } else { /* unknown event */ 1889 adj = 0; 1890 } 1891 1892 /* 1893 * sysfs frequency limits may have snuck in while 1894 * servicing the interrupt 1895 */ 1896 new_freq += adj; 1897 new_freq = clamp_t(int, new_freq, min, max); 1898 1899 if (intel_rps_set(rps, new_freq)) { 1900 drm_dbg(&i915->drm, "Failed to set new GPU frequency\n"); 1901 adj = 0; 1902 } 1903 rps->last_adj = adj; 1904 1905 mutex_unlock(&rps->lock); 1906 1907 out: 1908 spin_lock_irq(gt->irq_lock); 1909 gen6_gt_pm_unmask_irq(gt, rps->pm_events); 1910 spin_unlock_irq(gt->irq_lock); 1911 } 1912 1913 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) 1914 { 1915 struct intel_gt *gt = rps_to_gt(rps); 1916 const u32 events = rps->pm_events & pm_iir; 1917 1918 lockdep_assert_held(gt->irq_lock); 1919 1920 if (unlikely(!events)) 1921 return; 1922 1923 GT_TRACE(gt, "irq events:%x\n", events); 1924 1925 gen6_gt_pm_mask_irq(gt, events); 1926 1927 rps->pm_iir |= events; 1928 schedule_work(&rps->work); 1929 } 1930 1931 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) 1932 { 1933 struct intel_gt *gt = rps_to_gt(rps); 1934 u32 events; 1935 1936 events = pm_iir & rps->pm_events; 1937 if (events) { 1938 spin_lock(gt->irq_lock); 1939 1940 GT_TRACE(gt, "irq events:%x\n", events); 1941 1942 gen6_gt_pm_mask_irq(gt, events); 1943 rps->pm_iir |= events; 1944 1945 schedule_work(&rps->work); 1946 spin_unlock(gt->irq_lock); 1947 } 1948 1949 if (GRAPHICS_VER(gt->i915) >= 8) 1950 return; 1951 1952 if (pm_iir & PM_VEBOX_USER_INTERRUPT) 1953 intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10); 1954 1955 if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) 1956 DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); 1957 } 1958 1959 void gen5_rps_irq_handler(struct intel_rps *rps) 1960 { 1961 struct intel_uncore *uncore = rps_to_uncore(rps); 1962 u32 busy_up, busy_down, max_avg, min_avg; 1963 u8 new_freq; 1964 1965 spin_lock(&mchdev_lock); 1966 1967 intel_uncore_write16(uncore, 1968 MEMINTRSTS, 1969 intel_uncore_read(uncore, MEMINTRSTS)); 1970 1971 intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); 1972 busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); 1973 busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); 1974 max_avg = intel_uncore_read(uncore, RCBMAXAVG); 1975 min_avg = intel_uncore_read(uncore, RCBMINAVG); 1976 1977 /* Handle RCS change request from hw */ 1978 new_freq = rps->cur_freq; 1979 if (busy_up > max_avg) 1980 new_freq++; 1981 else if (busy_down < min_avg) 1982 new_freq--; 1983 new_freq = clamp(new_freq, 1984 rps->min_freq_softlimit, 1985 rps->max_freq_softlimit); 1986 1987 if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq)) 1988 rps->cur_freq = new_freq; 1989 1990 spin_unlock(&mchdev_lock); 1991 } 1992 1993 void intel_rps_init_early(struct intel_rps *rps) 1994 { 1995 mutex_init(&rps->lock); 1996 mutex_init(&rps->power.mutex); 1997 1998 INIT_WORK(&rps->work, rps_work); 1999 timer_setup(&rps->timer, rps_timer, 0); 2000 2001 atomic_set(&rps->num_waiters, 0); 2002 } 2003 2004 void intel_rps_init(struct intel_rps *rps) 2005 { 2006 struct drm_i915_private *i915 = rps_to_i915(rps); 2007 2008 if (rps_uses_slpc(rps)) 2009 return; 2010 2011 if (IS_CHERRYVIEW(i915)) 2012 chv_rps_init(rps); 2013 else if (IS_VALLEYVIEW(i915)) 2014 vlv_rps_init(rps); 2015 else if (GRAPHICS_VER(i915) >= 6) 2016 gen6_rps_init(rps); 2017 else if (IS_IRONLAKE_M(i915)) 2018 gen5_rps_init(rps); 2019 2020 /* Derive initial user preferences/limits from the hardware limits */ 2021 rps->max_freq_softlimit = rps->max_freq; 2022 rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit; 2023 rps->min_freq_softlimit = rps->min_freq; 2024 rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit; 2025 2026 /* After setting max-softlimit, find the overclock max freq */ 2027 if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { 2028 u32 params = 0; 2029 2030 snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_READ_OC_PARAMS, ¶ms, NULL); 2031 if (params & BIT(31)) { /* OC supported */ 2032 drm_dbg(&i915->drm, 2033 "Overclocking supported, max: %dMHz, overclock: %dMHz\n", 2034 (rps->max_freq & 0xff) * 50, 2035 (params & 0xff) * 50); 2036 rps->max_freq = params & 0xff; 2037 } 2038 } 2039 2040 /* Finally allow us to boost to max by default */ 2041 rps->boost_freq = rps->max_freq; 2042 rps->idle_freq = rps->min_freq; 2043 2044 /* Start in the middle, from here we will autotune based on workload */ 2045 rps->cur_freq = rps->efficient_freq; 2046 2047 rps->pm_intrmsk_mbz = 0; 2048 2049 /* 2050 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer 2051 * if GEN6_PM_UP_EI_EXPIRED is masked. 2052 * 2053 * TODO: verify if this can be reproduced on VLV,CHV. 2054 */ 2055 if (GRAPHICS_VER(i915) <= 7) 2056 rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; 2057 2058 if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11) 2059 rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; 2060 2061 /* GuC needs ARAT expired interrupt unmasked */ 2062 if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc)) 2063 rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; 2064 } 2065 2066 void intel_rps_sanitize(struct intel_rps *rps) 2067 { 2068 if (rps_uses_slpc(rps)) 2069 return; 2070 2071 if (GRAPHICS_VER(rps_to_i915(rps)) >= 6) 2072 rps_disable_interrupts(rps); 2073 } 2074 2075 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) 2076 { 2077 struct drm_i915_private *i915 = rps_to_i915(rps); 2078 u32 cagf; 2079 2080 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) 2081 cagf = (rpstat >> 8) & 0xff; 2082 else if (GRAPHICS_VER(i915) >= 9) 2083 cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; 2084 else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 2085 cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; 2086 else if (GRAPHICS_VER(i915) >= 6) 2087 cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; 2088 else 2089 cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >> 2090 MEMSTAT_PSTATE_SHIFT); 2091 2092 return cagf; 2093 } 2094 2095 static u32 read_cagf(struct intel_rps *rps) 2096 { 2097 struct drm_i915_private *i915 = rps_to_i915(rps); 2098 struct intel_uncore *uncore = rps_to_uncore(rps); 2099 u32 freq; 2100 2101 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { 2102 vlv_punit_get(i915); 2103 freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 2104 vlv_punit_put(i915); 2105 } else if (GRAPHICS_VER(i915) >= 6) { 2106 freq = intel_uncore_read(uncore, GEN6_RPSTAT1); 2107 } else { 2108 freq = intel_uncore_read(uncore, MEMSTAT_ILK); 2109 } 2110 2111 return intel_rps_get_cagf(rps, freq); 2112 } 2113 2114 u32 intel_rps_read_actual_frequency(struct intel_rps *rps) 2115 { 2116 struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; 2117 intel_wakeref_t wakeref; 2118 u32 freq = 0; 2119 2120 with_intel_runtime_pm_if_in_use(rpm, wakeref) 2121 freq = intel_gpu_freq(rps, read_cagf(rps)); 2122 2123 return freq; 2124 } 2125 2126 u32 intel_rps_read_punit_req(struct intel_rps *rps) 2127 { 2128 struct intel_uncore *uncore = rps_to_uncore(rps); 2129 struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; 2130 intel_wakeref_t wakeref; 2131 u32 freq = 0; 2132 2133 with_intel_runtime_pm_if_in_use(rpm, wakeref) 2134 freq = intel_uncore_read(uncore, GEN6_RPNSWREQ); 2135 2136 return freq; 2137 } 2138 2139 static u32 intel_rps_get_req(u32 pureq) 2140 { 2141 u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT; 2142 2143 return req; 2144 } 2145 2146 u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps) 2147 { 2148 u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps)); 2149 2150 return intel_gpu_freq(rps, freq); 2151 } 2152 2153 u32 intel_rps_get_requested_frequency(struct intel_rps *rps) 2154 { 2155 if (rps_uses_slpc(rps)) 2156 return intel_rps_read_punit_req_frequency(rps); 2157 else 2158 return intel_gpu_freq(rps, rps->cur_freq); 2159 } 2160 2161 u32 intel_rps_get_max_frequency(struct intel_rps *rps) 2162 { 2163 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2164 2165 if (rps_uses_slpc(rps)) 2166 return slpc->max_freq_softlimit; 2167 else 2168 return intel_gpu_freq(rps, rps->max_freq_softlimit); 2169 } 2170 2171 /** 2172 * intel_rps_get_max_raw_freq - returns the max frequency in some raw format. 2173 * @rps: the intel_rps structure 2174 * 2175 * Returns the max frequency in a raw format. In newer platforms raw is in 2176 * units of 50 MHz. 2177 */ 2178 u32 intel_rps_get_max_raw_freq(struct intel_rps *rps) 2179 { 2180 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2181 u32 freq; 2182 2183 if (rps_uses_slpc(rps)) { 2184 return DIV_ROUND_CLOSEST(slpc->rp0_freq, 2185 GT_FREQUENCY_MULTIPLIER); 2186 } else { 2187 freq = rps->max_freq; 2188 if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { 2189 /* Convert GT frequency to 50 MHz units */ 2190 freq /= GEN9_FREQ_SCALER; 2191 } 2192 return freq; 2193 } 2194 } 2195 2196 u32 intel_rps_get_rp0_frequency(struct intel_rps *rps) 2197 { 2198 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2199 2200 if (rps_uses_slpc(rps)) 2201 return slpc->rp0_freq; 2202 else 2203 return intel_gpu_freq(rps, rps->rp0_freq); 2204 } 2205 2206 u32 intel_rps_get_rp1_frequency(struct intel_rps *rps) 2207 { 2208 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2209 2210 if (rps_uses_slpc(rps)) 2211 return slpc->rp1_freq; 2212 else 2213 return intel_gpu_freq(rps, rps->rp1_freq); 2214 } 2215 2216 u32 intel_rps_get_rpn_frequency(struct intel_rps *rps) 2217 { 2218 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2219 2220 if (rps_uses_slpc(rps)) 2221 return slpc->min_freq; 2222 else 2223 return intel_gpu_freq(rps, rps->min_freq); 2224 } 2225 2226 static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) 2227 { 2228 struct intel_gt *gt = rps_to_gt(rps); 2229 struct drm_i915_private *i915 = gt->i915; 2230 struct intel_uncore *uncore = gt->uncore; 2231 struct intel_rps_freq_caps caps; 2232 u32 rp_state_limits; 2233 u32 gt_perf_status; 2234 u32 rpmodectl, rpinclimit, rpdeclimit; 2235 u32 rpstat, cagf, reqf; 2236 u32 rpcurupei, rpcurup, rpprevup; 2237 u32 rpcurdownei, rpcurdown, rpprevdown; 2238 u32 rpupei, rpupt, rpdownei, rpdownt; 2239 u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; 2240 2241 rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); 2242 gen6_rps_get_freq_caps(rps, &caps); 2243 if (IS_GEN9_LP(i915)) 2244 gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); 2245 else 2246 gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); 2247 2248 /* RPSTAT1 is in the GT power well */ 2249 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 2250 2251 reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); 2252 if (GRAPHICS_VER(i915) >= 9) { 2253 reqf >>= 23; 2254 } else { 2255 reqf &= ~GEN6_TURBO_DISABLE; 2256 if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 2257 reqf >>= 24; 2258 else 2259 reqf >>= 25; 2260 } 2261 reqf = intel_gpu_freq(rps, reqf); 2262 2263 rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); 2264 rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); 2265 rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); 2266 2267 rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); 2268 rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; 2269 rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; 2270 rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; 2271 rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; 2272 rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; 2273 rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; 2274 2275 rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); 2276 rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); 2277 2278 rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); 2279 rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); 2280 2281 cagf = intel_rps_read_actual_frequency(rps); 2282 2283 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 2284 2285 if (GRAPHICS_VER(i915) >= 11) { 2286 pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); 2287 pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); 2288 /* 2289 * The equivalent to the PM ISR & IIR cannot be read 2290 * without affecting the current state of the system 2291 */ 2292 pm_isr = 0; 2293 pm_iir = 0; 2294 } else if (GRAPHICS_VER(i915) >= 8) { 2295 pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); 2296 pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); 2297 pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); 2298 pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); 2299 } else { 2300 pm_ier = intel_uncore_read(uncore, GEN6_PMIER); 2301 pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); 2302 pm_isr = intel_uncore_read(uncore, GEN6_PMISR); 2303 pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); 2304 } 2305 pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); 2306 2307 drm_printf(p, "Video Turbo Mode: %s\n", 2308 str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO)); 2309 drm_printf(p, "HW control enabled: %s\n", 2310 str_yes_no(rpmodectl & GEN6_RP_ENABLE)); 2311 drm_printf(p, "SW control enabled: %s\n", 2312 str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); 2313 2314 drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", 2315 pm_ier, pm_imr, pm_mask); 2316 if (GRAPHICS_VER(i915) <= 10) 2317 drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n", 2318 pm_isr, pm_iir); 2319 drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", 2320 rps->pm_intrmsk_mbz); 2321 drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); 2322 drm_printf(p, "Render p-state ratio: %d\n", 2323 (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); 2324 drm_printf(p, "Render p-state VID: %d\n", 2325 gt_perf_status & 0xff); 2326 drm_printf(p, "Render p-state limit: %d\n", 2327 rp_state_limits & 0xff); 2328 drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat); 2329 drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl); 2330 drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit); 2331 drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit); 2332 drm_printf(p, "RPNSWREQ: %dMHz\n", reqf); 2333 drm_printf(p, "CAGF: %dMHz\n", cagf); 2334 drm_printf(p, "RP CUR UP EI: %d (%lldns)\n", 2335 rpcurupei, 2336 intel_gt_pm_interval_to_ns(gt, rpcurupei)); 2337 drm_printf(p, "RP CUR UP: %d (%lldns)\n", 2338 rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); 2339 drm_printf(p, "RP PREV UP: %d (%lldns)\n", 2340 rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); 2341 drm_printf(p, "Up threshold: %d%%\n", 2342 rps->power.up_threshold); 2343 drm_printf(p, "RP UP EI: %d (%lldns)\n", 2344 rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); 2345 drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n", 2346 rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); 2347 2348 drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n", 2349 rpcurdownei, 2350 intel_gt_pm_interval_to_ns(gt, rpcurdownei)); 2351 drm_printf(p, "RP CUR DOWN: %d (%lldns)\n", 2352 rpcurdown, 2353 intel_gt_pm_interval_to_ns(gt, rpcurdown)); 2354 drm_printf(p, "RP PREV DOWN: %d (%lldns)\n", 2355 rpprevdown, 2356 intel_gt_pm_interval_to_ns(gt, rpprevdown)); 2357 drm_printf(p, "Down threshold: %d%%\n", 2358 rps->power.down_threshold); 2359 drm_printf(p, "RP DOWN EI: %d (%lldns)\n", 2360 rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); 2361 drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n", 2362 rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); 2363 2364 drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", 2365 intel_gpu_freq(rps, caps.min_freq)); 2366 drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", 2367 intel_gpu_freq(rps, caps.rp1_freq)); 2368 drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", 2369 intel_gpu_freq(rps, caps.rp0_freq)); 2370 drm_printf(p, "Max overclocked frequency: %dMHz\n", 2371 intel_gpu_freq(rps, rps->max_freq)); 2372 2373 drm_printf(p, "Current freq: %d MHz\n", 2374 intel_gpu_freq(rps, rps->cur_freq)); 2375 drm_printf(p, "Actual freq: %d MHz\n", cagf); 2376 drm_printf(p, "Idle freq: %d MHz\n", 2377 intel_gpu_freq(rps, rps->idle_freq)); 2378 drm_printf(p, "Min freq: %d MHz\n", 2379 intel_gpu_freq(rps, rps->min_freq)); 2380 drm_printf(p, "Boost freq: %d MHz\n", 2381 intel_gpu_freq(rps, rps->boost_freq)); 2382 drm_printf(p, "Max freq: %d MHz\n", 2383 intel_gpu_freq(rps, rps->max_freq)); 2384 drm_printf(p, 2385 "efficient (RPe) frequency: %d MHz\n", 2386 intel_gpu_freq(rps, rps->efficient_freq)); 2387 } 2388 2389 static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p) 2390 { 2391 struct intel_gt *gt = rps_to_gt(rps); 2392 struct intel_uncore *uncore = gt->uncore; 2393 struct intel_rps_freq_caps caps; 2394 u32 pm_mask; 2395 2396 gen6_rps_get_freq_caps(rps, &caps); 2397 pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); 2398 2399 drm_printf(p, "PM MASK=0x%08x\n", pm_mask); 2400 drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", 2401 rps->pm_intrmsk_mbz); 2402 drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1)); 2403 drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps)); 2404 drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", 2405 intel_gpu_freq(rps, caps.min_freq)); 2406 drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", 2407 intel_gpu_freq(rps, caps.rp1_freq)); 2408 drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", 2409 intel_gpu_freq(rps, caps.rp0_freq)); 2410 drm_printf(p, "Current freq: %d MHz\n", 2411 intel_rps_get_requested_frequency(rps)); 2412 drm_printf(p, "Actual freq: %d MHz\n", 2413 intel_rps_read_actual_frequency(rps)); 2414 drm_printf(p, "Min freq: %d MHz\n", 2415 intel_rps_get_min_frequency(rps)); 2416 drm_printf(p, "Boost freq: %d MHz\n", 2417 intel_rps_get_boost_frequency(rps)); 2418 drm_printf(p, "Max freq: %d MHz\n", 2419 intel_rps_get_max_frequency(rps)); 2420 drm_printf(p, 2421 "efficient (RPe) frequency: %d MHz\n", 2422 intel_gpu_freq(rps, caps.rp1_freq)); 2423 } 2424 2425 void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) 2426 { 2427 if (rps_uses_slpc(rps)) 2428 return slpc_frequency_dump(rps, p); 2429 else 2430 return rps_frequency_dump(rps, p); 2431 } 2432 2433 static int set_max_freq(struct intel_rps *rps, u32 val) 2434 { 2435 struct drm_i915_private *i915 = rps_to_i915(rps); 2436 int ret = 0; 2437 2438 mutex_lock(&rps->lock); 2439 2440 val = intel_freq_opcode(rps, val); 2441 if (val < rps->min_freq || 2442 val > rps->max_freq || 2443 val < rps->min_freq_softlimit) { 2444 ret = -EINVAL; 2445 goto unlock; 2446 } 2447 2448 if (val > rps->rp0_freq) 2449 drm_dbg(&i915->drm, "User requested overclocking to %d\n", 2450 intel_gpu_freq(rps, val)); 2451 2452 rps->max_freq_softlimit = val; 2453 2454 val = clamp_t(int, rps->cur_freq, 2455 rps->min_freq_softlimit, 2456 rps->max_freq_softlimit); 2457 2458 /* 2459 * We still need *_set_rps to process the new max_delay and 2460 * update the interrupt limits and PMINTRMSK even though 2461 * frequency request may be unchanged. 2462 */ 2463 intel_rps_set(rps, val); 2464 2465 unlock: 2466 mutex_unlock(&rps->lock); 2467 2468 return ret; 2469 } 2470 2471 int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val) 2472 { 2473 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2474 2475 if (rps_uses_slpc(rps)) 2476 return intel_guc_slpc_set_max_freq(slpc, val); 2477 else 2478 return set_max_freq(rps, val); 2479 } 2480 2481 u32 intel_rps_get_min_frequency(struct intel_rps *rps) 2482 { 2483 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2484 2485 if (rps_uses_slpc(rps)) 2486 return slpc->min_freq_softlimit; 2487 else 2488 return intel_gpu_freq(rps, rps->min_freq_softlimit); 2489 } 2490 2491 /** 2492 * intel_rps_get_min_raw_freq - returns the min frequency in some raw format. 2493 * @rps: the intel_rps structure 2494 * 2495 * Returns the min frequency in a raw format. In newer platforms raw is in 2496 * units of 50 MHz. 2497 */ 2498 u32 intel_rps_get_min_raw_freq(struct intel_rps *rps) 2499 { 2500 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2501 u32 freq; 2502 2503 if (rps_uses_slpc(rps)) { 2504 return DIV_ROUND_CLOSEST(slpc->min_freq, 2505 GT_FREQUENCY_MULTIPLIER); 2506 } else { 2507 freq = rps->min_freq; 2508 if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { 2509 /* Convert GT frequency to 50 MHz units */ 2510 freq /= GEN9_FREQ_SCALER; 2511 } 2512 return freq; 2513 } 2514 } 2515 2516 static int set_min_freq(struct intel_rps *rps, u32 val) 2517 { 2518 int ret = 0; 2519 2520 mutex_lock(&rps->lock); 2521 2522 val = intel_freq_opcode(rps, val); 2523 if (val < rps->min_freq || 2524 val > rps->max_freq || 2525 val > rps->max_freq_softlimit) { 2526 ret = -EINVAL; 2527 goto unlock; 2528 } 2529 2530 rps->min_freq_softlimit = val; 2531 2532 val = clamp_t(int, rps->cur_freq, 2533 rps->min_freq_softlimit, 2534 rps->max_freq_softlimit); 2535 2536 /* 2537 * We still need *_set_rps to process the new min_delay and 2538 * update the interrupt limits and PMINTRMSK even though 2539 * frequency request may be unchanged. 2540 */ 2541 intel_rps_set(rps, val); 2542 2543 unlock: 2544 mutex_unlock(&rps->lock); 2545 2546 return ret; 2547 } 2548 2549 int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) 2550 { 2551 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2552 2553 if (rps_uses_slpc(rps)) 2554 return intel_guc_slpc_set_min_freq(slpc, val); 2555 else 2556 return set_min_freq(rps, val); 2557 } 2558 2559 static void intel_rps_set_manual(struct intel_rps *rps, bool enable) 2560 { 2561 struct intel_uncore *uncore = rps_to_uncore(rps); 2562 u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE; 2563 2564 /* Allow punit to process software requests */ 2565 intel_uncore_write(uncore, GEN6_RP_CONTROL, state); 2566 } 2567 2568 void intel_rps_raise_unslice(struct intel_rps *rps) 2569 { 2570 struct intel_uncore *uncore = rps_to_uncore(rps); 2571 2572 mutex_lock(&rps->lock); 2573 2574 if (rps_uses_slpc(rps)) { 2575 /* RP limits have not been initialized yet for SLPC path */ 2576 struct intel_rps_freq_caps caps; 2577 2578 gen6_rps_get_freq_caps(rps, &caps); 2579 2580 intel_rps_set_manual(rps, true); 2581 intel_uncore_write(uncore, GEN6_RPNSWREQ, 2582 ((caps.rp0_freq << 2583 GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) | 2584 GEN9_IGNORE_SLICE_RATIO)); 2585 intel_rps_set_manual(rps, false); 2586 } else { 2587 intel_rps_set(rps, rps->rp0_freq); 2588 } 2589 2590 mutex_unlock(&rps->lock); 2591 } 2592 2593 void intel_rps_lower_unslice(struct intel_rps *rps) 2594 { 2595 struct intel_uncore *uncore = rps_to_uncore(rps); 2596 2597 mutex_lock(&rps->lock); 2598 2599 if (rps_uses_slpc(rps)) { 2600 /* RP limits have not been initialized yet for SLPC path */ 2601 struct intel_rps_freq_caps caps; 2602 2603 gen6_rps_get_freq_caps(rps, &caps); 2604 2605 intel_rps_set_manual(rps, true); 2606 intel_uncore_write(uncore, GEN6_RPNSWREQ, 2607 ((caps.min_freq << 2608 GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) | 2609 GEN9_IGNORE_SLICE_RATIO)); 2610 intel_rps_set_manual(rps, false); 2611 } else { 2612 intel_rps_set(rps, rps->min_freq); 2613 } 2614 2615 mutex_unlock(&rps->lock); 2616 } 2617 2618 static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32) 2619 { 2620 struct intel_gt *gt = rps_to_gt(rps); 2621 intel_wakeref_t wakeref; 2622 u32 val; 2623 2624 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 2625 val = intel_uncore_read(gt->uncore, reg32); 2626 2627 return val; 2628 } 2629 2630 bool rps_read_mask_mmio(struct intel_rps *rps, 2631 i915_reg_t reg32, u32 mask) 2632 { 2633 return rps_read_mmio(rps, reg32) & mask; 2634 } 2635 2636 /* External interface for intel_ips.ko */ 2637 2638 static struct drm_i915_private __rcu *ips_mchdev; 2639 2640 /** 2641 * Tells the intel_ips driver that the i915 driver is now loaded, if 2642 * IPS got loaded first. 2643 * 2644 * This awkward dance is so that neither module has to depend on the 2645 * other in order for IPS to do the appropriate communication of 2646 * GPU turbo limits to i915. 2647 */ 2648 static void 2649 ips_ping_for_i915_load(void) 2650 { 2651 void (*link)(void); 2652 2653 link = symbol_get(ips_link_to_i915_driver); 2654 if (link) { 2655 link(); 2656 symbol_put(ips_link_to_i915_driver); 2657 } 2658 } 2659 2660 void intel_rps_driver_register(struct intel_rps *rps) 2661 { 2662 struct intel_gt *gt = rps_to_gt(rps); 2663 2664 /* 2665 * We only register the i915 ips part with intel-ips once everything is 2666 * set up, to avoid intel-ips sneaking in and reading bogus values. 2667 */ 2668 if (GRAPHICS_VER(gt->i915) == 5) { 2669 GEM_BUG_ON(ips_mchdev); 2670 rcu_assign_pointer(ips_mchdev, gt->i915); 2671 ips_ping_for_i915_load(); 2672 } 2673 } 2674 2675 void intel_rps_driver_unregister(struct intel_rps *rps) 2676 { 2677 if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps)) 2678 rcu_assign_pointer(ips_mchdev, NULL); 2679 } 2680 2681 static struct drm_i915_private *mchdev_get(void) 2682 { 2683 struct drm_i915_private *i915; 2684 2685 rcu_read_lock(); 2686 i915 = rcu_dereference(ips_mchdev); 2687 if (i915 && !kref_get_unless_zero(&i915->drm.ref)) 2688 i915 = NULL; 2689 rcu_read_unlock(); 2690 2691 return i915; 2692 } 2693 2694 /** 2695 * i915_read_mch_val - return value for IPS use 2696 * 2697 * Calculate and return a value for the IPS driver to use when deciding whether 2698 * we have thermal and power headroom to increase CPU or GPU power budget. 2699 */ 2700 unsigned long i915_read_mch_val(void) 2701 { 2702 struct drm_i915_private *i915; 2703 unsigned long chipset_val = 0; 2704 unsigned long graphics_val = 0; 2705 intel_wakeref_t wakeref; 2706 2707 i915 = mchdev_get(); 2708 if (!i915) 2709 return 0; 2710 2711 with_intel_runtime_pm(&i915->runtime_pm, wakeref) { 2712 struct intel_ips *ips = &to_gt(i915)->rps.ips; 2713 2714 spin_lock_irq(&mchdev_lock); 2715 chipset_val = __ips_chipset_val(ips); 2716 graphics_val = __ips_gfx_val(ips); 2717 spin_unlock_irq(&mchdev_lock); 2718 } 2719 2720 drm_dev_put(&i915->drm); 2721 return chipset_val + graphics_val; 2722 } 2723 EXPORT_SYMBOL_GPL(i915_read_mch_val); 2724 2725 /** 2726 * i915_gpu_raise - raise GPU frequency limit 2727 * 2728 * Raise the limit; IPS indicates we have thermal headroom. 2729 */ 2730 bool i915_gpu_raise(void) 2731 { 2732 struct drm_i915_private *i915; 2733 struct intel_rps *rps; 2734 2735 i915 = mchdev_get(); 2736 if (!i915) 2737 return false; 2738 2739 rps = &to_gt(i915)->rps; 2740 2741 spin_lock_irq(&mchdev_lock); 2742 if (rps->max_freq_softlimit < rps->max_freq) 2743 rps->max_freq_softlimit++; 2744 spin_unlock_irq(&mchdev_lock); 2745 2746 drm_dev_put(&i915->drm); 2747 return true; 2748 } 2749 EXPORT_SYMBOL_GPL(i915_gpu_raise); 2750 2751 /** 2752 * i915_gpu_lower - lower GPU frequency limit 2753 * 2754 * IPS indicates we're close to a thermal limit, so throttle back the GPU 2755 * frequency maximum. 2756 */ 2757 bool i915_gpu_lower(void) 2758 { 2759 struct drm_i915_private *i915; 2760 struct intel_rps *rps; 2761 2762 i915 = mchdev_get(); 2763 if (!i915) 2764 return false; 2765 2766 rps = &to_gt(i915)->rps; 2767 2768 spin_lock_irq(&mchdev_lock); 2769 if (rps->max_freq_softlimit > rps->min_freq) 2770 rps->max_freq_softlimit--; 2771 spin_unlock_irq(&mchdev_lock); 2772 2773 drm_dev_put(&i915->drm); 2774 return true; 2775 } 2776 EXPORT_SYMBOL_GPL(i915_gpu_lower); 2777 2778 /** 2779 * i915_gpu_busy - indicate GPU business to IPS 2780 * 2781 * Tell the IPS driver whether or not the GPU is busy. 2782 */ 2783 bool i915_gpu_busy(void) 2784 { 2785 struct drm_i915_private *i915; 2786 bool ret; 2787 2788 i915 = mchdev_get(); 2789 if (!i915) 2790 return false; 2791 2792 ret = to_gt(i915)->awake; 2793 2794 drm_dev_put(&i915->drm); 2795 return ret; 2796 } 2797 EXPORT_SYMBOL_GPL(i915_gpu_busy); 2798 2799 /** 2800 * i915_gpu_turbo_disable - disable graphics turbo 2801 * 2802 * Disable graphics turbo by resetting the max frequency and setting the 2803 * current frequency to the default. 2804 */ 2805 bool i915_gpu_turbo_disable(void) 2806 { 2807 struct drm_i915_private *i915; 2808 struct intel_rps *rps; 2809 bool ret; 2810 2811 i915 = mchdev_get(); 2812 if (!i915) 2813 return false; 2814 2815 rps = &to_gt(i915)->rps; 2816 2817 spin_lock_irq(&mchdev_lock); 2818 rps->max_freq_softlimit = rps->min_freq; 2819 ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq); 2820 spin_unlock_irq(&mchdev_lock); 2821 2822 drm_dev_put(&i915->drm); 2823 return ret; 2824 } 2825 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 2826 2827 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 2828 #include "selftest_rps.c" 2829 #include "selftest_slpc.c" 2830 #endif 2831