// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include <drm/i915_drm.h>

#include "display/intel_display.h"
#include "display/intel_display_irq.h"
#include "i915_drv.h"
#include "i915_irq.h"
#include "i915_reg.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_gt_regs.h"
#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
#include "intel_rps.h"
#include "vlv_sideband.h"
#include "../../../platform/x86/intel_ips.h"

#define BUSY_MAX_EI	20u /* ms */

/*
 * Lock protecting IPS related data structures
 */
static DEFINE_SPINLOCK(mchdev_lock);

static struct intel_gt *rps_to_gt(struct intel_rps *rps)
{
	return container_of(rps, struct intel_gt, rps);
}

static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
{
	return rps_to_gt(rps)->i915;
}

static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
{
	return rps_to_gt(rps)->uncore;
}

static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	return &gt->uc.guc.slpc;
}

static bool rps_uses_slpc(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	return intel_uc_uses_guc_slpc(&gt->uc);
}

static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
{
	return mask & ~rps->pm_intrmsk_mbz;
}

static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
	intel_uncore_write_fw(uncore, reg, val);
}

static void rps_timer(struct timer_list *t)
{
	struct intel_rps *rps = from_timer(rps, t, timer);
	struct intel_engine_cs *engine;
	ktime_t dt, last, timestamp;
	enum intel_engine_id id;
	s64 max_busy[3] = {};

	timestamp = 0;
	for_each_engine(engine, rps_to_gt(rps), id) {
		s64 busy;
		int i;

		dt = intel_engine_get_busy_time(engine, &timestamp);
		last = engine->stats.rps;
		engine->stats.rps = dt;

		busy = ktime_to_ns(ktime_sub(dt, last));
		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
			if (busy > max_busy[i])
				swap(busy, max_busy[i]);
		}
	}
	last = rps->pm_timestamp;
	rps->pm_timestamp = timestamp;

	if (intel_rps_is_active(rps)) {
		s64 busy;
		int i;

		dt = ktime_sub(timestamp, last);

		/*
		 * Our goal is to evaluate each engine independently, so we run
		 * at the lowest clocks required to sustain the heaviest
		 * workload. However, a task may be split into sequential
		 * dependent operations across a set of engines, such that
		 * the independent contributions do not account for high load,
		 * but overall the task is GPU bound. For example, consider
		 * video decode on vcs followed by colour post-processing
		 * on vecs, followed by general post-processing on rcs.
		 * Since multi-engines being active does imply a single
		 * continuous workload across all engines, we hedge our
		 * bets by only contributing a factor of the distributed
		 * load into our busyness calculation.
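		 *
		 * For example, if the three largest per-engine deltas over the
		 * interval are 8ms, 4ms and 2ms, the figure computed below is
		 * 8 + 4/2 + 2/4 = 10.5ms (each max_busy[i] is scaled by
		 * 1 << i), rather than the 14ms of a plain sum or the 8ms of
		 * the busiest engine alone.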
118 */ 119 busy = max_busy[0]; 120 for (i = 1; i < ARRAY_SIZE(max_busy); i++) { 121 if (!max_busy[i]) 122 break; 123 124 busy += div_u64(max_busy[i], 1 << i); 125 } 126 GT_TRACE(rps_to_gt(rps), 127 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", 128 busy, (int)div64_u64(100 * busy, dt), 129 max_busy[0], max_busy[1], max_busy[2], 130 rps->pm_interval); 131 132 if (100 * busy > rps->power.up_threshold * dt && 133 rps->cur_freq < rps->max_freq_softlimit) { 134 rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; 135 rps->pm_interval = 1; 136 schedule_work(&rps->work); 137 } else if (100 * busy < rps->power.down_threshold * dt && 138 rps->cur_freq > rps->min_freq_softlimit) { 139 rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; 140 rps->pm_interval = 1; 141 schedule_work(&rps->work); 142 } else { 143 rps->last_adj = 0; 144 } 145 146 mod_timer(&rps->timer, 147 jiffies + msecs_to_jiffies(rps->pm_interval)); 148 rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI); 149 } 150 } 151 152 static void rps_start_timer(struct intel_rps *rps) 153 { 154 rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); 155 rps->pm_interval = 1; 156 mod_timer(&rps->timer, jiffies + 1); 157 } 158 159 static void rps_stop_timer(struct intel_rps *rps) 160 { 161 del_timer_sync(&rps->timer); 162 rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); 163 cancel_work_sync(&rps->work); 164 } 165 166 static u32 rps_pm_mask(struct intel_rps *rps, u8 val) 167 { 168 u32 mask = 0; 169 170 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ 171 if (val > rps->min_freq_softlimit) 172 mask |= (GEN6_PM_RP_UP_EI_EXPIRED | 173 GEN6_PM_RP_DOWN_THRESHOLD | 174 GEN6_PM_RP_DOWN_TIMEOUT); 175 176 if (val < rps->max_freq_softlimit) 177 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 178 179 mask &= rps->pm_events; 180 181 return rps_pm_sanitize_mask(rps, ~mask); 182 } 183 184 static void rps_reset_ei(struct intel_rps *rps) 185 { 186 memset(&rps->ei, 0, sizeof(rps->ei)); 187 } 188 189 static void rps_enable_interrupts(struct intel_rps *rps) 190 { 191 struct intel_gt *gt = rps_to_gt(rps); 192 193 GEM_BUG_ON(rps_uses_slpc(rps)); 194 195 GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", 196 rps->pm_events, rps_pm_mask(rps, rps->last_freq)); 197 198 rps_reset_ei(rps); 199 200 spin_lock_irq(gt->irq_lock); 201 gen6_gt_pm_enable_irq(gt, rps->pm_events); 202 spin_unlock_irq(gt->irq_lock); 203 204 intel_uncore_write(gt->uncore, 205 GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq)); 206 } 207 208 static void gen6_rps_reset_interrupts(struct intel_rps *rps) 209 { 210 gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS); 211 } 212 213 static void gen11_rps_reset_interrupts(struct intel_rps *rps) 214 { 215 while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM)) 216 ; 217 } 218 219 static void rps_reset_interrupts(struct intel_rps *rps) 220 { 221 struct intel_gt *gt = rps_to_gt(rps); 222 223 spin_lock_irq(gt->irq_lock); 224 if (GRAPHICS_VER(gt->i915) >= 11) 225 gen11_rps_reset_interrupts(rps); 226 else 227 gen6_rps_reset_interrupts(rps); 228 229 rps->pm_iir = 0; 230 spin_unlock_irq(gt->irq_lock); 231 } 232 233 static void rps_disable_interrupts(struct intel_rps *rps) 234 { 235 struct intel_gt *gt = rps_to_gt(rps); 236 237 intel_uncore_write(gt->uncore, 238 GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); 239 240 spin_lock_irq(gt->irq_lock); 241 gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); 242 spin_unlock_irq(gt->irq_lock); 243 244 intel_synchronize_irq(gt->i915); 245 246 /* 247 * Now 
that we will not be generating any more work, flush any 248 * outstanding tasks. As we are called on the RPS idle path, 249 * we will reset the GPU to minimum frequencies, so the current 250 * state of the worker can be discarded. 251 */ 252 cancel_work_sync(&rps->work); 253 254 rps_reset_interrupts(rps); 255 GT_TRACE(gt, "interrupts:off\n"); 256 } 257 258 static const struct cparams { 259 u16 i; 260 u16 t; 261 u16 m; 262 u16 c; 263 } cparams[] = { 264 { 1, 1333, 301, 28664 }, 265 { 1, 1066, 294, 24460 }, 266 { 1, 800, 294, 25192 }, 267 { 0, 1333, 276, 27605 }, 268 { 0, 1066, 276, 27605 }, 269 { 0, 800, 231, 23784 }, 270 }; 271 272 static void gen5_rps_init(struct intel_rps *rps) 273 { 274 struct drm_i915_private *i915 = rps_to_i915(rps); 275 struct intel_uncore *uncore = rps_to_uncore(rps); 276 u8 fmax, fmin, fstart; 277 u32 rgvmodectl; 278 int c_m, i; 279 280 if (i915->fsb_freq <= 3200) 281 c_m = 0; 282 else if (i915->fsb_freq <= 4800) 283 c_m = 1; 284 else 285 c_m = 2; 286 287 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 288 if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { 289 rps->ips.m = cparams[i].m; 290 rps->ips.c = cparams[i].c; 291 break; 292 } 293 } 294 295 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 296 297 /* Set up min, max, and cur for interrupt handling */ 298 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 299 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 300 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 301 MEMMODE_FSTART_SHIFT; 302 drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n", 303 fmax, fmin, fstart); 304 305 rps->min_freq = fmax; 306 rps->efficient_freq = fstart; 307 rps->max_freq = fmin; 308 } 309 310 static unsigned long 311 __ips_chipset_val(struct intel_ips *ips) 312 { 313 struct intel_uncore *uncore = 314 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 315 unsigned long now = jiffies_to_msecs(jiffies), dt; 316 unsigned long result; 317 u64 total, delta; 318 319 lockdep_assert_held(&mchdev_lock); 320 321 /* 322 * Prevent division-by-zero if we are asking too fast. 323 * Also, we don't get interesting results if we are polling 324 * faster than once in 10ms, so just return the saved value 325 * in such cases. 
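	 * (dt below is in milliseconds, as last_time1 is recorded via
	 * jiffies_to_msecs(), so the dt <= 10 test implements that 10ms
	 * floor.)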
326 */ 327 dt = now - ips->last_time1; 328 if (dt <= 10) 329 return ips->chipset_power; 330 331 /* FIXME: handle per-counter overflow */ 332 total = intel_uncore_read(uncore, DMIEC); 333 total += intel_uncore_read(uncore, DDREC); 334 total += intel_uncore_read(uncore, CSIEC); 335 336 delta = total - ips->last_count1; 337 338 result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); 339 340 ips->last_count1 = total; 341 ips->last_time1 = now; 342 343 ips->chipset_power = result; 344 345 return result; 346 } 347 348 static unsigned long ips_mch_val(struct intel_uncore *uncore) 349 { 350 unsigned int m, x, b; 351 u32 tsfs; 352 353 tsfs = intel_uncore_read(uncore, TSFS); 354 x = intel_uncore_read8(uncore, TR1); 355 356 b = tsfs & TSFS_INTR_MASK; 357 m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; 358 359 return m * x / 127 - b; 360 } 361 362 static int _pxvid_to_vd(u8 pxvid) 363 { 364 if (pxvid == 0) 365 return 0; 366 367 if (pxvid >= 8 && pxvid < 31) 368 pxvid = 31; 369 370 return (pxvid + 2) * 125; 371 } 372 373 static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) 374 { 375 const int vd = _pxvid_to_vd(pxvid); 376 377 if (INTEL_INFO(i915)->is_mobile) 378 return max(vd - 1125, 0); 379 380 return vd; 381 } 382 383 static void __gen5_ips_update(struct intel_ips *ips) 384 { 385 struct intel_uncore *uncore = 386 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 387 u64 now, delta, dt; 388 u32 count; 389 390 lockdep_assert_held(&mchdev_lock); 391 392 now = ktime_get_raw_ns(); 393 dt = now - ips->last_time2; 394 do_div(dt, NSEC_PER_MSEC); 395 396 /* Don't divide by 0 */ 397 if (dt <= 10) 398 return; 399 400 count = intel_uncore_read(uncore, GFXEC); 401 delta = count - ips->last_count2; 402 403 ips->last_count2 = count; 404 ips->last_time2 = now; 405 406 /* More magic constants... 
*/ 407 ips->gfx_power = div_u64(delta * 1181, dt * 10); 408 } 409 410 static void gen5_rps_update(struct intel_rps *rps) 411 { 412 spin_lock_irq(&mchdev_lock); 413 __gen5_ips_update(&rps->ips); 414 spin_unlock_irq(&mchdev_lock); 415 } 416 417 static unsigned int gen5_invert_freq(struct intel_rps *rps, 418 unsigned int val) 419 { 420 /* Invert the frequency bin into an ips delay */ 421 val = rps->max_freq - val; 422 val = rps->min_freq + val; 423 424 return val; 425 } 426 427 static int __gen5_rps_set(struct intel_rps *rps, u8 val) 428 { 429 struct intel_uncore *uncore = rps_to_uncore(rps); 430 u16 rgvswctl; 431 432 lockdep_assert_held(&mchdev_lock); 433 434 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 435 if (rgvswctl & MEMCTL_CMD_STS) { 436 drm_dbg(&rps_to_i915(rps)->drm, 437 "gpu busy, RCS change rejected\n"); 438 return -EBUSY; /* still busy with another command */ 439 } 440 441 /* Invert the frequency bin into an ips delay */ 442 val = gen5_invert_freq(rps, val); 443 444 rgvswctl = 445 (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 446 (val << MEMCTL_FREQ_SHIFT) | 447 MEMCTL_SFCAVM; 448 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 449 intel_uncore_posting_read16(uncore, MEMSWCTL); 450 451 rgvswctl |= MEMCTL_CMD_STS; 452 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 453 454 return 0; 455 } 456 457 static int gen5_rps_set(struct intel_rps *rps, u8 val) 458 { 459 int err; 460 461 spin_lock_irq(&mchdev_lock); 462 err = __gen5_rps_set(rps, val); 463 spin_unlock_irq(&mchdev_lock); 464 465 return err; 466 } 467 468 static unsigned long intel_pxfreq(u32 vidfreq) 469 { 470 int div = (vidfreq & 0x3f0000) >> 16; 471 int post = (vidfreq & 0x3000) >> 12; 472 int pre = (vidfreq & 0x7); 473 474 if (!pre) 475 return 0; 476 477 return div * 133333 / (pre << post); 478 } 479 480 static unsigned int init_emon(struct intel_uncore *uncore) 481 { 482 u8 pxw[16]; 483 int i; 484 485 /* Disable to program */ 486 intel_uncore_write(uncore, ECR, 0); 487 intel_uncore_posting_read(uncore, ECR); 488 489 /* Program energy weights for various events */ 490 intel_uncore_write(uncore, SDEW, 0x15040d00); 491 intel_uncore_write(uncore, CSIEW0, 0x007f0000); 492 intel_uncore_write(uncore, CSIEW1, 0x1e220004); 493 intel_uncore_write(uncore, CSIEW2, 0x04000004); 494 495 for (i = 0; i < 5; i++) 496 intel_uncore_write(uncore, PEW(i), 0); 497 for (i = 0; i < 3; i++) 498 intel_uncore_write(uncore, DEW(i), 0); 499 500 /* Program P-state weights to account for frequency power adjustment */ 501 for (i = 0; i < 16; i++) { 502 u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); 503 unsigned int freq = intel_pxfreq(pxvidfreq); 504 unsigned int vid = 505 (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 506 unsigned int val; 507 508 val = vid * vid * freq / 1000 * 255; 509 val /= 127 * 127 * 900; 510 511 pxw[i] = val; 512 } 513 /* Render standby states get 0 weight */ 514 pxw[14] = 0; 515 pxw[15] = 0; 516 517 for (i = 0; i < 4; i++) { 518 intel_uncore_write(uncore, PXW(i), 519 pxw[i * 4 + 0] << 24 | 520 pxw[i * 4 + 1] << 16 | 521 pxw[i * 4 + 2] << 8 | 522 pxw[i * 4 + 3] << 0); 523 } 524 525 /* Adjust magic regs to magic values (more experimental results) */ 526 intel_uncore_write(uncore, OGW0, 0); 527 intel_uncore_write(uncore, OGW1, 0); 528 intel_uncore_write(uncore, EG0, 0x00007f00); 529 intel_uncore_write(uncore, EG1, 0x0000000e); 530 intel_uncore_write(uncore, EG2, 0x000e0000); 531 intel_uncore_write(uncore, EG3, 0x68000300); 532 intel_uncore_write(uncore, EG4, 0x42000000); 533 intel_uncore_write(uncore, EG5, 
0x00140031); 534 intel_uncore_write(uncore, EG6, 0); 535 intel_uncore_write(uncore, EG7, 0); 536 537 for (i = 0; i < 8; i++) 538 intel_uncore_write(uncore, PXWL(i), 0); 539 540 /* Enable PMON + select events */ 541 intel_uncore_write(uncore, ECR, 0x80000019); 542 543 return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; 544 } 545 546 static bool gen5_rps_enable(struct intel_rps *rps) 547 { 548 struct drm_i915_private *i915 = rps_to_i915(rps); 549 struct intel_uncore *uncore = rps_to_uncore(rps); 550 u8 fstart, vstart; 551 u32 rgvmodectl; 552 553 spin_lock_irq(&mchdev_lock); 554 555 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 556 557 /* Enable temp reporting */ 558 intel_uncore_write16(uncore, PMMISC, 559 intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); 560 intel_uncore_write16(uncore, TSC1, 561 intel_uncore_read16(uncore, TSC1) | TSE); 562 563 /* 100ms RC evaluation intervals */ 564 intel_uncore_write(uncore, RCUPEI, 100000); 565 intel_uncore_write(uncore, RCDNEI, 100000); 566 567 /* Set max/min thresholds to 90ms and 80ms respectively */ 568 intel_uncore_write(uncore, RCBMAXAVG, 90000); 569 intel_uncore_write(uncore, RCBMINAVG, 80000); 570 571 intel_uncore_write(uncore, MEMIHYST, 1); 572 573 /* Set up min, max, and cur for interrupt handling */ 574 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 575 MEMMODE_FSTART_SHIFT; 576 577 vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & 578 PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 579 580 intel_uncore_write(uncore, 581 MEMINTREN, 582 MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 583 584 intel_uncore_write(uncore, VIDSTART, vstart); 585 intel_uncore_posting_read(uncore, VIDSTART); 586 587 rgvmodectl |= MEMMODE_SWMODE_EN; 588 intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); 589 590 if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & 591 MEMCTL_CMD_STS) == 0, 10)) 592 drm_err(&uncore->i915->drm, 593 "stuck trying to change perf mode\n"); 594 mdelay(1); 595 596 __gen5_rps_set(rps, rps->cur_freq); 597 598 rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); 599 rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); 600 rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); 601 rps->ips.last_time1 = jiffies_to_msecs(jiffies); 602 603 rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); 604 rps->ips.last_time2 = ktime_get_raw_ns(); 605 606 spin_lock(&i915->irq_lock); 607 ilk_enable_display_irq(i915, DE_PCU_EVENT); 608 spin_unlock(&i915->irq_lock); 609 610 spin_unlock_irq(&mchdev_lock); 611 612 rps->ips.corr = init_emon(uncore); 613 614 return true; 615 } 616 617 static void gen5_rps_disable(struct intel_rps *rps) 618 { 619 struct drm_i915_private *i915 = rps_to_i915(rps); 620 struct intel_uncore *uncore = rps_to_uncore(rps); 621 u16 rgvswctl; 622 623 spin_lock_irq(&mchdev_lock); 624 625 spin_lock(&i915->irq_lock); 626 ilk_disable_display_irq(i915, DE_PCU_EVENT); 627 spin_unlock(&i915->irq_lock); 628 629 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 630 631 /* Ack interrupts, disable EFC interrupt */ 632 intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0); 633 intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); 634 635 /* Go back to the starting frequency */ 636 __gen5_rps_set(rps, rps->idle_freq); 637 mdelay(1); 638 rgvswctl |= MEMCTL_CMD_STS; 639 intel_uncore_write(uncore, MEMSWCTL, rgvswctl); 640 mdelay(1); 641 642 spin_unlock_irq(&mchdev_lock); 643 } 644 645 static u32 rps_limits(struct intel_rps *rps, u8 val) 646 { 647 u32 limits; 648 649 /* 650 * Only set the down limit when we've reached the 
lowest level to avoid 651 * getting more interrupts, otherwise leave this clear. This prevents a 652 * race in the hw when coming out of rc6: There's a tiny window where 653 * the hw runs at the minimal clock before selecting the desired 654 * frequency, if the down threshold expires in that window we will not 655 * receive a down interrupt. 656 */ 657 if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { 658 limits = rps->max_freq_softlimit << 23; 659 if (val <= rps->min_freq_softlimit) 660 limits |= rps->min_freq_softlimit << 14; 661 } else { 662 limits = rps->max_freq_softlimit << 24; 663 if (val <= rps->min_freq_softlimit) 664 limits |= rps->min_freq_softlimit << 16; 665 } 666 667 return limits; 668 } 669 670 static void rps_set_power(struct intel_rps *rps, int new_power) 671 { 672 struct intel_gt *gt = rps_to_gt(rps); 673 struct intel_uncore *uncore = gt->uncore; 674 u32 threshold_up = 0, threshold_down = 0; /* in % */ 675 u32 ei_up = 0, ei_down = 0; 676 677 lockdep_assert_held(&rps->power.mutex); 678 679 if (new_power == rps->power.mode) 680 return; 681 682 threshold_up = 95; 683 threshold_down = 85; 684 685 /* Note the units here are not exactly 1us, but 1280ns. */ 686 switch (new_power) { 687 case LOW_POWER: 688 ei_up = 16000; 689 ei_down = 32000; 690 break; 691 692 case BETWEEN: 693 ei_up = 13000; 694 ei_down = 32000; 695 break; 696 697 case HIGH_POWER: 698 ei_up = 10000; 699 ei_down = 32000; 700 break; 701 } 702 703 /* When byt can survive without system hang with dynamic 704 * sw freq adjustments, this restriction can be lifted. 705 */ 706 if (IS_VALLEYVIEW(gt->i915)) 707 goto skip_hw_write; 708 709 GT_TRACE(gt, 710 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n", 711 new_power, threshold_up, ei_up, threshold_down, ei_down); 712 713 set(uncore, GEN6_RP_UP_EI, 714 intel_gt_ns_to_pm_interval(gt, ei_up * 1000)); 715 set(uncore, GEN6_RP_UP_THRESHOLD, 716 intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10)); 717 718 set(uncore, GEN6_RP_DOWN_EI, 719 intel_gt_ns_to_pm_interval(gt, ei_down * 1000)); 720 set(uncore, GEN6_RP_DOWN_THRESHOLD, 721 intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10)); 722 723 set(uncore, GEN6_RP_CONTROL, 724 (GRAPHICS_VER(gt->i915) > 9 ? 
0 : GEN6_RP_MEDIA_TURBO) | 725 GEN6_RP_MEDIA_HW_NORMAL_MODE | 726 GEN6_RP_MEDIA_IS_GFX | 727 GEN6_RP_ENABLE | 728 GEN6_RP_UP_BUSY_AVG | 729 GEN6_RP_DOWN_IDLE_AVG); 730 731 skip_hw_write: 732 rps->power.mode = new_power; 733 rps->power.up_threshold = threshold_up; 734 rps->power.down_threshold = threshold_down; 735 } 736 737 static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) 738 { 739 int new_power; 740 741 new_power = rps->power.mode; 742 switch (rps->power.mode) { 743 case LOW_POWER: 744 if (val > rps->efficient_freq + 1 && 745 val > rps->cur_freq) 746 new_power = BETWEEN; 747 break; 748 749 case BETWEEN: 750 if (val <= rps->efficient_freq && 751 val < rps->cur_freq) 752 new_power = LOW_POWER; 753 else if (val >= rps->rp0_freq && 754 val > rps->cur_freq) 755 new_power = HIGH_POWER; 756 break; 757 758 case HIGH_POWER: 759 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && 760 val < rps->cur_freq) 761 new_power = BETWEEN; 762 break; 763 } 764 /* Max/min bins are special */ 765 if (val <= rps->min_freq_softlimit) 766 new_power = LOW_POWER; 767 if (val >= rps->max_freq_softlimit) 768 new_power = HIGH_POWER; 769 770 mutex_lock(&rps->power.mutex); 771 if (rps->power.interactive) 772 new_power = HIGH_POWER; 773 rps_set_power(rps, new_power); 774 mutex_unlock(&rps->power.mutex); 775 } 776 777 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) 778 { 779 GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", 780 str_yes_no(interactive)); 781 782 mutex_lock(&rps->power.mutex); 783 if (interactive) { 784 if (!rps->power.interactive++ && intel_rps_is_active(rps)) 785 rps_set_power(rps, HIGH_POWER); 786 } else { 787 GEM_BUG_ON(!rps->power.interactive); 788 rps->power.interactive--; 789 } 790 mutex_unlock(&rps->power.mutex); 791 } 792 793 static int gen6_rps_set(struct intel_rps *rps, u8 val) 794 { 795 struct intel_uncore *uncore = rps_to_uncore(rps); 796 struct drm_i915_private *i915 = rps_to_i915(rps); 797 u32 swreq; 798 799 GEM_BUG_ON(rps_uses_slpc(rps)); 800 801 if (GRAPHICS_VER(i915) >= 9) 802 swreq = GEN9_FREQUENCY(val); 803 else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 804 swreq = HSW_FREQUENCY(val); 805 else 806 swreq = (GEN6_FREQUENCY(val) | 807 GEN6_OFFSET(0) | 808 GEN6_AGGRESSIVE_TURBO); 809 set(uncore, GEN6_RPNSWREQ, swreq); 810 811 GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n", 812 val, intel_gpu_freq(rps, val), swreq); 813 814 return 0; 815 } 816 817 static int vlv_rps_set(struct intel_rps *rps, u8 val) 818 { 819 struct drm_i915_private *i915 = rps_to_i915(rps); 820 int err; 821 822 vlv_punit_get(i915); 823 err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); 824 vlv_punit_put(i915); 825 826 GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n", 827 val, intel_gpu_freq(rps, val)); 828 829 return err; 830 } 831 832 static int rps_set(struct intel_rps *rps, u8 val, bool update) 833 { 834 struct drm_i915_private *i915 = rps_to_i915(rps); 835 int err; 836 837 if (val == rps->last_freq) 838 return 0; 839 840 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) 841 err = vlv_rps_set(rps, val); 842 else if (GRAPHICS_VER(i915) >= 6) 843 err = gen6_rps_set(rps, val); 844 else 845 err = gen5_rps_set(rps, val); 846 if (err) 847 return err; 848 849 if (update && GRAPHICS_VER(i915) >= 6) 850 gen6_rps_set_thresholds(rps, val); 851 rps->last_freq = val; 852 853 return 0; 854 } 855 856 void intel_rps_unpark(struct intel_rps *rps) 857 { 858 if (!intel_rps_is_enabled(rps)) 859 return; 860 861 GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq); 862 863 /* 864 * 
Use the user's desired frequency as a guide, but for better 865 * performance, jump directly to RPe as our starting frequency. 866 */ 867 mutex_lock(&rps->lock); 868 869 intel_rps_set_active(rps); 870 intel_rps_set(rps, 871 clamp(rps->cur_freq, 872 rps->min_freq_softlimit, 873 rps->max_freq_softlimit)); 874 875 mutex_unlock(&rps->lock); 876 877 rps->pm_iir = 0; 878 if (intel_rps_has_interrupts(rps)) 879 rps_enable_interrupts(rps); 880 if (intel_rps_uses_timer(rps)) 881 rps_start_timer(rps); 882 883 if (GRAPHICS_VER(rps_to_i915(rps)) == 5) 884 gen5_rps_update(rps); 885 } 886 887 void intel_rps_park(struct intel_rps *rps) 888 { 889 int adj; 890 891 if (!intel_rps_is_enabled(rps)) 892 return; 893 894 if (!intel_rps_clear_active(rps)) 895 return; 896 897 if (intel_rps_uses_timer(rps)) 898 rps_stop_timer(rps); 899 if (intel_rps_has_interrupts(rps)) 900 rps_disable_interrupts(rps); 901 902 if (rps->last_freq <= rps->idle_freq) 903 return; 904 905 /* 906 * The punit delays the write of the frequency and voltage until it 907 * determines the GPU is awake. During normal usage we don't want to 908 * waste power changing the frequency if the GPU is sleeping (rc6). 909 * However, the GPU and driver is now idle and we do not want to delay 910 * switching to minimum voltage (reducing power whilst idle) as we do 911 * not expect to be woken in the near future and so must flush the 912 * change by waking the device. 913 * 914 * We choose to take the media powerwell (either would do to trick the 915 * punit into committing the voltage change) as that takes a lot less 916 * power than the render powerwell. 917 */ 918 intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); 919 rps_set(rps, rps->idle_freq, false); 920 intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); 921 922 /* 923 * Since we will try and restart from the previously requested 924 * frequency on unparking, treat this idle point as a downclock 925 * interrupt and reduce the frequency for resume. If we park/unpark 926 * more frequently than the rps worker can run, we will not respond 927 * to any EI and never see a change in frequency. 928 * 929 * (Note we accommodate Cherryview's limitation of only using an 930 * even bin by applying it to all.) 
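	 *
	 * So a previous downwards adjustment is doubled below (-1 becomes -2,
	 * -2 becomes -4), while anything else restarts at -2, keeping the
	 * step an even number of bins.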
931 */ 932 adj = rps->last_adj; 933 if (adj < 0) 934 adj *= 2; 935 else /* CHV needs even encode values */ 936 adj = -2; 937 rps->last_adj = adj; 938 rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq); 939 if (rps->cur_freq < rps->efficient_freq) { 940 rps->cur_freq = rps->efficient_freq; 941 rps->last_adj = 0; 942 } 943 944 GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); 945 } 946 947 u32 intel_rps_get_boost_frequency(struct intel_rps *rps) 948 { 949 struct intel_guc_slpc *slpc; 950 951 if (rps_uses_slpc(rps)) { 952 slpc = rps_to_slpc(rps); 953 954 return slpc->boost_freq; 955 } else { 956 return intel_gpu_freq(rps, rps->boost_freq); 957 } 958 } 959 960 static int rps_set_boost_freq(struct intel_rps *rps, u32 val) 961 { 962 bool boost = false; 963 964 /* Validate against (static) hardware limits */ 965 val = intel_freq_opcode(rps, val); 966 if (val < rps->min_freq || val > rps->max_freq) 967 return -EINVAL; 968 969 mutex_lock(&rps->lock); 970 if (val != rps->boost_freq) { 971 rps->boost_freq = val; 972 boost = atomic_read(&rps->num_waiters); 973 } 974 mutex_unlock(&rps->lock); 975 if (boost) 976 schedule_work(&rps->work); 977 978 return 0; 979 } 980 981 int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq) 982 { 983 struct intel_guc_slpc *slpc; 984 985 if (rps_uses_slpc(rps)) { 986 slpc = rps_to_slpc(rps); 987 988 return intel_guc_slpc_set_boost_freq(slpc, freq); 989 } else { 990 return rps_set_boost_freq(rps, freq); 991 } 992 } 993 994 void intel_rps_dec_waiters(struct intel_rps *rps) 995 { 996 struct intel_guc_slpc *slpc; 997 998 if (rps_uses_slpc(rps)) { 999 slpc = rps_to_slpc(rps); 1000 1001 intel_guc_slpc_dec_waiters(slpc); 1002 } else { 1003 atomic_dec(&rps->num_waiters); 1004 } 1005 } 1006 1007 void intel_rps_boost(struct i915_request *rq) 1008 { 1009 struct intel_guc_slpc *slpc; 1010 1011 if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) 1012 return; 1013 1014 /* Serializes with i915_request_retire() */ 1015 if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { 1016 struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; 1017 1018 if (rps_uses_slpc(rps)) { 1019 slpc = rps_to_slpc(rps); 1020 1021 if (slpc->min_freq_softlimit >= slpc->boost_freq) 1022 return; 1023 1024 /* Return if old value is non zero */ 1025 if (!atomic_fetch_inc(&slpc->num_waiters)) { 1026 GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", 1027 rq->fence.context, rq->fence.seqno); 1028 schedule_work(&slpc->boost_work); 1029 } 1030 1031 return; 1032 } 1033 1034 if (atomic_fetch_inc(&rps->num_waiters)) 1035 return; 1036 1037 if (!intel_rps_is_active(rps)) 1038 return; 1039 1040 GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", 1041 rq->fence.context, rq->fence.seqno); 1042 1043 if (READ_ONCE(rps->cur_freq) < rps->boost_freq) 1044 schedule_work(&rps->work); 1045 1046 WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */ 1047 } 1048 } 1049 1050 int intel_rps_set(struct intel_rps *rps, u8 val) 1051 { 1052 int err; 1053 1054 lockdep_assert_held(&rps->lock); 1055 GEM_BUG_ON(val > rps->max_freq); 1056 GEM_BUG_ON(val < rps->min_freq); 1057 1058 if (intel_rps_is_active(rps)) { 1059 err = rps_set(rps, val, true); 1060 if (err) 1061 return err; 1062 1063 /* 1064 * Make sure we continue to get interrupts 1065 * until we hit the minimum or maximum frequencies. 
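		 *
		 * rps_limits() only arms the down-interrupt limit once we sit
		 * at the softlimit minimum, and rps_pm_mask() leaves unmasked
		 * only those events that can still move us within the
		 * softlimit range.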
1066 */ 1067 if (intel_rps_has_interrupts(rps)) { 1068 struct intel_uncore *uncore = rps_to_uncore(rps); 1069 1070 set(uncore, 1071 GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val)); 1072 1073 set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val)); 1074 } 1075 } 1076 1077 rps->cur_freq = val; 1078 return 0; 1079 } 1080 1081 static u32 intel_rps_read_state_cap(struct intel_rps *rps) 1082 { 1083 struct drm_i915_private *i915 = rps_to_i915(rps); 1084 struct intel_uncore *uncore = rps_to_uncore(rps); 1085 1086 if (IS_PONTEVECCHIO(i915)) 1087 return intel_uncore_read(uncore, PVC_RP_STATE_CAP); 1088 else if (IS_XEHPSDV(i915)) 1089 return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); 1090 else if (IS_GEN9_LP(i915)) 1091 return intel_uncore_read(uncore, BXT_RP_STATE_CAP); 1092 else 1093 return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); 1094 } 1095 1096 static void 1097 mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) 1098 { 1099 struct intel_uncore *uncore = rps_to_uncore(rps); 1100 u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ? 1101 intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) : 1102 intel_uncore_read(uncore, MTL_RP_STATE_CAP); 1103 u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ? 1104 intel_uncore_read(uncore, MTL_MPE_FREQUENCY) : 1105 intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY); 1106 1107 /* MTL values are in units of 16.67 MHz */ 1108 caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap); 1109 caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap); 1110 caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe); 1111 } 1112 1113 static void 1114 __gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) 1115 { 1116 struct drm_i915_private *i915 = rps_to_i915(rps); 1117 u32 rp_state_cap; 1118 1119 rp_state_cap = intel_rps_read_state_cap(rps); 1120 1121 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 1122 if (IS_GEN9_LP(i915)) { 1123 caps->rp0_freq = (rp_state_cap >> 16) & 0xff; 1124 caps->rp1_freq = (rp_state_cap >> 8) & 0xff; 1125 caps->min_freq = (rp_state_cap >> 0) & 0xff; 1126 } else { 1127 caps->rp0_freq = (rp_state_cap >> 0) & 0xff; 1128 if (GRAPHICS_VER(i915) >= 10) 1129 caps->rp1_freq = REG_FIELD_GET(RPE_MASK, 1130 intel_uncore_read(to_gt(i915)->uncore, 1131 GEN10_FREQ_INFO_REC)); 1132 else 1133 caps->rp1_freq = (rp_state_cap >> 8) & 0xff; 1134 caps->min_freq = (rp_state_cap >> 16) & 0xff; 1135 } 1136 1137 if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { 1138 /* 1139 * In this case rp_state_cap register reports frequencies in 1140 * units of 50 MHz. Convert these to the actual "hw unit", i.e. 
1141 * units of 16.67 MHz 1142 */ 1143 caps->rp0_freq *= GEN9_FREQ_SCALER; 1144 caps->rp1_freq *= GEN9_FREQ_SCALER; 1145 caps->min_freq *= GEN9_FREQ_SCALER; 1146 } 1147 } 1148 1149 /** 1150 * gen6_rps_get_freq_caps - Get freq caps exposed by HW 1151 * @rps: the intel_rps structure 1152 * @caps: returned freq caps 1153 * 1154 * Returned "caps" frequencies should be converted to MHz using 1155 * intel_gpu_freq() 1156 */ 1157 void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) 1158 { 1159 struct drm_i915_private *i915 = rps_to_i915(rps); 1160 1161 if (IS_METEORLAKE(i915)) 1162 return mtl_get_freq_caps(rps, caps); 1163 else 1164 return __gen6_rps_get_freq_caps(rps, caps); 1165 } 1166 1167 static void gen6_rps_init(struct intel_rps *rps) 1168 { 1169 struct drm_i915_private *i915 = rps_to_i915(rps); 1170 struct intel_rps_freq_caps caps; 1171 1172 gen6_rps_get_freq_caps(rps, &caps); 1173 rps->rp0_freq = caps.rp0_freq; 1174 rps->rp1_freq = caps.rp1_freq; 1175 rps->min_freq = caps.min_freq; 1176 1177 /* hw_max = RP0 until we check for overclocking */ 1178 rps->max_freq = rps->rp0_freq; 1179 1180 rps->efficient_freq = rps->rp1_freq; 1181 if (IS_HASWELL(i915) || IS_BROADWELL(i915) || 1182 IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { 1183 u32 ddcc_status = 0; 1184 u32 mult = 1; 1185 1186 if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) 1187 mult = GEN9_FREQ_SCALER; 1188 if (snb_pcode_read(rps_to_gt(rps)->uncore, 1189 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 1190 &ddcc_status, NULL) == 0) 1191 rps->efficient_freq = 1192 clamp_t(u32, 1193 ((ddcc_status >> 8) & 0xff) * mult, 1194 rps->min_freq, 1195 rps->max_freq); 1196 } 1197 } 1198 1199 static bool rps_reset(struct intel_rps *rps) 1200 { 1201 struct drm_i915_private *i915 = rps_to_i915(rps); 1202 1203 /* force a reset */ 1204 rps->power.mode = -1; 1205 rps->last_freq = -1; 1206 1207 if (rps_set(rps, rps->min_freq, true)) { 1208 drm_err(&i915->drm, "Failed to reset RPS to initial values\n"); 1209 return false; 1210 } 1211 1212 rps->cur_freq = rps->min_freq; 1213 return true; 1214 } 1215 1216 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 1217 static bool gen9_rps_enable(struct intel_rps *rps) 1218 { 1219 struct intel_gt *gt = rps_to_gt(rps); 1220 struct intel_uncore *uncore = gt->uncore; 1221 1222 /* Program defaults and thresholds for RPS */ 1223 if (GRAPHICS_VER(gt->i915) == 9) 1224 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 1225 GEN9_FREQUENCY(rps->rp1_freq)); 1226 1227 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); 1228 1229 rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; 1230 1231 return rps_reset(rps); 1232 } 1233 1234 static bool gen8_rps_enable(struct intel_rps *rps) 1235 { 1236 struct intel_uncore *uncore = rps_to_uncore(rps); 1237 1238 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 1239 HSW_FREQUENCY(rps->rp1_freq)); 1240 1241 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1242 1243 rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; 1244 1245 return rps_reset(rps); 1246 } 1247 1248 static bool gen6_rps_enable(struct intel_rps *rps) 1249 { 1250 struct intel_uncore *uncore = rps_to_uncore(rps); 1251 1252 /* Power down if completely idle for over 50ms */ 1253 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); 1254 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1255 1256 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 1257 GEN6_PM_RP_DOWN_THRESHOLD | 1258 GEN6_PM_RP_DOWN_TIMEOUT); 1259 1260 return 
rps_reset(rps); 1261 } 1262 1263 static int chv_rps_max_freq(struct intel_rps *rps) 1264 { 1265 struct drm_i915_private *i915 = rps_to_i915(rps); 1266 struct intel_gt *gt = rps_to_gt(rps); 1267 u32 val; 1268 1269 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 1270 1271 switch (gt->info.sseu.eu_total) { 1272 case 8: 1273 /* (2 * 4) config */ 1274 val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; 1275 break; 1276 case 12: 1277 /* (2 * 6) config */ 1278 val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; 1279 break; 1280 case 16: 1281 /* (2 * 8) config */ 1282 default: 1283 /* Setting (2 * 8) Min RP0 for any other combination */ 1284 val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; 1285 break; 1286 } 1287 1288 return val & FB_GFX_FREQ_FUSE_MASK; 1289 } 1290 1291 static int chv_rps_rpe_freq(struct intel_rps *rps) 1292 { 1293 struct drm_i915_private *i915 = rps_to_i915(rps); 1294 u32 val; 1295 1296 val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); 1297 val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; 1298 1299 return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 1300 } 1301 1302 static int chv_rps_guar_freq(struct intel_rps *rps) 1303 { 1304 struct drm_i915_private *i915 = rps_to_i915(rps); 1305 u32 val; 1306 1307 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 1308 1309 return val & FB_GFX_FREQ_FUSE_MASK; 1310 } 1311 1312 static u32 chv_rps_min_freq(struct intel_rps *rps) 1313 { 1314 struct drm_i915_private *i915 = rps_to_i915(rps); 1315 u32 val; 1316 1317 val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); 1318 val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; 1319 1320 return val & FB_GFX_FREQ_FUSE_MASK; 1321 } 1322 1323 static bool chv_rps_enable(struct intel_rps *rps) 1324 { 1325 struct intel_uncore *uncore = rps_to_uncore(rps); 1326 struct drm_i915_private *i915 = rps_to_i915(rps); 1327 u32 val; 1328 1329 /* 1: Program defaults and thresholds for RPS*/ 1330 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1331 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1332 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1333 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1334 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1335 1336 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1337 1338 /* 2: Enable RPS */ 1339 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1340 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1341 GEN6_RP_MEDIA_IS_GFX | 1342 GEN6_RP_ENABLE | 1343 GEN6_RP_UP_BUSY_AVG | 1344 GEN6_RP_DOWN_IDLE_AVG); 1345 1346 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 1347 GEN6_PM_RP_DOWN_THRESHOLD | 1348 GEN6_PM_RP_DOWN_TIMEOUT); 1349 1350 /* Setting Fixed Bias */ 1351 vlv_punit_get(i915); 1352 1353 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; 1354 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1355 1356 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1357 1358 vlv_punit_put(i915); 1359 1360 /* RPS code assumes GPLL is used */ 1361 drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, 1362 "GPLL not enabled\n"); 1363 1364 drm_dbg(&i915->drm, "GPLL enabled? 
%s\n", 1365 str_yes_no(val & GPLLENABLE)); 1366 drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); 1367 1368 return rps_reset(rps); 1369 } 1370 1371 static int vlv_rps_guar_freq(struct intel_rps *rps) 1372 { 1373 struct drm_i915_private *i915 = rps_to_i915(rps); 1374 u32 val, rp1; 1375 1376 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1377 1378 rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; 1379 rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 1380 1381 return rp1; 1382 } 1383 1384 static int vlv_rps_max_freq(struct intel_rps *rps) 1385 { 1386 struct drm_i915_private *i915 = rps_to_i915(rps); 1387 u32 val, rp0; 1388 1389 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1390 1391 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 1392 /* Clamp to max */ 1393 rp0 = min_t(u32, rp0, 0xea); 1394 1395 return rp0; 1396 } 1397 1398 static int vlv_rps_rpe_freq(struct intel_rps *rps) 1399 { 1400 struct drm_i915_private *i915 = rps_to_i915(rps); 1401 u32 val, rpe; 1402 1403 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 1404 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 1405 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 1406 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 1407 1408 return rpe; 1409 } 1410 1411 static int vlv_rps_min_freq(struct intel_rps *rps) 1412 { 1413 struct drm_i915_private *i915 = rps_to_i915(rps); 1414 u32 val; 1415 1416 val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; 1417 /* 1418 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value 1419 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on 1420 * a BYT-M B0 the above register contains 0xbf. Moreover when setting 1421 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 1422 * to make sure it matches what Punit accepts. 1423 */ 1424 return max_t(u32, val, 0xc0); 1425 } 1426 1427 static bool vlv_rps_enable(struct intel_rps *rps) 1428 { 1429 struct intel_uncore *uncore = rps_to_uncore(rps); 1430 struct drm_i915_private *i915 = rps_to_i915(rps); 1431 u32 val; 1432 1433 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1434 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1435 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1436 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1437 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1438 1439 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1440 1441 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1442 GEN6_RP_MEDIA_TURBO | 1443 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1444 GEN6_RP_MEDIA_IS_GFX | 1445 GEN6_RP_ENABLE | 1446 GEN6_RP_UP_BUSY_AVG | 1447 GEN6_RP_DOWN_IDLE_CONT); 1448 1449 /* WaGsvRC0ResidencyMethod:vlv */ 1450 rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; 1451 1452 vlv_punit_get(i915); 1453 1454 /* Setting Fixed Bias */ 1455 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; 1456 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1457 1458 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1459 1460 vlv_punit_put(i915); 1461 1462 /* RPS code assumes GPLL is used */ 1463 drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, 1464 "GPLL not enabled\n"); 1465 1466 drm_dbg(&i915->drm, "GPLL enabled? 
%s\n", 1467 str_yes_no(val & GPLLENABLE)); 1468 drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); 1469 1470 return rps_reset(rps); 1471 } 1472 1473 static unsigned long __ips_gfx_val(struct intel_ips *ips) 1474 { 1475 struct intel_rps *rps = container_of(ips, typeof(*rps), ips); 1476 struct intel_uncore *uncore = rps_to_uncore(rps); 1477 unsigned int t, state1, state2; 1478 u32 pxvid, ext_v; 1479 u64 corr, corr2; 1480 1481 lockdep_assert_held(&mchdev_lock); 1482 1483 pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); 1484 pxvid = (pxvid >> 24) & 0x7f; 1485 ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); 1486 1487 state1 = ext_v; 1488 1489 /* Revel in the empirically derived constants */ 1490 1491 /* Correction factor in 1/100000 units */ 1492 t = ips_mch_val(uncore); 1493 if (t > 80) 1494 corr = t * 2349 + 135940; 1495 else if (t >= 50) 1496 corr = t * 964 + 29317; 1497 else /* < 50 */ 1498 corr = t * 301 + 1004; 1499 1500 corr = div_u64(corr * 150142 * state1, 10000) - 78642; 1501 corr2 = div_u64(corr, 100000) * ips->corr; 1502 1503 state2 = div_u64(corr2 * state1, 10000); 1504 state2 /= 100; /* convert to mW */ 1505 1506 __gen5_ips_update(ips); 1507 1508 return ips->gfx_power + state2; 1509 } 1510 1511 static bool has_busy_stats(struct intel_rps *rps) 1512 { 1513 struct intel_engine_cs *engine; 1514 enum intel_engine_id id; 1515 1516 for_each_engine(engine, rps_to_gt(rps), id) { 1517 if (!intel_engine_supports_stats(engine)) 1518 return false; 1519 } 1520 1521 return true; 1522 } 1523 1524 void intel_rps_enable(struct intel_rps *rps) 1525 { 1526 struct drm_i915_private *i915 = rps_to_i915(rps); 1527 struct intel_uncore *uncore = rps_to_uncore(rps); 1528 bool enabled = false; 1529 1530 if (!HAS_RPS(i915)) 1531 return; 1532 1533 if (rps_uses_slpc(rps)) 1534 return; 1535 1536 intel_gt_check_clock_frequency(rps_to_gt(rps)); 1537 1538 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 1539 if (rps->max_freq <= rps->min_freq) 1540 /* leave disabled, no room for dynamic reclocking */; 1541 else if (IS_CHERRYVIEW(i915)) 1542 enabled = chv_rps_enable(rps); 1543 else if (IS_VALLEYVIEW(i915)) 1544 enabled = vlv_rps_enable(rps); 1545 else if (GRAPHICS_VER(i915) >= 9) 1546 enabled = gen9_rps_enable(rps); 1547 else if (GRAPHICS_VER(i915) >= 8) 1548 enabled = gen8_rps_enable(rps); 1549 else if (GRAPHICS_VER(i915) >= 6) 1550 enabled = gen6_rps_enable(rps); 1551 else if (IS_IRONLAKE_M(i915)) 1552 enabled = gen5_rps_enable(rps); 1553 else 1554 MISSING_CASE(GRAPHICS_VER(i915)); 1555 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 1556 if (!enabled) 1557 return; 1558 1559 GT_TRACE(rps_to_gt(rps), 1560 "min:%x, max:%x, freq:[%d, %d]\n", 1561 rps->min_freq, rps->max_freq, 1562 intel_gpu_freq(rps, rps->min_freq), 1563 intel_gpu_freq(rps, rps->max_freq)); 1564 1565 GEM_BUG_ON(rps->max_freq < rps->min_freq); 1566 GEM_BUG_ON(rps->idle_freq > rps->max_freq); 1567 1568 GEM_BUG_ON(rps->efficient_freq < rps->min_freq); 1569 GEM_BUG_ON(rps->efficient_freq > rps->max_freq); 1570 1571 if (has_busy_stats(rps)) 1572 intel_rps_set_timer(rps); 1573 else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11) 1574 intel_rps_set_interrupts(rps); 1575 else 1576 /* Ironlake currently uses intel_ips.ko */ {} 1577 1578 intel_rps_set_enabled(rps); 1579 } 1580 1581 static void gen6_rps_disable(struct intel_rps *rps) 1582 { 1583 set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); 1584 } 1585 1586 void intel_rps_disable(struct intel_rps *rps) 1587 { 1588 struct drm_i915_private *i915 = rps_to_i915(rps); 1589 1590 if 
(!intel_rps_is_enabled(rps)) 1591 return; 1592 1593 intel_rps_clear_enabled(rps); 1594 intel_rps_clear_interrupts(rps); 1595 intel_rps_clear_timer(rps); 1596 1597 if (GRAPHICS_VER(i915) >= 6) 1598 gen6_rps_disable(rps); 1599 else if (IS_IRONLAKE_M(i915)) 1600 gen5_rps_disable(rps); 1601 } 1602 1603 static int byt_gpu_freq(struct intel_rps *rps, int val) 1604 { 1605 /* 1606 * N = val - 0xb7 1607 * Slow = Fast = GPLL ref * N 1608 */ 1609 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); 1610 } 1611 1612 static int byt_freq_opcode(struct intel_rps *rps, int val) 1613 { 1614 return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; 1615 } 1616 1617 static int chv_gpu_freq(struct intel_rps *rps, int val) 1618 { 1619 /* 1620 * N = val / 2 1621 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 1622 */ 1623 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); 1624 } 1625 1626 static int chv_freq_opcode(struct intel_rps *rps, int val) 1627 { 1628 /* CHV needs even values */ 1629 return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; 1630 } 1631 1632 int intel_gpu_freq(struct intel_rps *rps, int val) 1633 { 1634 struct drm_i915_private *i915 = rps_to_i915(rps); 1635 1636 if (GRAPHICS_VER(i915) >= 9) 1637 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, 1638 GEN9_FREQ_SCALER); 1639 else if (IS_CHERRYVIEW(i915)) 1640 return chv_gpu_freq(rps, val); 1641 else if (IS_VALLEYVIEW(i915)) 1642 return byt_gpu_freq(rps, val); 1643 else if (GRAPHICS_VER(i915) >= 6) 1644 return val * GT_FREQUENCY_MULTIPLIER; 1645 else 1646 return val; 1647 } 1648 1649 int intel_freq_opcode(struct intel_rps *rps, int val) 1650 { 1651 struct drm_i915_private *i915 = rps_to_i915(rps); 1652 1653 if (GRAPHICS_VER(i915) >= 9) 1654 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, 1655 GT_FREQUENCY_MULTIPLIER); 1656 else if (IS_CHERRYVIEW(i915)) 1657 return chv_freq_opcode(rps, val); 1658 else if (IS_VALLEYVIEW(i915)) 1659 return byt_freq_opcode(rps, val); 1660 else if (GRAPHICS_VER(i915) >= 6) 1661 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); 1662 else 1663 return val; 1664 } 1665 1666 static void vlv_init_gpll_ref_freq(struct intel_rps *rps) 1667 { 1668 struct drm_i915_private *i915 = rps_to_i915(rps); 1669 1670 rps->gpll_ref_freq = 1671 vlv_get_cck_clock(i915, "GPLL ref", 1672 CCK_GPLL_CLOCK_CONTROL, 1673 i915->czclk_freq); 1674 1675 drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n", 1676 rps->gpll_ref_freq); 1677 } 1678 1679 static void vlv_rps_init(struct intel_rps *rps) 1680 { 1681 struct drm_i915_private *i915 = rps_to_i915(rps); 1682 1683 vlv_iosf_sb_get(i915, 1684 BIT(VLV_IOSF_SB_PUNIT) | 1685 BIT(VLV_IOSF_SB_NC) | 1686 BIT(VLV_IOSF_SB_CCK)); 1687 1688 vlv_init_gpll_ref_freq(rps); 1689 1690 rps->max_freq = vlv_rps_max_freq(rps); 1691 rps->rp0_freq = rps->max_freq; 1692 drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", 1693 intel_gpu_freq(rps, rps->max_freq), rps->max_freq); 1694 1695 rps->efficient_freq = vlv_rps_rpe_freq(rps); 1696 drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", 1697 intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); 1698 1699 rps->rp1_freq = vlv_rps_guar_freq(rps); 1700 drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 1701 intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); 1702 1703 rps->min_freq = vlv_rps_min_freq(rps); 1704 drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", 1705 intel_gpu_freq(rps, rps->min_freq), rps->min_freq); 1706 1707 vlv_iosf_sb_put(i915, 1708 BIT(VLV_IOSF_SB_PUNIT) | 1709 
BIT(VLV_IOSF_SB_NC) | 1710 BIT(VLV_IOSF_SB_CCK)); 1711 } 1712 1713 static void chv_rps_init(struct intel_rps *rps) 1714 { 1715 struct drm_i915_private *i915 = rps_to_i915(rps); 1716 1717 vlv_iosf_sb_get(i915, 1718 BIT(VLV_IOSF_SB_PUNIT) | 1719 BIT(VLV_IOSF_SB_NC) | 1720 BIT(VLV_IOSF_SB_CCK)); 1721 1722 vlv_init_gpll_ref_freq(rps); 1723 1724 rps->max_freq = chv_rps_max_freq(rps); 1725 rps->rp0_freq = rps->max_freq; 1726 drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", 1727 intel_gpu_freq(rps, rps->max_freq), rps->max_freq); 1728 1729 rps->efficient_freq = chv_rps_rpe_freq(rps); 1730 drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", 1731 intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); 1732 1733 rps->rp1_freq = chv_rps_guar_freq(rps); 1734 drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n", 1735 intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); 1736 1737 rps->min_freq = chv_rps_min_freq(rps); 1738 drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", 1739 intel_gpu_freq(rps, rps->min_freq), rps->min_freq); 1740 1741 vlv_iosf_sb_put(i915, 1742 BIT(VLV_IOSF_SB_PUNIT) | 1743 BIT(VLV_IOSF_SB_NC) | 1744 BIT(VLV_IOSF_SB_CCK)); 1745 1746 drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq | 1747 rps->rp1_freq | rps->min_freq) & 1, 1748 "Odd GPU freq values\n"); 1749 } 1750 1751 static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) 1752 { 1753 ei->ktime = ktime_get_raw(); 1754 ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); 1755 ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); 1756 } 1757 1758 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) 1759 { 1760 struct intel_uncore *uncore = rps_to_uncore(rps); 1761 const struct intel_rps_ei *prev = &rps->ei; 1762 struct intel_rps_ei now; 1763 u32 events = 0; 1764 1765 if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) 1766 return 0; 1767 1768 vlv_c0_read(uncore, &now); 1769 1770 if (prev->ktime) { 1771 u64 time, c0; 1772 u32 render, media; 1773 1774 time = ktime_us_delta(now.ktime, prev->ktime); 1775 1776 time *= rps_to_i915(rps)->czclk_freq; 1777 1778 /* Workload can be split between render + media, 1779 * e.g. SwapBuffers being blitted in X after being rendered in 1780 * mesa. To account for this we need to combine both engines 1781 * into our activity counter. 1782 */ 1783 render = now.render_c0 - prev->render_c0; 1784 media = now.media_c0 - prev->media_c0; 1785 c0 = max(render, media); 1786 c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ 1787 1788 if (c0 > time * rps->power.up_threshold) 1789 events = GEN6_PM_RP_UP_THRESHOLD; 1790 else if (c0 < time * rps->power.down_threshold) 1791 events = GEN6_PM_RP_DOWN_THRESHOLD; 1792 } 1793 1794 rps->ei = now; 1795 return events; 1796 } 1797 1798 static void rps_work(struct work_struct *work) 1799 { 1800 struct intel_rps *rps = container_of(work, typeof(*rps), work); 1801 struct intel_gt *gt = rps_to_gt(rps); 1802 struct drm_i915_private *i915 = rps_to_i915(rps); 1803 bool client_boost = false; 1804 int new_freq, adj, min, max; 1805 u32 pm_iir = 0; 1806 1807 spin_lock_irq(gt->irq_lock); 1808 pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events; 1809 client_boost = atomic_read(&rps->num_waiters); 1810 spin_unlock_irq(gt->irq_lock); 1811 1812 /* Make sure we didn't queue anything we're not going to process. 
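	 * (pm_iir and client_boost were sampled under gt->irq_lock above; the
	 * events masked by the interrupt handlers are unmasked again at "out"
	 * below.)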
*/ 1813 if (!pm_iir && !client_boost) 1814 goto out; 1815 1816 mutex_lock(&rps->lock); 1817 if (!intel_rps_is_active(rps)) { 1818 mutex_unlock(&rps->lock); 1819 return; 1820 } 1821 1822 pm_iir |= vlv_wa_c0_ei(rps, pm_iir); 1823 1824 adj = rps->last_adj; 1825 new_freq = rps->cur_freq; 1826 min = rps->min_freq_softlimit; 1827 max = rps->max_freq_softlimit; 1828 if (client_boost) 1829 max = rps->max_freq; 1830 1831 GT_TRACE(gt, 1832 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n", 1833 pm_iir, str_yes_no(client_boost), 1834 adj, new_freq, min, max); 1835 1836 if (client_boost && new_freq < rps->boost_freq) { 1837 new_freq = rps->boost_freq; 1838 adj = 0; 1839 } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { 1840 if (adj > 0) 1841 adj *= 2; 1842 else /* CHV needs even encode values */ 1843 adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; 1844 1845 if (new_freq >= rps->max_freq_softlimit) 1846 adj = 0; 1847 } else if (client_boost) { 1848 adj = 0; 1849 } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { 1850 if (rps->cur_freq > rps->efficient_freq) 1851 new_freq = rps->efficient_freq; 1852 else if (rps->cur_freq > rps->min_freq_softlimit) 1853 new_freq = rps->min_freq_softlimit; 1854 adj = 0; 1855 } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { 1856 if (adj < 0) 1857 adj *= 2; 1858 else /* CHV needs even encode values */ 1859 adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1; 1860 1861 if (new_freq <= rps->min_freq_softlimit) 1862 adj = 0; 1863 } else { /* unknown event */ 1864 adj = 0; 1865 } 1866 1867 /* 1868 * sysfs frequency limits may have snuck in while 1869 * servicing the interrupt 1870 */ 1871 new_freq += adj; 1872 new_freq = clamp_t(int, new_freq, min, max); 1873 1874 if (intel_rps_set(rps, new_freq)) { 1875 drm_dbg(&i915->drm, "Failed to set new GPU frequency\n"); 1876 adj = 0; 1877 } 1878 rps->last_adj = adj; 1879 1880 mutex_unlock(&rps->lock); 1881 1882 out: 1883 spin_lock_irq(gt->irq_lock); 1884 gen6_gt_pm_unmask_irq(gt, rps->pm_events); 1885 spin_unlock_irq(gt->irq_lock); 1886 } 1887 1888 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) 1889 { 1890 struct intel_gt *gt = rps_to_gt(rps); 1891 const u32 events = rps->pm_events & pm_iir; 1892 1893 lockdep_assert_held(gt->irq_lock); 1894 1895 if (unlikely(!events)) 1896 return; 1897 1898 GT_TRACE(gt, "irq events:%x\n", events); 1899 1900 gen6_gt_pm_mask_irq(gt, events); 1901 1902 rps->pm_iir |= events; 1903 schedule_work(&rps->work); 1904 } 1905 1906 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) 1907 { 1908 struct intel_gt *gt = rps_to_gt(rps); 1909 u32 events; 1910 1911 events = pm_iir & rps->pm_events; 1912 if (events) { 1913 spin_lock(gt->irq_lock); 1914 1915 GT_TRACE(gt, "irq events:%x\n", events); 1916 1917 gen6_gt_pm_mask_irq(gt, events); 1918 rps->pm_iir |= events; 1919 1920 schedule_work(&rps->work); 1921 spin_unlock(gt->irq_lock); 1922 } 1923 1924 if (GRAPHICS_VER(gt->i915) >= 8) 1925 return; 1926 1927 if (pm_iir & PM_VEBOX_USER_INTERRUPT) 1928 intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10); 1929 1930 if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) 1931 drm_dbg(&rps_to_i915(rps)->drm, 1932 "Command parser error, pm_iir 0x%08x\n", pm_iir); 1933 } 1934 1935 void gen5_rps_irq_handler(struct intel_rps *rps) 1936 { 1937 struct intel_uncore *uncore = rps_to_uncore(rps); 1938 u32 busy_up, busy_down, max_avg, min_avg; 1939 u8 new_freq; 1940 1941 spin_lock(&mchdev_lock); 1942 1943 intel_uncore_write16(uncore, 1944 MEMINTRSTS, 1945 intel_uncore_read(uncore, MEMINTRSTS)); 1946 1947 intel_uncore_write16(uncore, 
			     MEMINTRSTS, MEMINT_EVAL_CHG);
	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
	min_avg = intel_uncore_read(uncore, RCBMINAVG);

	/* Handle RCS change request from hw */
	new_freq = rps->cur_freq;
	if (busy_up > max_avg)
		new_freq++;
	else if (busy_down < min_avg)
		new_freq--;
	new_freq = clamp(new_freq,
			 rps->min_freq_softlimit,
			 rps->max_freq_softlimit);

	if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq))
		rps->cur_freq = new_freq;

	spin_unlock(&mchdev_lock);
}

void intel_rps_init_early(struct intel_rps *rps)
{
	mutex_init(&rps->lock);
	mutex_init(&rps->power.mutex);

	INIT_WORK(&rps->work, rps_work);
	timer_setup(&rps->timer, rps_timer, 0);

	atomic_set(&rps->num_waiters, 0);
}

void intel_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (rps_uses_slpc(rps))
		return;

	if (IS_CHERRYVIEW(i915))
		chv_rps_init(rps);
	else if (IS_VALLEYVIEW(i915))
		vlv_rps_init(rps);
	else if (GRAPHICS_VER(i915) >= 6)
		gen6_rps_init(rps);
	else if (IS_IRONLAKE_M(i915))
		gen5_rps_init(rps);

	/* Derive initial user preferences/limits from the hardware limits */
	rps->max_freq_softlimit = rps->max_freq;
	rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit;
	rps->min_freq_softlimit = rps->min_freq;
	rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit;

	/* After setting max-softlimit, find the overclock max freq */
	if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
		u32 params = 0;

		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_READ_OC_PARAMS, &params, NULL);
		if (params & BIT(31)) { /* OC supported */
			drm_dbg(&i915->drm,
				"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
				(rps->max_freq & 0xff) * 50,
				(params & 0xff) * 50);
			rps->max_freq = params & 0xff;
		}
	}

	/* Finally allow us to boost to max by default */
	rps->boost_freq = rps->max_freq;
	rps->idle_freq = rps->min_freq;

	/* Start in the middle, from here we will autotune based on workload */
	rps->cur_freq = rps->efficient_freq;

	rps->pm_intrmsk_mbz = 0;

	/*
	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
	 * if GEN6_PM_UP_EI_EXPIRED is masked.
	 *
	 * TODO: verify if this can be reproduced on VLV,CHV.
	 */
	if (GRAPHICS_VER(i915) <= 7)
		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;

	if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11)
		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;

	/* GuC needs ARAT expired interrupt unmasked */
	if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc))
		rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
}

void intel_rps_sanitize(struct intel_rps *rps)
{
	if (rps_uses_slpc(rps))
		return;

	if (GRAPHICS_VER(rps_to_i915(rps)) >= 6)
		rps_disable_interrupts(rps);
}

u32 intel_rps_read_rpstat(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	i915_reg_t rpstat;

	rpstat = (GRAPHICS_VER(i915) >= 12) ?
u32 intel_rps_read_rpstat(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	i915_reg_t rpstat;

	rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;

	return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
}

static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 cagf;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat);
	else if (GRAPHICS_VER(i915) >= 12)
		cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
	else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
		cagf = REG_FIELD_GET(RPE_MASK, rpstat);
	else if (GRAPHICS_VER(i915) >= 9)
		cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
		cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat);
	else if (GRAPHICS_VER(i915) >= 6)
		cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat);
	else
		cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat));

	return cagf;
}

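/*
 * How the frequency-readback helpers fit together (a sketch of the calls
 * above and below, not a new interface):
 *
 *	u32 rpstat = intel_rps_read_rpstat(rps);
 *	u32 ratio = intel_rps_get_cagf(rps, rpstat);	// platform CAGF field
 *	u32 mhz = intel_gpu_freq(rps, ratio);		// ratio -> MHz
 *
 * __read_cagf() below performs the first two steps, picking the right
 * status register for the platform, and callers such as
 * intel_rps_read_actual_frequency() apply intel_gpu_freq() to get MHz.
 */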
static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	i915_reg_t r = INVALID_MMIO_REG;
	u32 freq;

	/*
	 * For Gen12+ reading freq from HW does not need a forcewake and
	 * registers will return 0 freq when GT is in RC6
	 */
	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
		r = MTL_MIRROR_TARGET_WP1;
	} else if (GRAPHICS_VER(i915) >= 12) {
		r = GEN12_RPSTAT1;
	} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
		vlv_punit_get(i915);
		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
		vlv_punit_put(i915);
	} else if (GRAPHICS_VER(i915) >= 6) {
		r = GEN6_RPSTAT1;
	} else {
		r = MEMSTAT_ILK;
	}

	if (i915_mmio_reg_valid(r))
		freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r);

	return intel_rps_get_cagf(rps, freq);
}

static u32 read_cagf(struct intel_rps *rps)
{
	return __read_cagf(rps, true);
}

u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
	intel_wakeref_t wakeref;
	u32 freq = 0;

	with_intel_runtime_pm_if_in_use(rpm, wakeref)
		freq = intel_gpu_freq(rps, read_cagf(rps));

	return freq;
}

u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
{
	return intel_gpu_freq(rps, __read_cagf(rps, false));
}

static u32 intel_rps_read_punit_req(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
	intel_wakeref_t wakeref;
	u32 freq = 0;

	with_intel_runtime_pm_if_in_use(rpm, wakeref)
		freq = intel_uncore_read(uncore, GEN6_RPNSWREQ);

	return freq;
}

static u32 intel_rps_get_req(u32 pureq)
{
	u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;

	return req;
}

u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
{
	u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps));

	return intel_gpu_freq(rps, freq);
}

u32 intel_rps_get_requested_frequency(struct intel_rps *rps)
{
	if (rps_uses_slpc(rps))
		return intel_rps_read_punit_req_frequency(rps);
	else
		return intel_gpu_freq(rps, rps->cur_freq);
}

u32 intel_rps_get_max_frequency(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return slpc->max_freq_softlimit;
	else
		return intel_gpu_freq(rps, rps->max_freq_softlimit);
}

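/*
 * Worked example for the "raw" 50 MHz units described below (values are
 * illustrative): with SLPC the frequencies are tracked in MHz, so an RP0
 * of 1100 MHz becomes DIV_ROUND_CLOSEST(1100, GT_FREQUENCY_MULTIPLIER) =
 * 22 raw units; the legacy gen9+ path keeps ratios in finer-grained
 * units, hence the divide by GEN9_FREQ_SCALER to land in the same 50 MHz
 * units.
 */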
/**
 * intel_rps_get_max_raw_freq - returns the max frequency in some raw format.
 * @rps: the intel_rps structure
 *
 * Returns the max frequency in a raw format. In newer platforms raw is in
 * units of 50 MHz.
 */
u32 intel_rps_get_max_raw_freq(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
	u32 freq;

	if (rps_uses_slpc(rps)) {
		return DIV_ROUND_CLOSEST(slpc->rp0_freq,
					 GT_FREQUENCY_MULTIPLIER);
	} else {
		freq = rps->max_freq;
		if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
			/* Convert GT frequency to 50 MHz units */
			freq /= GEN9_FREQ_SCALER;
		}
		return freq;
	}
}

u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return slpc->rp0_freq;
	else
		return intel_gpu_freq(rps, rps->rp0_freq);
}

u32 intel_rps_get_rp1_frequency(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return slpc->rp1_freq;
	else
		return intel_gpu_freq(rps, rps->rp1_freq);
}

u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
{
	struct intel_guc_slpc *slpc = rps_to_slpc(rps);

	if (rps_uses_slpc(rps))
		return slpc->min_freq;
	else
		return intel_gpu_freq(rps, rps->min_freq);
}

static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
{
	struct intel_gt *gt = rps_to_gt(rps);
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_rps_freq_caps caps;
	u32 rp_state_limits;
	u32 gt_perf_status;
	u32 rpmodectl, rpinclimit, rpdeclimit;
	u32 rpstat, cagf, reqf;
	u32 rpcurupei, rpcurup, rpprevup;
	u32 rpcurdownei, rpcurdown, rpprevdown;
	u32 rpupei, rpupt, rpdownei, rpdownt;
	u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;

	rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
	gen6_rps_get_freq_caps(rps, &caps);
	if (IS_GEN9_LP(i915))
		gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
	else
		gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);

	/* RPSTAT1 is in the GT power well */
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
	if (GRAPHICS_VER(i915) >= 9) {
		reqf >>= 23;
	} else {
		reqf &= ~GEN6_TURBO_DISABLE;
		if (IS_HASWELL(i915) || IS_BROADWELL(i915))
			reqf >>= 24;
		else
			reqf >>= 25;
	}
	reqf = intel_gpu_freq(rps, reqf);

	rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
	rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
	rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);

	rpstat = intel_rps_read_rpstat(rps);
	rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
	rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
	rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
	rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
	rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
	rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;

	rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);

	rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);

	cagf = intel_rps_read_actual_frequency(rps);

	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);

	if (GRAPHICS_VER(i915) >= 11) {
		pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
		pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
		/*
		 * The equivalent to the PM ISR & IIR cannot be read
		 * without affecting the current state of the system
		 */
		pm_isr = 0;
		pm_iir = 0;
	} else if (GRAPHICS_VER(i915) >= 8) {
		pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
		pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
		pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
		pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
	} else {
		pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
		pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
		pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
		pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
	}
	pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);

	drm_printf(p, "Video Turbo Mode: %s\n",
		   str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
	drm_printf(p, "HW control enabled: %s\n",
		   str_yes_no(rpmodectl & GEN6_RP_ENABLE));
	drm_printf(p, "SW control enabled: %s\n",
		   str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));

	drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
		   pm_ier, pm_imr, pm_mask);
	if (GRAPHICS_VER(i915) <= 10)
		drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
			   pm_isr, pm_iir);
	drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
		   rps->pm_intrmsk_mbz);
	drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
	drm_printf(p, "Render p-state ratio: %d\n",
		   (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
	drm_printf(p, "Render p-state VID: %d\n",
		   gt_perf_status & 0xff);
	drm_printf(p, "Render p-state limit: %d\n",
		   rp_state_limits & 0xff);
	drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
	drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
	drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
	drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
	drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
	drm_printf(p, "CAGF: %dMHz\n", cagf);
	drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
		   rpcurupei,
		   intel_gt_pm_interval_to_ns(gt, rpcurupei));
	drm_printf(p, "RP CUR UP: %d (%lldns)\n",
		   rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
	drm_printf(p, "RP PREV UP: %d (%lldns)\n",
		   rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
	drm_printf(p, "Up threshold: %d%%\n",
		   rps->power.up_threshold);
	drm_printf(p, "RP UP EI: %d (%lldns)\n",
		   rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
	drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
		   rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));

	drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
		   rpcurdownei,
		   intel_gt_pm_interval_to_ns(gt, rpcurdownei));
	drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
		   rpcurdown,
		   intel_gt_pm_interval_to_ns(gt, rpcurdown));
	drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
		   rpprevdown,
		   intel_gt_pm_interval_to_ns(gt, rpprevdown));
	drm_printf(p, "Down threshold: %d%%\n",
		   rps->power.down_threshold);
	drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
		   rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
	drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
		   rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));

	drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
		   intel_gpu_freq(rps, caps.min_freq));
	drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
		   intel_gpu_freq(rps, caps.rp1_freq));
	drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
		   intel_gpu_freq(rps, caps.rp0_freq));
	drm_printf(p, "Max overclocked frequency: %dMHz\n",
		   intel_gpu_freq(rps, rps->max_freq));

	drm_printf(p, "Current freq: %d MHz\n",
		   intel_gpu_freq(rps, rps->cur_freq));
	drm_printf(p, "Actual freq: %d MHz\n", cagf);
	drm_printf(p, "Idle freq: %d MHz\n",
		   intel_gpu_freq(rps, rps->idle_freq));
	drm_printf(p, "Min freq: %d MHz\n",
		   intel_gpu_freq(rps, rps->min_freq));
	drm_printf(p, "Boost freq: %d MHz\n",
		   intel_gpu_freq(rps, rps->boost_freq));
	drm_printf(p, "Max freq: %d MHz\n",
		   intel_gpu_freq(rps, rps->max_freq));
	drm_printf(p,
		   "efficient (RPe) frequency: %d MHz\n",
		   intel_gpu_freq(rps, rps->efficient_freq));
}

static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
{
	struct intel_gt *gt = rps_to_gt(rps);
	struct intel_uncore *uncore = gt->uncore;
	struct intel_rps_freq_caps caps;
	u32 pm_mask;

	gen6_rps_get_freq_caps(rps, &caps);
	pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);

	drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
	drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
		   rps->pm_intrmsk_mbz);
	drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps));
	drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps));
	drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
		   intel_gpu_freq(rps, caps.min_freq));
	drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
		   intel_gpu_freq(rps, caps.rp1_freq));
	drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
		   intel_gpu_freq(rps, caps.rp0_freq));
	drm_printf(p, "Current freq: %d MHz\n",
		   intel_rps_get_requested_frequency(rps));
	drm_printf(p, "Actual freq: %d MHz\n",
		   intel_rps_read_actual_frequency(rps));
	drm_printf(p, "Min freq: %d MHz\n",
		   intel_rps_get_min_frequency(rps));
	drm_printf(p, "Boost freq: %d MHz\n",
		   intel_rps_get_boost_frequency(rps));
	drm_printf(p, "Max freq: %d MHz\n",
		   intel_rps_get_max_frequency(rps));
	drm_printf(p,
		   "efficient (RPe) frequency: %d MHz\n",
		   intel_gpu_freq(rps, caps.rp1_freq));
}

void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
{
	if (rps_uses_slpc(rps))
		return slpc_frequency_dump(rps, p);
	else
		return rps_frequency_dump(rps, p);
}

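/*
 * The setters below back the userspace frequency controls (the sysfs
 * wiring lives outside this file and is only assumed here).  Rough flow
 * for a max-frequency request on the non-SLPC path:
 *
 *	intel_rps_set_max_frequency(rps, val_mhz)
 *	  -> set_max_freq()
 *	       val = intel_freq_opcode(rps, val_mhz);	// MHz -> ratio
 *	       validate against min_freq/max_freq/min_freq_softlimit
 *	       rps->max_freq_softlimit = val;
 *	       intel_rps_set(rps, clamped current frequency);
 *
 * With SLPC the request is forwarded to intel_guc_slpc_set_max_freq()
 * instead.
 */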
2473 */ 2474 intel_rps_set(rps, val); 2475 2476 unlock: 2477 mutex_unlock(&rps->lock); 2478 2479 return ret; 2480 } 2481 2482 int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val) 2483 { 2484 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2485 2486 if (rps_uses_slpc(rps)) 2487 return intel_guc_slpc_set_max_freq(slpc, val); 2488 else 2489 return set_max_freq(rps, val); 2490 } 2491 2492 u32 intel_rps_get_min_frequency(struct intel_rps *rps) 2493 { 2494 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2495 2496 if (rps_uses_slpc(rps)) 2497 return slpc->min_freq_softlimit; 2498 else 2499 return intel_gpu_freq(rps, rps->min_freq_softlimit); 2500 } 2501 2502 /** 2503 * intel_rps_get_min_raw_freq - returns the min frequency in some raw format. 2504 * @rps: the intel_rps structure 2505 * 2506 * Returns the min frequency in a raw format. In newer platforms raw is in 2507 * units of 50 MHz. 2508 */ 2509 u32 intel_rps_get_min_raw_freq(struct intel_rps *rps) 2510 { 2511 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2512 u32 freq; 2513 2514 if (rps_uses_slpc(rps)) { 2515 return DIV_ROUND_CLOSEST(slpc->min_freq, 2516 GT_FREQUENCY_MULTIPLIER); 2517 } else { 2518 freq = rps->min_freq; 2519 if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { 2520 /* Convert GT frequency to 50 MHz units */ 2521 freq /= GEN9_FREQ_SCALER; 2522 } 2523 return freq; 2524 } 2525 } 2526 2527 static int set_min_freq(struct intel_rps *rps, u32 val) 2528 { 2529 int ret = 0; 2530 2531 mutex_lock(&rps->lock); 2532 2533 val = intel_freq_opcode(rps, val); 2534 if (val < rps->min_freq || 2535 val > rps->max_freq || 2536 val > rps->max_freq_softlimit) { 2537 ret = -EINVAL; 2538 goto unlock; 2539 } 2540 2541 rps->min_freq_softlimit = val; 2542 2543 val = clamp_t(int, rps->cur_freq, 2544 rps->min_freq_softlimit, 2545 rps->max_freq_softlimit); 2546 2547 /* 2548 * We still need *_set_rps to process the new min_delay and 2549 * update the interrupt limits and PMINTRMSK even though 2550 * frequency request may be unchanged. 2551 */ 2552 intel_rps_set(rps, val); 2553 2554 unlock: 2555 mutex_unlock(&rps->lock); 2556 2557 return ret; 2558 } 2559 2560 int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) 2561 { 2562 struct intel_guc_slpc *slpc = rps_to_slpc(rps); 2563 2564 if (rps_uses_slpc(rps)) 2565 return intel_guc_slpc_set_min_freq(slpc, val); 2566 else 2567 return set_min_freq(rps, val); 2568 } 2569 2570 static void intel_rps_set_manual(struct intel_rps *rps, bool enable) 2571 { 2572 struct intel_uncore *uncore = rps_to_uncore(rps); 2573 u32 state = enable ? 
static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE;

	/* Allow punit to process software requests */
	intel_uncore_write(uncore, GEN6_RP_CONTROL, state);
}

void intel_rps_raise_unslice(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);

	mutex_lock(&rps->lock);

	if (rps_uses_slpc(rps)) {
		/* RP limits have not been initialized yet for SLPC path */
		struct intel_rps_freq_caps caps;

		gen6_rps_get_freq_caps(rps, &caps);

		intel_rps_set_manual(rps, true);
		intel_uncore_write(uncore, GEN6_RPNSWREQ,
				   ((caps.rp0_freq <<
				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
				   GEN9_IGNORE_SLICE_RATIO));
		intel_rps_set_manual(rps, false);
	} else {
		intel_rps_set(rps, rps->rp0_freq);
	}

	mutex_unlock(&rps->lock);
}

void intel_rps_lower_unslice(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);

	mutex_lock(&rps->lock);

	if (rps_uses_slpc(rps)) {
		/* RP limits have not been initialized yet for SLPC path */
		struct intel_rps_freq_caps caps;

		gen6_rps_get_freq_caps(rps, &caps);

		intel_rps_set_manual(rps, true);
		intel_uncore_write(uncore, GEN6_RPNSWREQ,
				   ((caps.min_freq <<
				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
				   GEN9_IGNORE_SLICE_RATIO));
		intel_rps_set_manual(rps, false);
	} else {
		intel_rps_set(rps, rps->min_freq);
	}

	mutex_unlock(&rps->lock);
}

static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32)
{
	struct intel_gt *gt = rps_to_gt(rps);
	intel_wakeref_t wakeref;
	u32 val;

	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		val = intel_uncore_read(gt->uncore, reg32);

	return val;
}

bool rps_read_mask_mmio(struct intel_rps *rps,
			i915_reg_t reg32, u32 mask)
{
	return rps_read_mmio(rps, reg32) & mask;
}

/* External interface for intel_ips.ko */

static struct drm_i915_private __rcu *ips_mchdev;

/*
 * Tells the intel_ips driver that the i915 driver is now loaded, if
 * IPS got loaded first.
 *
 * This awkward dance is so that neither module has to depend on the
 * other in order for IPS to do the appropriate communication of
 * GPU turbo limits to i915.
 */
static void
ips_ping_for_i915_load(void)
{
	void (*link)(void);

	link = symbol_get(ips_link_to_i915_driver);
	if (link) {
		link();
		symbol_put(ips_link_to_i915_driver);
	}
}

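/*
 * Every exported IPS entry point below (i915_read_mch_val() and the
 * i915_gpu_*() helpers) follows the same pattern -- a sketch, not a new
 * interface:
 *
 *	i915 = mchdev_get();	// RCU lookup + drm reference, or NULL
 *	if (!i915)
 *		return ...;
 *	... sample or adjust rps under mchdev_lock ...
 *	drm_dev_put(&i915->drm);
 *
 * which lets intel_ips call in at any time without ordering against i915
 * module load or unload.
 */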
2678 */ 2679 if (GRAPHICS_VER(gt->i915) == 5) { 2680 GEM_BUG_ON(ips_mchdev); 2681 rcu_assign_pointer(ips_mchdev, gt->i915); 2682 ips_ping_for_i915_load(); 2683 } 2684 } 2685 2686 void intel_rps_driver_unregister(struct intel_rps *rps) 2687 { 2688 if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps)) 2689 rcu_assign_pointer(ips_mchdev, NULL); 2690 } 2691 2692 static struct drm_i915_private *mchdev_get(void) 2693 { 2694 struct drm_i915_private *i915; 2695 2696 rcu_read_lock(); 2697 i915 = rcu_dereference(ips_mchdev); 2698 if (i915 && !kref_get_unless_zero(&i915->drm.ref)) 2699 i915 = NULL; 2700 rcu_read_unlock(); 2701 2702 return i915; 2703 } 2704 2705 /** 2706 * i915_read_mch_val - return value for IPS use 2707 * 2708 * Calculate and return a value for the IPS driver to use when deciding whether 2709 * we have thermal and power headroom to increase CPU or GPU power budget. 2710 */ 2711 unsigned long i915_read_mch_val(void) 2712 { 2713 struct drm_i915_private *i915; 2714 unsigned long chipset_val = 0; 2715 unsigned long graphics_val = 0; 2716 intel_wakeref_t wakeref; 2717 2718 i915 = mchdev_get(); 2719 if (!i915) 2720 return 0; 2721 2722 with_intel_runtime_pm(&i915->runtime_pm, wakeref) { 2723 struct intel_ips *ips = &to_gt(i915)->rps.ips; 2724 2725 spin_lock_irq(&mchdev_lock); 2726 chipset_val = __ips_chipset_val(ips); 2727 graphics_val = __ips_gfx_val(ips); 2728 spin_unlock_irq(&mchdev_lock); 2729 } 2730 2731 drm_dev_put(&i915->drm); 2732 return chipset_val + graphics_val; 2733 } 2734 EXPORT_SYMBOL_GPL(i915_read_mch_val); 2735 2736 /** 2737 * i915_gpu_raise - raise GPU frequency limit 2738 * 2739 * Raise the limit; IPS indicates we have thermal headroom. 2740 */ 2741 bool i915_gpu_raise(void) 2742 { 2743 struct drm_i915_private *i915; 2744 struct intel_rps *rps; 2745 2746 i915 = mchdev_get(); 2747 if (!i915) 2748 return false; 2749 2750 rps = &to_gt(i915)->rps; 2751 2752 spin_lock_irq(&mchdev_lock); 2753 if (rps->max_freq_softlimit < rps->max_freq) 2754 rps->max_freq_softlimit++; 2755 spin_unlock_irq(&mchdev_lock); 2756 2757 drm_dev_put(&i915->drm); 2758 return true; 2759 } 2760 EXPORT_SYMBOL_GPL(i915_gpu_raise); 2761 2762 /** 2763 * i915_gpu_lower - lower GPU frequency limit 2764 * 2765 * IPS indicates we're close to a thermal limit, so throttle back the GPU 2766 * frequency maximum. 2767 */ 2768 bool i915_gpu_lower(void) 2769 { 2770 struct drm_i915_private *i915; 2771 struct intel_rps *rps; 2772 2773 i915 = mchdev_get(); 2774 if (!i915) 2775 return false; 2776 2777 rps = &to_gt(i915)->rps; 2778 2779 spin_lock_irq(&mchdev_lock); 2780 if (rps->max_freq_softlimit > rps->min_freq) 2781 rps->max_freq_softlimit--; 2782 spin_unlock_irq(&mchdev_lock); 2783 2784 drm_dev_put(&i915->drm); 2785 return true; 2786 } 2787 EXPORT_SYMBOL_GPL(i915_gpu_lower); 2788 2789 /** 2790 * i915_gpu_busy - indicate GPU business to IPS 2791 * 2792 * Tell the IPS driver whether or not the GPU is busy. 2793 */ 2794 bool i915_gpu_busy(void) 2795 { 2796 struct drm_i915_private *i915; 2797 bool ret; 2798 2799 i915 = mchdev_get(); 2800 if (!i915) 2801 return false; 2802 2803 ret = to_gt(i915)->awake; 2804 2805 drm_dev_put(&i915->drm); 2806 return ret; 2807 } 2808 EXPORT_SYMBOL_GPL(i915_gpu_busy); 2809 2810 /** 2811 * i915_gpu_turbo_disable - disable graphics turbo 2812 * 2813 * Disable graphics turbo by resetting the max frequency and setting the 2814 * current frequency to the default. 
2815 */ 2816 bool i915_gpu_turbo_disable(void) 2817 { 2818 struct drm_i915_private *i915; 2819 struct intel_rps *rps; 2820 bool ret; 2821 2822 i915 = mchdev_get(); 2823 if (!i915) 2824 return false; 2825 2826 rps = &to_gt(i915)->rps; 2827 2828 spin_lock_irq(&mchdev_lock); 2829 rps->max_freq_softlimit = rps->min_freq; 2830 ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq); 2831 spin_unlock_irq(&mchdev_lock); 2832 2833 drm_dev_put(&i915->drm); 2834 return ret; 2835 } 2836 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 2837 2838 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 2839 #include "selftest_rps.c" 2840 #include "selftest_slpc.c" 2841 #endif 2842