/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/irq.h>
#include <linux/pm_runtime.h>

#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt_pm.h"

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_pm.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = pmu->enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, if software busyness tracking is available, we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 */
	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	pmu->timer_enabled = pmu_needs_timer(pmu, false);
	spin_unlock_irq(&pmu->lock);
}

static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
		pmu->timer_enabled = true;
		pmu->timer_last = ktime_get();
		hrtimer_start_range_ns(&pmu->timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(pmu);
	spin_unlock_irq(&pmu->lock);
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}

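/*
 * Sample the MMIO engine state (RING_CTL and, if needed, RING_MI_MODE) of
 * every awake engine and accumulate the elapsed period into the busy, wait
 * and semaphore counters as appropriate.
 */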
static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	for_each_engine(engine, i915, id) {
		struct intel_engine_pmu *pmu = &engine->pmu;
		unsigned long flags;
		bool busy;
		u32 val;

		if (!intel_engine_pm_get_if_awake(engine))
			continue;

		spin_lock_irqsave(&engine->uncore->lock, flags);

		val = ENGINE_READ_FW(engine, RING_CTL);
		if (val == 0) /* powerwell off => engine idle */
			goto skip;

		if (val & RING_WAIT)
			add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
		if (val & RING_WAIT_SEMAPHORE)
			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

		/*
		 * While waiting on a semaphore or event, MI_MODE reports the
		 * ring as idle. However, previously using the seqno, and with
		 * execlists sampling, we account for the ring waiting as the
		 * engine being busy. Therefore, we record the sample as being
		 * busy if either waiting or !idle.
		 */
		busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
		if (!busy) {
			val = ENGINE_READ_FW(engine, RING_MI_MODE);
			busy = !(val & MODE_IDLE);
		}
		if (busy)
			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);

skip:
		spin_unlock_irqrestore(&engine->uncore->lock, flags);
		intel_engine_pm_put(engine);
	}
}

static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
	sample->cur += mul_u32_u32(val, mul);
}

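/*
 * Sample the actual GPU frequency (read from GEN6_RPSTAT1 while the GT is
 * awake, otherwise falling back to the last requested frequency) and the
 * requested frequency, each weighted by the elapsed period in microseconds.
 */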
static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = i915->gt_pm.rps.cur_freq;
		if (intel_gt_pm_get_if_awake(gt)) {
			val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1);
			val = intel_get_cagf(i915, val);
			intel_gt_pm_put(gt);
		}

		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
				intel_gpu_freq(i915, val),
				period_ns / 1000);
	}

	if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
				intel_gpu_freq(i915, i915->gt_pm.rps.cur_freq),
				period_ns / 1000);
	}
}

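/*
 * Timer callback driving the software sampling of engine busyness and GPU
 * frequency. It re-arms itself for the next PERIOD while any sampling event
 * remains enabled.
 */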
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_gt *gt = &i915->gt;
	unsigned int period_ns;
	ktime_t now;

	if (!READ_ONCE(pmu->timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
	pmu->timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculations, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer
	 * callback delay greatly dominates this so we keep it simple.
	 */
	engines_sample(gt, period_ns);
	frequency_sample(gt, period_ns);

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}

static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

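/* Check whether a non-engine event config is supported on this platform. */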
static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}

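/*
 * Read the total RC6 residency in nanoseconds, including the deeper RC6p and
 * RC6pp states where supported.
 */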
static u64 __get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	u64 val;

	val = intel_rc6_residency_ns(i915,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

	return val;
}

static u64 get_rc6(struct intel_gt *gt)
{
#if IS_ENABLED(CONFIG_PM)
	struct drm_i915_private *i915 = gt->i915;
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	struct i915_pmu *pmu = &i915->pmu;
	intel_wakeref_t wakeref;
	unsigned long flags;
	u64 val;

	wakeref = intel_runtime_pm_get_if_in_use(rpm);
	if (wakeref) {
		val = __get_rc6(gt);
		intel_runtime_pm_put(rpm, wakeref);

		/*
		 * If we are coming back from being runtime suspended we must
		 * be careful not to report a larger value than returned
		 * previously.
		 */

		spin_lock_irqsave(&pmu->lock, flags);

		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			pmu->sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}

		spin_unlock_irqrestore(&pmu->lock, flags);
	} else {
		struct device *kdev = rpm->kdev;

		/*
		 * We are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		spin_lock_irqsave(&pmu->lock, flags);

		/*
		 * After intel_runtime_pm_get_if_in_use above failed to get the
		 * runtime PM reference we cannot assume we are in runtime
		 * suspend, since we can either: a) race with coming out of it
		 * before we took the power.lock, or b) be in a state other
		 * than suspended which can bring us here.
		 *
		 * We need to double-check that we are indeed currently runtime
		 * suspended and if not we cannot do better than report the
		 * last known RC6 value.
		 */
		if (pm_runtime_status_suspended(kdev)) {
			val = pm_runtime_suspended_time(kdev);

			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
				pmu->suspended_time_last = val;

			val -= pmu->suspended_time_last;
			val += pmu->sample[__I915_SAMPLE_RC6].cur;

			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		} else {
			val = pmu->sample[__I915_SAMPLE_RC6].cur;
		}

		spin_unlock_irqrestore(&pmu->lock, flags);
	}

	return val;
#else
	return __get_rc6(gt);
#endif
}

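/*
 * Return the current value of an event counter: either the software engine
 * busyness stats, an accumulated sample, or a value read from the hardware.
 */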
static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(&i915->gt);
			break;
		}
	}

	return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

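/*
 * Account for a new listener: update the global and per-engine enable
 * bitmasks and reference counts, and start the sampling timer if it is now
 * required.
 */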
static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;

	spin_lock_irqsave(&pmu->lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == ~0);
	pmu->enable |= BIT_ULL(bit);
	pmu->enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
			     I915_ENGINE_SAMPLE_COUNT);
		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
			     I915_ENGINE_SAMPLE_COUNT);
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

		engine->pmu.enable |= BIT(sample);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&pmu->lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;

	spin_lock_irqsave(&pmu->lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--pmu->enable_count[bit] == 0) {
		pmu->enable &= ~BIT_ULL(bit);
		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
	}

	spin_unlock_irqrestore(&pmu->lock, flags);
}

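/*
 * struct pmu event start/stop/add/del callbacks, built on top of the
 * enable/disable helpers above.
 */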
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};

#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

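/*
 * Build the sysfs "events" attribute array at runtime, exposing only the
 * counters supported on this platform and its engines.
 */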
static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->uabi_instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	pmu->i915_attr = i915_attr;
	pmu->pmu_attr = pmu_attr;

	return attr;

err:;
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct i915_pmu *pmu)
{
	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(i915_pmu_events_attr_group.attrs);
	kfree(pmu->i915_attr);
	kfree(pmu->pmu_attr);

	i915_pmu_events_attr_group.attrs = NULL;
	pmu->i915_attr = NULL;
	pmu->pmu_attr = NULL;
}

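/*
 * CPU hotplug callbacks: keep a single designated reader CPU in
 * i915_pmu_cpumask and migrate the perf context if that CPU goes offline.
 */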
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &pmu->node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node));
	cpuhp_remove_multi_state(cpuhp_slot);
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		dev_info(i915->drm.dev, "PMU not supported for this GPU.");
		return;
	}

	i915_pmu_events_attr_group.attrs = create_event_attributes(pmu);
	if (!i915_pmu_events_attr_group.attrs) {
		ret = -ENOMEM;
		goto err;
	}

	pmu->base.attr_groups = i915_pmu_attr_groups;
	pmu->base.task_ctx_nr = perf_invalid_context;
	pmu->base.event_init = i915_pmu_event_init;
	pmu->base.add = i915_pmu_event_add;
	pmu->base.del = i915_pmu_event_del;
	pmu->base.start = i915_pmu_event_start;
	pmu->base.stop = i915_pmu_event_stop;
	pmu->base.read = i915_pmu_event_read;
	pmu->base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&pmu->lock);
	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	pmu->timer.function = i915_sample;

	ret = perf_pmu_register(&pmu->base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(pmu);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&pmu->base);
err:
	pmu->base.event_init = NULL;
	free_event_attributes(pmu);
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	WARN_ON(pmu->enable);

	hrtimer_cancel(&pmu->timer);

	i915_pmu_unregister_cpuhp_state(pmu);

	perf_pmu_unregister(&pmu->base);
	pmu->base.event_init = NULL;
	free_event_attributes(pmu);
}