1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017-2018 Intel Corporation 5 */ 6 7 #include "i915_pmu.h" 8 #include "intel_ringbuffer.h" 9 #include "i915_drv.h" 10 11 /* Frequency for the sampling timer for events which need it. */ 12 #define FREQUENCY 200 13 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY) 14 15 #define ENGINE_SAMPLE_MASK \ 16 (BIT(I915_SAMPLE_BUSY) | \ 17 BIT(I915_SAMPLE_WAIT) | \ 18 BIT(I915_SAMPLE_SEMA)) 19 20 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS) 21 22 static cpumask_t i915_pmu_cpumask; 23 24 static u8 engine_config_sample(u64 config) 25 { 26 return config & I915_PMU_SAMPLE_MASK; 27 } 28 29 static u8 engine_event_sample(struct perf_event *event) 30 { 31 return engine_config_sample(event->attr.config); 32 } 33 34 static u8 engine_event_class(struct perf_event *event) 35 { 36 return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff; 37 } 38 39 static u8 engine_event_instance(struct perf_event *event) 40 { 41 return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff; 42 } 43 44 static bool is_engine_config(u64 config) 45 { 46 return config < __I915_PMU_OTHER(0); 47 } 48 49 static unsigned int config_enabled_bit(u64 config) 50 { 51 if (is_engine_config(config)) 52 return engine_config_sample(config); 53 else 54 return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0)); 55 } 56 57 static u64 config_enabled_mask(u64 config) 58 { 59 return BIT_ULL(config_enabled_bit(config)); 60 } 61 62 static bool is_engine_event(struct perf_event *event) 63 { 64 return is_engine_config(event->attr.config); 65 } 66 67 static unsigned int event_enabled_bit(struct perf_event *event) 68 { 69 return config_enabled_bit(event->attr.config); 70 } 71 72 static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) 73 { 74 u64 enable; 75 76 /* 77 * Only some counters need the sampling timer. 78 * 79 * We start with a bitmask of all currently enabled events. 80 */ 81 enable = i915->pmu.enable; 82 83 /* 84 * Mask out all the ones which do not need the timer, or in 85 * other words keep all the ones that could need the timer. 86 */ 87 enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | 88 config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) | 89 ENGINE_SAMPLE_MASK; 90 91 /* 92 * When the GPU is idle per-engine counters do not need to be 93 * running so clear those bits out. 94 */ 95 if (!gpu_active) 96 enable &= ~ENGINE_SAMPLE_MASK; 97 /* 98 * Also there is software busyness tracking available we do not 99 * need the timer for I915_SAMPLE_BUSY counter. 100 * 101 * Use RCS as proxy for all engines. 102 */ 103 else if (intel_engine_supports_stats(i915->engine[RCS])) 104 enable &= ~BIT(I915_SAMPLE_BUSY); 105 106 /* 107 * If some bits remain it means we need the sampling timer running. 108 */ 109 return enable; 110 } 111 112 void i915_pmu_gt_parked(struct drm_i915_private *i915) 113 { 114 if (!i915->pmu.base.event_init) 115 return; 116 117 spin_lock_irq(&i915->pmu.lock); 118 /* 119 * Signal sampling timer to stop if only engine events are enabled and 120 * GPU went idle. 121 */ 122 i915->pmu.timer_enabled = pmu_needs_timer(i915, false); 123 spin_unlock_irq(&i915->pmu.lock); 124 } 125 126 static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915) 127 { 128 if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) { 129 i915->pmu.timer_enabled = true; 130 i915->pmu.timer_last = ktime_get(); 131 hrtimer_start_range_ns(&i915->pmu.timer, 132 ns_to_ktime(PERIOD), 0, 133 HRTIMER_MODE_REL_PINNED); 134 } 135 } 136 137 void i915_pmu_gt_unparked(struct drm_i915_private *i915) 138 { 139 if (!i915->pmu.base.event_init) 140 return; 141 142 spin_lock_irq(&i915->pmu.lock); 143 /* 144 * Re-enable sampling timer when GPU goes active. 145 */ 146 __i915_pmu_maybe_start_timer(i915); 147 spin_unlock_irq(&i915->pmu.lock); 148 } 149 150 static bool grab_forcewake(struct drm_i915_private *i915, bool fw) 151 { 152 if (!fw) 153 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 154 155 return true; 156 } 157 158 static void 159 add_sample(struct i915_pmu_sample *sample, u32 val) 160 { 161 sample->cur += val; 162 } 163 164 static void 165 engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) 166 { 167 struct intel_engine_cs *engine; 168 enum intel_engine_id id; 169 bool fw = false; 170 171 if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0) 172 return; 173 174 if (!dev_priv->gt.awake) 175 return; 176 177 if (!intel_runtime_pm_get_if_in_use(dev_priv)) 178 return; 179 180 for_each_engine(engine, dev_priv, id) { 181 u32 current_seqno = intel_engine_get_seqno(engine); 182 u32 last_seqno = intel_engine_last_submit(engine); 183 u32 val; 184 185 val = !i915_seqno_passed(current_seqno, last_seqno); 186 187 if (val) 188 add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY], 189 period_ns); 190 191 if (val && (engine->pmu.enable & 192 (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) { 193 fw = grab_forcewake(dev_priv, fw); 194 195 val = I915_READ_FW(RING_CTL(engine->mmio_base)); 196 } else { 197 val = 0; 198 } 199 200 if (val & RING_WAIT) 201 add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT], 202 period_ns); 203 204 if (val & RING_WAIT_SEMAPHORE) 205 add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA], 206 period_ns); 207 } 208 209 if (fw) 210 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 211 212 intel_runtime_pm_put(dev_priv); 213 } 214 215 static void 216 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul) 217 { 218 sample->cur += mul_u32_u32(val, mul); 219 } 220 221 static void 222 frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns) 223 { 224 if (dev_priv->pmu.enable & 225 config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { 226 u32 val; 227 228 val = dev_priv->gt_pm.rps.cur_freq; 229 if (dev_priv->gt.awake && 230 intel_runtime_pm_get_if_in_use(dev_priv)) { 231 val = intel_get_cagf(dev_priv, 232 I915_READ_NOTRACE(GEN6_RPSTAT1)); 233 intel_runtime_pm_put(dev_priv); 234 } 235 236 add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], 237 intel_gpu_freq(dev_priv, val), 238 period_ns / 1000); 239 } 240 241 if (dev_priv->pmu.enable & 242 config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { 243 add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 244 intel_gpu_freq(dev_priv, 245 dev_priv->gt_pm.rps.cur_freq), 246 period_ns / 1000); 247 } 248 } 249 250 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) 251 { 252 struct drm_i915_private *i915 = 253 container_of(hrtimer, struct drm_i915_private, pmu.timer); 254 unsigned int period_ns; 255 ktime_t now; 256 257 if (!READ_ONCE(i915->pmu.timer_enabled)) 258 return HRTIMER_NORESTART; 259 260 now = ktime_get(); 261 period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last)); 262 i915->pmu.timer_last = now; 263 264 /* 265 * Strictly speaking the passed in period may not be 100% accurate for 266 * all internal calculation, since some amount of time can be spent on 267 * grabbing the forcewake. However the potential error from timer call- 268 * back delay greatly dominates this so we keep it simple. 269 */ 270 engines_sample(i915, period_ns); 271 frequency_sample(i915, period_ns); 272 273 hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD)); 274 275 return HRTIMER_RESTART; 276 } 277 278 static u64 count_interrupts(struct drm_i915_private *i915) 279 { 280 /* open-coded kstat_irqs() */ 281 struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq); 282 u64 sum = 0; 283 int cpu; 284 285 if (!desc || !desc->kstat_irqs) 286 return 0; 287 288 for_each_possible_cpu(cpu) 289 sum += *per_cpu_ptr(desc->kstat_irqs, cpu); 290 291 return sum; 292 } 293 294 static void engine_event_destroy(struct perf_event *event) 295 { 296 struct drm_i915_private *i915 = 297 container_of(event->pmu, typeof(*i915), pmu.base); 298 struct intel_engine_cs *engine; 299 300 engine = intel_engine_lookup_user(i915, 301 engine_event_class(event), 302 engine_event_instance(event)); 303 if (WARN_ON_ONCE(!engine)) 304 return; 305 306 if (engine_event_sample(event) == I915_SAMPLE_BUSY && 307 intel_engine_supports_stats(engine)) 308 intel_disable_engine_stats(engine); 309 } 310 311 static void i915_pmu_event_destroy(struct perf_event *event) 312 { 313 WARN_ON(event->parent); 314 315 if (is_engine_event(event)) 316 engine_event_destroy(event); 317 } 318 319 static int 320 engine_event_status(struct intel_engine_cs *engine, 321 enum drm_i915_pmu_engine_sample sample) 322 { 323 switch (sample) { 324 case I915_SAMPLE_BUSY: 325 case I915_SAMPLE_WAIT: 326 break; 327 case I915_SAMPLE_SEMA: 328 if (INTEL_GEN(engine->i915) < 6) 329 return -ENODEV; 330 break; 331 default: 332 return -ENOENT; 333 } 334 335 return 0; 336 } 337 338 static int 339 config_status(struct drm_i915_private *i915, u64 config) 340 { 341 switch (config) { 342 case I915_PMU_ACTUAL_FREQUENCY: 343 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) 344 /* Requires a mutex for sampling! */ 345 return -ENODEV; 346 /* Fall-through. */ 347 case I915_PMU_REQUESTED_FREQUENCY: 348 if (INTEL_GEN(i915) < 6) 349 return -ENODEV; 350 break; 351 case I915_PMU_INTERRUPTS: 352 break; 353 case I915_PMU_RC6_RESIDENCY: 354 if (!HAS_RC6(i915)) 355 return -ENODEV; 356 break; 357 default: 358 return -ENOENT; 359 } 360 361 return 0; 362 } 363 364 static int engine_event_init(struct perf_event *event) 365 { 366 struct drm_i915_private *i915 = 367 container_of(event->pmu, typeof(*i915), pmu.base); 368 struct intel_engine_cs *engine; 369 u8 sample; 370 int ret; 371 372 engine = intel_engine_lookup_user(i915, engine_event_class(event), 373 engine_event_instance(event)); 374 if (!engine) 375 return -ENODEV; 376 377 sample = engine_event_sample(event); 378 ret = engine_event_status(engine, sample); 379 if (ret) 380 return ret; 381 382 if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine)) 383 ret = intel_enable_engine_stats(engine); 384 385 return ret; 386 } 387 388 static int i915_pmu_event_init(struct perf_event *event) 389 { 390 struct drm_i915_private *i915 = 391 container_of(event->pmu, typeof(*i915), pmu.base); 392 int ret; 393 394 if (event->attr.type != event->pmu->type) 395 return -ENOENT; 396 397 /* unsupported modes and filters */ 398 if (event->attr.sample_period) /* no sampling */ 399 return -EINVAL; 400 401 if (has_branch_stack(event)) 402 return -EOPNOTSUPP; 403 404 if (event->cpu < 0) 405 return -EINVAL; 406 407 /* only allow running on one cpu at a time */ 408 if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask)) 409 return -EINVAL; 410 411 if (is_engine_event(event)) 412 ret = engine_event_init(event); 413 else 414 ret = config_status(i915, event->attr.config); 415 if (ret) 416 return ret; 417 418 if (!event->parent) 419 event->destroy = i915_pmu_event_destroy; 420 421 return 0; 422 } 423 424 static u64 __get_rc6(struct drm_i915_private *i915) 425 { 426 u64 val; 427 428 val = intel_rc6_residency_ns(i915, 429 IS_VALLEYVIEW(i915) ? 430 VLV_GT_RENDER_RC6 : 431 GEN6_GT_GFX_RC6); 432 433 if (HAS_RC6p(i915)) 434 val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); 435 436 if (HAS_RC6pp(i915)) 437 val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); 438 439 return val; 440 } 441 442 static u64 get_rc6(struct drm_i915_private *i915) 443 { 444 #if IS_ENABLED(CONFIG_PM) 445 unsigned long flags; 446 u64 val; 447 448 if (intel_runtime_pm_get_if_in_use(i915)) { 449 val = __get_rc6(i915); 450 intel_runtime_pm_put(i915); 451 452 /* 453 * If we are coming back from being runtime suspended we must 454 * be careful not to report a larger value than returned 455 * previously. 456 */ 457 458 spin_lock_irqsave(&i915->pmu.lock, flags); 459 460 if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { 461 i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; 462 i915->pmu.sample[__I915_SAMPLE_RC6].cur = val; 463 } else { 464 val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; 465 } 466 467 spin_unlock_irqrestore(&i915->pmu.lock, flags); 468 } else { 469 struct pci_dev *pdev = i915->drm.pdev; 470 struct device *kdev = &pdev->dev; 471 472 /* 473 * We are runtime suspended. 474 * 475 * Report the delta from when the device was suspended to now, 476 * on top of the last known real value, as the approximated RC6 477 * counter value. 478 */ 479 spin_lock_irqsave(&i915->pmu.lock, flags); 480 spin_lock(&kdev->power.lock); 481 482 /* 483 * After the above branch intel_runtime_pm_get_if_in_use failed 484 * to get the runtime PM reference we cannot assume we are in 485 * runtime suspend since we can either: a) race with coming out 486 * of it before we took the power.lock, or b) there are other 487 * states than suspended which can bring us here. 488 * 489 * We need to double-check that we are indeed currently runtime 490 * suspended and if not we cannot do better than report the last 491 * known RC6 value. 492 */ 493 if (kdev->power.runtime_status == RPM_SUSPENDED) { 494 if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) 495 i915->pmu.suspended_jiffies_last = 496 kdev->power.suspended_jiffies; 497 498 val = kdev->power.suspended_jiffies - 499 i915->pmu.suspended_jiffies_last; 500 val += jiffies - kdev->power.accounting_timestamp; 501 502 val = jiffies_to_nsecs(val); 503 val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; 504 505 i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; 506 } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { 507 val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; 508 } else { 509 val = i915->pmu.sample[__I915_SAMPLE_RC6].cur; 510 } 511 512 spin_unlock(&kdev->power.lock); 513 spin_unlock_irqrestore(&i915->pmu.lock, flags); 514 } 515 516 return val; 517 #else 518 return __get_rc6(i915); 519 #endif 520 } 521 522 static u64 __i915_pmu_event_read(struct perf_event *event) 523 { 524 struct drm_i915_private *i915 = 525 container_of(event->pmu, typeof(*i915), pmu.base); 526 u64 val = 0; 527 528 if (is_engine_event(event)) { 529 u8 sample = engine_event_sample(event); 530 struct intel_engine_cs *engine; 531 532 engine = intel_engine_lookup_user(i915, 533 engine_event_class(event), 534 engine_event_instance(event)); 535 536 if (WARN_ON_ONCE(!engine)) { 537 /* Do nothing */ 538 } else if (sample == I915_SAMPLE_BUSY && 539 intel_engine_supports_stats(engine)) { 540 val = ktime_to_ns(intel_engine_get_busy_time(engine)); 541 } else { 542 val = engine->pmu.sample[sample].cur; 543 } 544 } else { 545 switch (event->attr.config) { 546 case I915_PMU_ACTUAL_FREQUENCY: 547 val = 548 div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur, 549 USEC_PER_SEC /* to MHz */); 550 break; 551 case I915_PMU_REQUESTED_FREQUENCY: 552 val = 553 div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur, 554 USEC_PER_SEC /* to MHz */); 555 break; 556 case I915_PMU_INTERRUPTS: 557 val = count_interrupts(i915); 558 break; 559 case I915_PMU_RC6_RESIDENCY: 560 val = get_rc6(i915); 561 break; 562 } 563 } 564 565 return val; 566 } 567 568 static void i915_pmu_event_read(struct perf_event *event) 569 { 570 struct hw_perf_event *hwc = &event->hw; 571 u64 prev, new; 572 573 again: 574 prev = local64_read(&hwc->prev_count); 575 new = __i915_pmu_event_read(event); 576 577 if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) 578 goto again; 579 580 local64_add(new - prev, &event->count); 581 } 582 583 static void i915_pmu_enable(struct perf_event *event) 584 { 585 struct drm_i915_private *i915 = 586 container_of(event->pmu, typeof(*i915), pmu.base); 587 unsigned int bit = event_enabled_bit(event); 588 unsigned long flags; 589 590 spin_lock_irqsave(&i915->pmu.lock, flags); 591 592 /* 593 * Update the bitmask of enabled events and increment 594 * the event reference counter. 595 */ 596 GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); 597 GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); 598 i915->pmu.enable |= BIT_ULL(bit); 599 i915->pmu.enable_count[bit]++; 600 601 /* 602 * Start the sampling timer if needed and not already enabled. 603 */ 604 __i915_pmu_maybe_start_timer(i915); 605 606 /* 607 * For per-engine events the bitmask and reference counting 608 * is stored per engine. 609 */ 610 if (is_engine_event(event)) { 611 u8 sample = engine_event_sample(event); 612 struct intel_engine_cs *engine; 613 614 engine = intel_engine_lookup_user(i915, 615 engine_event_class(event), 616 engine_event_instance(event)); 617 GEM_BUG_ON(!engine); 618 engine->pmu.enable |= BIT(sample); 619 620 GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); 621 GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); 622 engine->pmu.enable_count[sample]++; 623 } 624 625 spin_unlock_irqrestore(&i915->pmu.lock, flags); 626 627 /* 628 * Store the current counter value so we can report the correct delta 629 * for all listeners. Even when the event was already enabled and has 630 * an existing non-zero value. 631 */ 632 local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); 633 } 634 635 static void i915_pmu_disable(struct perf_event *event) 636 { 637 struct drm_i915_private *i915 = 638 container_of(event->pmu, typeof(*i915), pmu.base); 639 unsigned int bit = event_enabled_bit(event); 640 unsigned long flags; 641 642 spin_lock_irqsave(&i915->pmu.lock, flags); 643 644 if (is_engine_event(event)) { 645 u8 sample = engine_event_sample(event); 646 struct intel_engine_cs *engine; 647 648 engine = intel_engine_lookup_user(i915, 649 engine_event_class(event), 650 engine_event_instance(event)); 651 GEM_BUG_ON(!engine); 652 GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); 653 GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); 654 /* 655 * Decrement the reference count and clear the enabled 656 * bitmask when the last listener on an event goes away. 657 */ 658 if (--engine->pmu.enable_count[sample] == 0) 659 engine->pmu.enable &= ~BIT(sample); 660 } 661 662 GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); 663 GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); 664 /* 665 * Decrement the reference count and clear the enabled 666 * bitmask when the last listener on an event goes away. 667 */ 668 if (--i915->pmu.enable_count[bit] == 0) { 669 i915->pmu.enable &= ~BIT_ULL(bit); 670 i915->pmu.timer_enabled &= pmu_needs_timer(i915, true); 671 } 672 673 spin_unlock_irqrestore(&i915->pmu.lock, flags); 674 } 675 676 static void i915_pmu_event_start(struct perf_event *event, int flags) 677 { 678 i915_pmu_enable(event); 679 event->hw.state = 0; 680 } 681 682 static void i915_pmu_event_stop(struct perf_event *event, int flags) 683 { 684 if (flags & PERF_EF_UPDATE) 685 i915_pmu_event_read(event); 686 i915_pmu_disable(event); 687 event->hw.state = PERF_HES_STOPPED; 688 } 689 690 static int i915_pmu_event_add(struct perf_event *event, int flags) 691 { 692 if (flags & PERF_EF_START) 693 i915_pmu_event_start(event, flags); 694 695 return 0; 696 } 697 698 static void i915_pmu_event_del(struct perf_event *event, int flags) 699 { 700 i915_pmu_event_stop(event, PERF_EF_UPDATE); 701 } 702 703 static int i915_pmu_event_event_idx(struct perf_event *event) 704 { 705 return 0; 706 } 707 708 struct i915_str_attribute { 709 struct device_attribute attr; 710 const char *str; 711 }; 712 713 static ssize_t i915_pmu_format_show(struct device *dev, 714 struct device_attribute *attr, char *buf) 715 { 716 struct i915_str_attribute *eattr; 717 718 eattr = container_of(attr, struct i915_str_attribute, attr); 719 return sprintf(buf, "%s\n", eattr->str); 720 } 721 722 #define I915_PMU_FORMAT_ATTR(_name, _config) \ 723 (&((struct i915_str_attribute[]) { \ 724 { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \ 725 .str = _config, } \ 726 })[0].attr.attr) 727 728 static struct attribute *i915_pmu_format_attrs[] = { 729 I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"), 730 NULL, 731 }; 732 733 static const struct attribute_group i915_pmu_format_attr_group = { 734 .name = "format", 735 .attrs = i915_pmu_format_attrs, 736 }; 737 738 struct i915_ext_attribute { 739 struct device_attribute attr; 740 unsigned long val; 741 }; 742 743 static ssize_t i915_pmu_event_show(struct device *dev, 744 struct device_attribute *attr, char *buf) 745 { 746 struct i915_ext_attribute *eattr; 747 748 eattr = container_of(attr, struct i915_ext_attribute, attr); 749 return sprintf(buf, "config=0x%lx\n", eattr->val); 750 } 751 752 static struct attribute_group i915_pmu_events_attr_group = { 753 .name = "events", 754 /* Patch in attrs at runtime. */ 755 }; 756 757 static ssize_t 758 i915_pmu_get_attr_cpumask(struct device *dev, 759 struct device_attribute *attr, 760 char *buf) 761 { 762 return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask); 763 } 764 765 static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL); 766 767 static struct attribute *i915_cpumask_attrs[] = { 768 &dev_attr_cpumask.attr, 769 NULL, 770 }; 771 772 static const struct attribute_group i915_pmu_cpumask_attr_group = { 773 .attrs = i915_cpumask_attrs, 774 }; 775 776 static const struct attribute_group *i915_pmu_attr_groups[] = { 777 &i915_pmu_format_attr_group, 778 &i915_pmu_events_attr_group, 779 &i915_pmu_cpumask_attr_group, 780 NULL 781 }; 782 783 #define __event(__config, __name, __unit) \ 784 { \ 785 .config = (__config), \ 786 .name = (__name), \ 787 .unit = (__unit), \ 788 } 789 790 #define __engine_event(__sample, __name) \ 791 { \ 792 .sample = (__sample), \ 793 .name = (__name), \ 794 } 795 796 static struct i915_ext_attribute * 797 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config) 798 { 799 sysfs_attr_init(&attr->attr.attr); 800 attr->attr.attr.name = name; 801 attr->attr.attr.mode = 0444; 802 attr->attr.show = i915_pmu_event_show; 803 attr->val = config; 804 805 return ++attr; 806 } 807 808 static struct perf_pmu_events_attr * 809 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name, 810 const char *str) 811 { 812 sysfs_attr_init(&attr->attr.attr); 813 attr->attr.attr.name = name; 814 attr->attr.attr.mode = 0444; 815 attr->attr.show = perf_event_sysfs_show; 816 attr->event_str = str; 817 818 return ++attr; 819 } 820 821 static struct attribute ** 822 create_event_attributes(struct drm_i915_private *i915) 823 { 824 static const struct { 825 u64 config; 826 const char *name; 827 const char *unit; 828 } events[] = { 829 __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"), 830 __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"), 831 __event(I915_PMU_INTERRUPTS, "interrupts", NULL), 832 __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), 833 }; 834 static const struct { 835 enum drm_i915_pmu_engine_sample sample; 836 char *name; 837 } engine_events[] = { 838 __engine_event(I915_SAMPLE_BUSY, "busy"), 839 __engine_event(I915_SAMPLE_SEMA, "sema"), 840 __engine_event(I915_SAMPLE_WAIT, "wait"), 841 }; 842 unsigned int count = 0; 843 struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter; 844 struct i915_ext_attribute *i915_attr = NULL, *i915_iter; 845 struct attribute **attr = NULL, **attr_iter; 846 struct intel_engine_cs *engine; 847 enum intel_engine_id id; 848 unsigned int i; 849 850 /* Count how many counters we will be exposing. */ 851 for (i = 0; i < ARRAY_SIZE(events); i++) { 852 if (!config_status(i915, events[i].config)) 853 count++; 854 } 855 856 for_each_engine(engine, i915, id) { 857 for (i = 0; i < ARRAY_SIZE(engine_events); i++) { 858 if (!engine_event_status(engine, 859 engine_events[i].sample)) 860 count++; 861 } 862 } 863 864 /* Allocate attribute objects and table. */ 865 i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL); 866 if (!i915_attr) 867 goto err_alloc; 868 869 pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL); 870 if (!pmu_attr) 871 goto err_alloc; 872 873 /* Max one pointer of each attribute type plus a termination entry. */ 874 attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL); 875 if (!attr) 876 goto err_alloc; 877 878 i915_iter = i915_attr; 879 pmu_iter = pmu_attr; 880 attr_iter = attr; 881 882 /* Initialize supported non-engine counters. */ 883 for (i = 0; i < ARRAY_SIZE(events); i++) { 884 char *str; 885 886 if (config_status(i915, events[i].config)) 887 continue; 888 889 str = kstrdup(events[i].name, GFP_KERNEL); 890 if (!str) 891 goto err; 892 893 *attr_iter++ = &i915_iter->attr.attr; 894 i915_iter = add_i915_attr(i915_iter, str, events[i].config); 895 896 if (events[i].unit) { 897 str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name); 898 if (!str) 899 goto err; 900 901 *attr_iter++ = &pmu_iter->attr.attr; 902 pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit); 903 } 904 } 905 906 /* Initialize supported engine counters. */ 907 for_each_engine(engine, i915, id) { 908 for (i = 0; i < ARRAY_SIZE(engine_events); i++) { 909 char *str; 910 911 if (engine_event_status(engine, 912 engine_events[i].sample)) 913 continue; 914 915 str = kasprintf(GFP_KERNEL, "%s-%s", 916 engine->name, engine_events[i].name); 917 if (!str) 918 goto err; 919 920 *attr_iter++ = &i915_iter->attr.attr; 921 i915_iter = 922 add_i915_attr(i915_iter, str, 923 __I915_PMU_ENGINE(engine->uabi_class, 924 engine->instance, 925 engine_events[i].sample)); 926 927 str = kasprintf(GFP_KERNEL, "%s-%s.unit", 928 engine->name, engine_events[i].name); 929 if (!str) 930 goto err; 931 932 *attr_iter++ = &pmu_iter->attr.attr; 933 pmu_iter = add_pmu_attr(pmu_iter, str, "ns"); 934 } 935 } 936 937 i915->pmu.i915_attr = i915_attr; 938 i915->pmu.pmu_attr = pmu_attr; 939 940 return attr; 941 942 err:; 943 for (attr_iter = attr; *attr_iter; attr_iter++) 944 kfree((*attr_iter)->name); 945 946 err_alloc: 947 kfree(attr); 948 kfree(i915_attr); 949 kfree(pmu_attr); 950 951 return NULL; 952 } 953 954 static void free_event_attributes(struct drm_i915_private *i915) 955 { 956 struct attribute **attr_iter = i915_pmu_events_attr_group.attrs; 957 958 for (; *attr_iter; attr_iter++) 959 kfree((*attr_iter)->name); 960 961 kfree(i915_pmu_events_attr_group.attrs); 962 kfree(i915->pmu.i915_attr); 963 kfree(i915->pmu.pmu_attr); 964 965 i915_pmu_events_attr_group.attrs = NULL; 966 i915->pmu.i915_attr = NULL; 967 i915->pmu.pmu_attr = NULL; 968 } 969 970 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) 971 { 972 struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); 973 974 GEM_BUG_ON(!pmu->base.event_init); 975 976 /* Select the first online CPU as a designated reader. */ 977 if (!cpumask_weight(&i915_pmu_cpumask)) 978 cpumask_set_cpu(cpu, &i915_pmu_cpumask); 979 980 return 0; 981 } 982 983 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) 984 { 985 struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); 986 unsigned int target; 987 988 GEM_BUG_ON(!pmu->base.event_init); 989 990 if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) { 991 target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); 992 /* Migrate events if there is a valid target */ 993 if (target < nr_cpu_ids) { 994 cpumask_set_cpu(target, &i915_pmu_cpumask); 995 perf_pmu_migrate_context(&pmu->base, cpu, target); 996 } 997 } 998 999 return 0; 1000 } 1001 1002 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; 1003 1004 static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915) 1005 { 1006 enum cpuhp_state slot; 1007 int ret; 1008 1009 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, 1010 "perf/x86/intel/i915:online", 1011 i915_pmu_cpu_online, 1012 i915_pmu_cpu_offline); 1013 if (ret < 0) 1014 return ret; 1015 1016 slot = ret; 1017 ret = cpuhp_state_add_instance(slot, &i915->pmu.node); 1018 if (ret) { 1019 cpuhp_remove_multi_state(slot); 1020 return ret; 1021 } 1022 1023 cpuhp_slot = slot; 1024 return 0; 1025 } 1026 1027 static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915) 1028 { 1029 WARN_ON(cpuhp_slot == CPUHP_INVALID); 1030 WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node)); 1031 cpuhp_remove_multi_state(cpuhp_slot); 1032 } 1033 1034 void i915_pmu_register(struct drm_i915_private *i915) 1035 { 1036 int ret; 1037 1038 if (INTEL_GEN(i915) <= 2) { 1039 DRM_INFO("PMU not supported for this GPU."); 1040 return; 1041 } 1042 1043 i915_pmu_events_attr_group.attrs = create_event_attributes(i915); 1044 if (!i915_pmu_events_attr_group.attrs) { 1045 ret = -ENOMEM; 1046 goto err; 1047 } 1048 1049 i915->pmu.base.attr_groups = i915_pmu_attr_groups; 1050 i915->pmu.base.task_ctx_nr = perf_invalid_context; 1051 i915->pmu.base.event_init = i915_pmu_event_init; 1052 i915->pmu.base.add = i915_pmu_event_add; 1053 i915->pmu.base.del = i915_pmu_event_del; 1054 i915->pmu.base.start = i915_pmu_event_start; 1055 i915->pmu.base.stop = i915_pmu_event_stop; 1056 i915->pmu.base.read = i915_pmu_event_read; 1057 i915->pmu.base.event_idx = i915_pmu_event_event_idx; 1058 1059 spin_lock_init(&i915->pmu.lock); 1060 hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 1061 i915->pmu.timer.function = i915_sample; 1062 1063 ret = perf_pmu_register(&i915->pmu.base, "i915", -1); 1064 if (ret) 1065 goto err; 1066 1067 ret = i915_pmu_register_cpuhp_state(i915); 1068 if (ret) 1069 goto err_unreg; 1070 1071 return; 1072 1073 err_unreg: 1074 perf_pmu_unregister(&i915->pmu.base); 1075 err: 1076 i915->pmu.base.event_init = NULL; 1077 free_event_attributes(i915); 1078 DRM_NOTE("Failed to register PMU! (err=%d)\n", ret); 1079 } 1080 1081 void i915_pmu_unregister(struct drm_i915_private *i915) 1082 { 1083 if (!i915->pmu.base.event_init) 1084 return; 1085 1086 WARN_ON(i915->pmu.enable); 1087 1088 hrtimer_cancel(&i915->pmu.timer); 1089 1090 i915_pmu_unregister_cpuhp_state(i915); 1091 1092 perf_pmu_unregister(&i915->pmu.base); 1093 i915->pmu.base.event_init = NULL; 1094 free_event_attributes(i915); 1095 } 1096