/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include "i915_pmu.h"
#include "intel_ringbuffer.h"
#include "i915_drv.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, if software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}
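/*
 * i915_pmu_gt_parked() and i915_pmu_gt_unparked() below are called when the
 * GT is parked and unparked, so that the sampling timer only runs while it
 * is actually needed, as computed by pmu_needs_timer() above.
 */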
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	return true;
}

static void
update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
{
	sample->cur += mul_u32_u32(val, unit);
}

static void engines_sample(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	if (!intel_runtime_pm_get_if_in_use(dev_priv))
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		val = !i915_seqno_passed(current_seqno, last_seqno);

		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
			      PERIOD, val);

		if (val && (engine->pmu.enable &
		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
			      PERIOD, !!(val & RING_WAIT));

		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv);
}

static void frequency_sample(struct drm_i915_private *dev_priv)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake &&
		    intel_runtime_pm_get_if_in_use(dev_priv)) {
			val = intel_get_cagf(dev_priv,
					     I915_READ_NOTRACE(GEN6_RPSTAT1));
			intel_runtime_pm_put(dev_priv);
		}

		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
			      1, intel_gpu_freq(dev_priv, val));
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
			      intel_gpu_freq(dev_priv,
					     dev_priv->gt_pm.rps.cur_freq));
	}
}

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	engines_sample(i915);
	frequency_sample(i915);

	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
	return HRTIMER_RESTART;
}
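/*
 * Note on the sampling scheme above: on every timer tick update_sample()
 * multiplies a 0/1 value by PERIOD, so the busy/wait/sema samples accumulate
 * an approximation of time spent in that state in nanoseconds, while the
 * frequency samples accumulate the sampled frequency once per tick and are
 * scaled down by FREQUENCY when the event is read.
 */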
static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}
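/*
 * RC6 residency is reported as the sum of all RC6 flavours supported by the
 * platform (RC6, plus RC6p/RC6pp where present), read from the hardware
 * residency counters below.
 */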
static u64 __get_rc6(struct drm_i915_private *i915)
{
	u64 val;

	val = intel_rc6_residency_ns(i915,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

	return val;
}
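/*
 * get_rc6() below can be called while the device is runtime suspended, in
 * which case the residency registers cannot be read. Time spent in runtime
 * suspend is counted as RC6 time: an estimate is derived from the runtime PM
 * suspended_jiffies accounting and added on top of the last real counter
 * value. For example, if the last real read returned X ns and the device has
 * since been suspended for two seconds, roughly X + 2e9 ns is reported.
 */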
static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
	unsigned long flags;
	u64 val;

	if (intel_runtime_pm_get_if_in_use(i915)) {
		val = __get_rc6(i915);
		intel_runtime_pm_put(i915);

		/*
		 * If we are coming back from being runtime suspended we must
		 * be careful not to report a larger value than returned
		 * previously.
		 */

		spin_lock_irqsave(&i915->pmu.lock, flags);

		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	} else {
		struct pci_dev *pdev = i915->drm.pdev;
		struct device *kdev = &pdev->dev;

		/*
		 * We are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);
		spin_lock(&kdev->power.lock);

		/*
		 * Although intel_runtime_pm_get_if_in_use above failed to get
		 * the runtime PM reference, we cannot assume we are still in
		 * runtime suspend, since we can either: a) race with coming
		 * out of it before we took the power.lock, or b) be in one of
		 * the other runtime PM states which can also bring us here.
		 *
		 * We need to double-check that we are indeed currently runtime
		 * suspended and if not we cannot do better than report the
		 * last known RC6 value.
		 */
		if (kdev->power.runtime_status == RPM_SUSPENDED) {
			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
				i915->pmu.suspended_jiffies_last =
						  kdev->power.suspended_jiffies;

			val = kdev->power.suspended_jiffies -
			      i915->pmu.suspended_jiffies_last;
			val += jiffies - kdev->power.accounting_timestamp;

			val = jiffies_to_nsecs(val);
			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;

			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
		}

		spin_unlock(&kdev->power.lock);
		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	}

	return val;
#else
	return __get_rc6(i915);
#endif
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   FREQUENCY);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   FREQUENCY);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915);
			break;
		}
	}

	return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}
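/*
 * Multiple perf events can map to the same underlying counter, so enabling
 * is reference counted below: a global enable bitmask and per-bit count in
 * i915->pmu, plus a per-engine bitmask and per-sample count for engine
 * events.
 */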
static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counter
	 * are stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		engine->pmu.enable |= BIT(sample);

		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners, even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}
static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}
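/*
 * The attribute groups below are exposed by perf under
 * /sys/bus/event_source/devices/i915/ (as "format", "events" and "cpumask"
 * entries), which is how tools such as perf discover the available counters.
 */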
static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};

#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}
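/*
 * create_event_attributes() below builds one sysfs attribute per supported
 * counter, named after the event ("interrupts", "rc6-residency", ...) or
 * "<engine>-<sample>" for engine events, plus a matching "<name>.unit"
 * attribute where a unit applies (for example "MHz" or "ns").
 */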
static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	i915->pmu.i915_attr = i915_attr;
	i915->pmu.pmu_attr = pmu_attr;

	return attr;

err:
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct drm_i915_private *i915)
{
	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(i915_pmu_events_attr_group.attrs);
	kfree(i915->pmu.i915_attr);
	kfree(i915->pmu.pmu_attr);

	i915_pmu_events_attr_group.attrs = NULL;
	i915->pmu.i915_attr = NULL;
	i915->pmu.pmu_attr = NULL;
}
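/*
 * i915 events are not CPU specific, so they are exposed on a single CPU
 * which userspace discovers via the "cpumask" attribute. The hotplug
 * callbacks below select the first online CPU as the designated reader and
 * migrate the perf context to a sibling CPU if that CPU goes offline.
 */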
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.");
		return;
	}

	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
	if (!i915_pmu_events_attr_group.attrs) {
		ret = -ENOMEM;
		goto err;
	}

	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr = perf_invalid_context;
	i915->pmu.base.event_init = i915_pmu_event_init;
	i915->pmu.base.add = i915_pmu_event_add;
	i915->pmu.base.del = i915_pmu_event_del;
	i915->pmu.base.start = i915_pmu_event_start;
	i915->pmu.base.stop = i915_pmu_event_stop;
	i915->pmu.base.read = i915_pmu_event_read;
	i915->pmu.base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
}
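/*
 * Example usage with the perf tool (a sketch; exact event names depend on
 * the attributes created above and on the engines present on the platform):
 *
 *   perf stat -a -e i915/actual-frequency/,i915/rc6-residency/ sleep 1
 */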