/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/perf_event.h>
#include <linux/pm_runtime.h>

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_ringbuffer.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, when software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	return true;
}

static void
update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
{
	sample->cur += mul_u32_u32(val, unit);
}

static void engines_sample(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	if (!intel_runtime_pm_get_if_in_use(dev_priv))
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		val = !i915_seqno_passed(current_seqno, last_seqno);

		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
			      PERIOD, val);

		if (val && (engine->pmu.enable &
		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
			      PERIOD, !!(val & RING_WAIT));

		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv);
}

static void frequency_sample(struct drm_i915_private *dev_priv)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake &&
		    intel_runtime_pm_get_if_in_use(dev_priv)) {
			val = intel_get_cagf(dev_priv,
					     I915_READ_NOTRACE(GEN6_RPSTAT1));
			intel_runtime_pm_put(dev_priv);
		}

		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
			      1, intel_gpu_freq(dev_priv, val));
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
			      intel_gpu_freq(dev_priv,
					     dev_priv->gt_pm.rps.cur_freq));
	}
}

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	engines_sample(i915);
	frequency_sample(i915);

	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
	return HRTIMER_RESTART;
}

static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);

	if (!intel_engine_lookup_user(i915, engine_event_class(event),
				      engine_event_instance(event)))
		return -ENODEV;

	switch (engine_event_sample(event)) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event)) {
		ret = engine_event_init(event);
	} else {
		ret = 0;
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
				/* Requires a mutex for sampling! */
				ret = -ENODEV;
			/* Fall-through. */
		case I915_PMU_REQUESTED_FREQUENCY:
			if (INTEL_GEN(i915) < 6)
				ret = -ENODEV;
			break;
		case I915_PMU_INTERRUPTS:
			break;
		case I915_PMU_RC6_RESIDENCY:
			if (!HAS_RC6(i915))
				ret = -ENODEV;
			break;
		default:
			ret = -ENOENT;
			break;
		}
	}
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   engine->pmu.busy_stats) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   FREQUENCY);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   FREQUENCY);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			intel_runtime_pm_get(i915);
			val = intel_rc6_residency_ns(i915,
						     IS_VALLEYVIEW(i915) ?
						     VLV_GT_RENDER_RC6 :
						     GEN6_GT_GFX_RC6);
			if (HAS_RC6p(i915))
				val += intel_rc6_residency_ns(i915,
							      GEN6_GT_GFX_RC6p);
			if (HAS_RC6pp(i915))
				val += intel_rc6_residency_ns(i915,
							      GEN6_GT_GFX_RC6pp);
			intel_runtime_pm_put(i915);
			break;
		}
	}

	return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
{
	return intel_engine_supports_stats(engine) &&
	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		engine->pmu.enable |= BIT(sample);

		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
		if (engine->pmu.enable_count[sample]++ == 0) {
			/*
			 * Enable engine busy stats tracking if needed or
			 * alternatively cancel the scheduled disable.
			 *
			 * If the delayed disable was pending, cancel it and
			 * in this case do not enable since it already is.
			 */
			if (engine_needs_busy_stats(engine) &&
			    !engine->pmu.busy_stats) {
				engine->pmu.busy_stats = true;
				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
					intel_enable_engine_stats(engine);
			}
		}
	}

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void __disable_busy_stats(struct work_struct *work)
{
	struct intel_engine_cs *engine =
	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);

	intel_disable_engine_stats(engine);
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0) {
			engine->pmu.enable &= ~BIT(sample);
			if (!engine_needs_busy_stats(engine) &&
			    engine->pmu.busy_stats) {
				engine->pmu.busy_stats = false;
				/*
				 * We request a delayed disable so that rapid
				 * on/off cycles of events, which can happen
				 * when tools like perf stat start, are handled
				 * in a nicer way.
				 *
				 * In addition, this also helps with busy stats
				 * accuracy with background CPU offline/online
				 * migration events.
				 */
				queue_delayed_work(system_wq,
						   &engine->pmu.disable_busy_stats,
						   round_jiffies_up_relative(HZ));
			}
		}
	}

	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

#define I915_EVENT_ATTR(_name, _config) \
	(&((struct i915_ext_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_event_show, NULL), \
		  .val = _config, } \
	})[0].attr.attr)

#define I915_EVENT_STR(_name, _str) \
	(&((struct perf_pmu_events_attr[]) { \
		{ .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
		  .id = 0, \
		  .event_str = _str, } \
	})[0].attr.attr)

#define I915_EVENT(_name, _config, _unit) \
	I915_EVENT_ATTR(_name, _config), \
	I915_EVENT_STR(_name.unit, _unit)

#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \
	I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \
	I915_EVENT_STR(_name.unit, "ns")

#define I915_ENGINE_EVENTS(_name, _class, _instance) \
	I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
	I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
	I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT)

static struct attribute *i915_pmu_events_attrs[] = {
	I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0),
	I915_ENGINE_EVENTS(bcs, I915_ENGINE_CLASS_COPY, 0),
	I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 0),
	I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 1),
	I915_ENGINE_EVENTS(vecs, I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),

	I915_EVENT(actual-frequency, I915_PMU_ACTUAL_FREQUENCY, "MHz"),
	I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"),

	I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS),

	I915_EVENT(rc6-residency, I915_PMU_RC6_RESIDENCY, "ns"),

	NULL,
};

static const struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	.attrs = i915_pmu_events_attrs,
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};

static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.");
		return;
	}

	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr = perf_invalid_context;
	i915->pmu.base.event_init = i915_pmu_event_init;
	i915->pmu.base.add = i915_pmu_event_add;
	i915->pmu.base.del = i915_pmu_event_del;
	i915->pmu.base.start = i915_pmu_event_start;
	i915->pmu.base.stop = i915_pmu_event_stop;
	i915->pmu.base.read = i915_pmu_event_read;
	i915->pmu.base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	for_each_engine(engine, i915, id)
		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
				  __disable_busy_stats);

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	for_each_engine(engine, i915, id) {
		GEM_BUG_ON(engine->pmu.busy_stats);
		flush_delayed_work(&engine->pmu.disable_busy_stats);
	}

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
}
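
/*
 * Userspace usage sketch (illustration only, not part of the driver): the
 * PMU registered above is consumed through perf_event_open(2) as a
 * system-wide event, with the dynamic event source type read from sysfs
 * and the config value taken from the i915 uapi header. The program below
 * is an assumption about how a minimal reader could look; the choice of
 * CPU 0, the header include path and the error handling are simplified,
 * not prescriptive.
 *
 *	#include <drm/i915_drm.h>	// I915_PMU_ACTUAL_FREQUENCY (uapi)
 *	#include <fcntl.h>
 *	#include <linux/perf_event.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		struct perf_event_attr attr;
 *		char buf[32] = {};
 *		uint64_t before, after;
 *		int fd, sysfs;
 *
 *		// Dynamic PMU type exported by perf_pmu_register("i915").
 *		sysfs = open("/sys/bus/event_source/devices/i915/type",
 *			     O_RDONLY);
 *		if (sysfs < 0 || read(sysfs, buf, sizeof(buf) - 1) <= 0)
 *			return EXIT_FAILURE;
 *		close(sysfs);
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.type = strtoul(buf, NULL, 10);
 *		attr.size = sizeof(attr);
 *		attr.config = I915_PMU_ACTUAL_FREQUENCY;
 *
 *		// i915 events are system-wide: pid == -1 and a cpu from the
 *		// PMU's "cpumask" attribute (CPU 0 assumed here).
 *		fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
 *		if (fd < 0)
 *			return EXIT_FAILURE;
 *
 *		read(fd, &before, sizeof(before));
 *		sleep(1);
 *		read(fd, &after, sizeof(after));
 *
 *		// The counter accumulates the sampled frequency divided by
 *		// FREQUENCY, so a one second delta approximates average MHz.
 *		printf("actual-frequency: ~%llu MHz\n",
 *		       (unsigned long long)(after - before));
 *		close(fd);
 *		return EXIT_SUCCESS;
 *	}
 */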