xref: /openbmc/linux/drivers/gpu/drm/i915/i915_pmu.c (revision 25ebbc57ca56df3cf9149e9da6b1d3169c8487db)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6 
7 #include <linux/pm_runtime.h>
8 
9 #include "gt/intel_engine.h"
10 #include "gt/intel_engine_pm.h"
11 #include "gt/intel_engine_regs.h"
12 #include "gt/intel_engine_user.h"
13 #include "gt/intel_gt_pm.h"
14 #include "gt/intel_gt_regs.h"
15 #include "gt/intel_rc6.h"
16 #include "gt/intel_rps.h"
17 
18 #include "i915_drv.h"
19 #include "i915_pmu.h"
20 
21 /* Frequency for the sampling timer for events which need it. */
22 #define FREQUENCY 200
23 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
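/*
 * At FREQUENCY = 200 this works out to NSEC_PER_SEC / 200 = 5,000,000 ns
 * (5 ms) per tick, comfortably above the 10,000 ns floor enforced by
 * max_t(), so the clamp only matters for much higher frequencies.
 */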
24 
25 #define ENGINE_SAMPLE_MASK \
26 	(BIT(I915_SAMPLE_BUSY) | \
27 	 BIT(I915_SAMPLE_WAIT) | \
28 	 BIT(I915_SAMPLE_SEMA))
29 
30 static cpumask_t i915_pmu_cpumask;
31 static unsigned int i915_pmu_target_cpu = -1;
32 
33 static u8 engine_config_sample(u64 config)
34 {
35 	return config & I915_PMU_SAMPLE_MASK;
36 }
37 
38 static u8 engine_event_sample(struct perf_event *event)
39 {
40 	return engine_config_sample(event->attr.config);
41 }
42 
43 static u8 engine_event_class(struct perf_event *event)
44 {
45 	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
46 }
47 
48 static u8 engine_event_instance(struct perf_event *event)
49 {
50 	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
51 }
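/*
 * The helpers above unpack the engine event encoding from
 * include/uapi/drm/i915_drm.h, where __I915_PMU_ENGINE() packs (roughly)
 * class << I915_PMU_CLASS_SHIFT | instance << I915_PMU_SAMPLE_BITS | sample,
 * so e.g. I915_PMU_ENGINE_BUSY(class, instance) selects the I915_SAMPLE_BUSY
 * counter of one specific engine.
 */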
52 
53 static bool is_engine_config(u64 config)
54 {
55 	return config < __I915_PMU_OTHER(0);
56 }
57 
58 static unsigned int other_bit(const u64 config)
59 {
60 	unsigned int val;
61 
62 	switch (config) {
63 	case I915_PMU_ACTUAL_FREQUENCY:
64 		val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
65 		break;
66 	case I915_PMU_REQUESTED_FREQUENCY:
67 		val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
68 		break;
69 	case I915_PMU_RC6_RESIDENCY:
70 		val = __I915_PMU_RC6_RESIDENCY_ENABLED;
71 		break;
72 	default:
73 		/*
74 		 * Events that do not require sampling, or tracking state
75 		 * transitions between enabled and disabled, can be ignored.
76 		 */
77 		return -1;
78 	}
79 
80 	return I915_ENGINE_SAMPLE_COUNT + val;
81 }
82 
83 static unsigned int config_bit(const u64 config)
84 {
85 	if (is_engine_config(config))
86 		return engine_config_sample(config);
87 	else
88 		return other_bit(config);
89 }
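/*
 * The enable bitmask is therefore laid out with the per-engine sample types
 * (busy/wait/sema) in the low I915_ENGINE_SAMPLE_COUNT bits and the "other"
 * events (frequency, RC6) above them, which is why other_bit() offsets its
 * result by I915_ENGINE_SAMPLE_COUNT.
 */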
90 
91 static u64 config_mask(u64 config)
92 {
93 	return BIT_ULL(config_bit(config));
94 }
95 
96 static bool is_engine_event(struct perf_event *event)
97 {
98 	return is_engine_config(event->attr.config);
99 }
100 
101 static unsigned int event_bit(struct perf_event *event)
102 {
103 	return config_bit(event->attr.config);
104 }
105 
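/*
 * Decide whether the sampling hrtimer must run: it is needed while a
 * frequency counter is enabled, or while the GPU is active and at least one
 * enabled engine counter cannot be derived from software busyness stats.
 */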
106 static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
107 {
108 	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
109 	u32 enable;
110 
111 	/*
112 	 * Only some counters need the sampling timer.
113 	 *
114 	 * We start with a bitmask of all currently enabled events.
115 	 */
116 	enable = pmu->enable;
117 
118 	/*
119 	 * Mask out all the ones which do not need the timer, or in
120 	 * other words keep all the ones that could need the timer.
121 	 */
122 	enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
123 		  config_mask(I915_PMU_REQUESTED_FREQUENCY) |
124 		  ENGINE_SAMPLE_MASK;
125 
126 	/*
127 	 * When the GPU is idle per-engine counters do not need to be
128 	 * running so clear those bits out.
129 	 */
130 	if (!gpu_active)
131 		enable &= ~ENGINE_SAMPLE_MASK;
132 	/*
133 	 * Also, if software busyness tracking is available we do not
134 	 * need the timer for the I915_SAMPLE_BUSY counter.
135 	 */
136 	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
137 		enable &= ~BIT(I915_SAMPLE_BUSY);
138 
139 	/*
140 	 * If some bits remain it means we need the sampling timer running.
141 	 */
142 	return enable;
143 }
144 
145 static u64 __get_rc6(struct intel_gt *gt)
146 {
147 	struct drm_i915_private *i915 = gt->i915;
148 	u64 val;
149 
150 	val = intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6);
151 
152 	if (HAS_RC6p(i915))
153 		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6p);
154 
155 	if (HAS_RC6pp(i915))
156 		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6pp);
157 
158 	return val;
159 }
160 
161 static inline s64 ktime_since_raw(const ktime_t kt)
162 {
163 	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
164 }
165 
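/*
 * Read the RC6 residency, either from the hardware when the GT is awake, or
 * estimated as "last known value + wall time since parking" when we believe
 * the device is runtime suspended.  The LAST_REPORTED sample keeps the value
 * monotonic in case the estimate overshoots the real residency once the GT
 * wakes up again.
 */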
166 static u64 get_rc6(struct intel_gt *gt)
167 {
168 	struct drm_i915_private *i915 = gt->i915;
169 	struct i915_pmu *pmu = &i915->pmu;
170 	unsigned long flags;
171 	bool awake = false;
172 	u64 val;
173 
174 	if (intel_gt_pm_get_if_awake(gt)) {
175 		val = __get_rc6(gt);
176 		intel_gt_pm_put_async(gt);
177 		awake = true;
178 	}
179 
180 	spin_lock_irqsave(&pmu->lock, flags);
181 
182 	if (awake) {
183 		pmu->sample[__I915_SAMPLE_RC6].cur = val;
184 	} else {
185 		/*
186 		 * We think we are runtime suspended.
187 		 *
188 		 * Report the delta from when the device was suspended to now,
189 		 * on top of the last known real value, as the approximated RC6
190 		 * counter value.
191 		 */
192 		val = ktime_since_raw(pmu->sleep_last);
193 		val += pmu->sample[__I915_SAMPLE_RC6].cur;
194 	}
195 
196 	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
197 		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
198 	else
199 		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
200 
201 	spin_unlock_irqrestore(&pmu->lock, flags);
202 
203 	return val;
204 }
205 
206 static void init_rc6(struct i915_pmu *pmu)
207 {
208 	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
209 	intel_wakeref_t wakeref;
210 
211 	with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
212 		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
213 		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
214 					pmu->sample[__I915_SAMPLE_RC6].cur;
215 		pmu->sleep_last = ktime_get_raw();
216 	}
217 }
218 
219 static void park_rc6(struct drm_i915_private *i915)
220 {
221 	struct i915_pmu *pmu = &i915->pmu;
222 
223 	pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
224 	pmu->sleep_last = ktime_get_raw();
225 }
226 
227 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
228 {
229 	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
230 		pmu->timer_enabled = true;
231 		pmu->timer_last = ktime_get();
232 		hrtimer_start_range_ns(&pmu->timer,
233 				       ns_to_ktime(PERIOD), 0,
234 				       HRTIMER_MODE_REL_PINNED);
235 	}
236 }
237 
238 void i915_pmu_gt_parked(struct drm_i915_private *i915)
239 {
240 	struct i915_pmu *pmu = &i915->pmu;
241 
242 	if (!pmu->base.event_init)
243 		return;
244 
245 	spin_lock_irq(&pmu->lock);
246 
247 	park_rc6(i915);
248 
249 	/*
250 	 * Signal sampling timer to stop if only engine events are enabled and
251 	 * GPU went idle.
252 	 */
253 	pmu->timer_enabled = pmu_needs_timer(pmu, false);
254 
255 	spin_unlock_irq(&pmu->lock);
256 }
257 
258 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
259 {
260 	struct i915_pmu *pmu = &i915->pmu;
261 
262 	if (!pmu->base.event_init)
263 		return;
264 
265 	spin_lock_irq(&pmu->lock);
266 
267 	/*
268 	 * Re-enable sampling timer when GPU goes active.
269 	 */
270 	__i915_pmu_maybe_start_timer(pmu);
271 
272 	spin_unlock_irq(&pmu->lock);
273 }
274 
275 static void
276 add_sample(struct i915_pmu_sample *sample, u32 val)
277 {
278 	sample->cur += val;
279 }
280 
281 static bool exclusive_mmio_access(const struct drm_i915_private *i915)
282 {
283 	/*
284 	 * We have to avoid concurrent mmio cache line access on gen7 or
285 	 * risk a machine hang. For a fun history lesson dig out the old
286 	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
287 	 */
288 	return GRAPHICS_VER(i915) == 7;
289 }
290 
291 static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
292 {
293 	struct intel_engine_pmu *pmu = &engine->pmu;
294 	bool busy;
295 	u32 val;
296 
297 	val = ENGINE_READ_FW(engine, RING_CTL);
298 	if (val == 0) /* powerwell off => engine idle */
299 		return;
300 
301 	if (val & RING_WAIT)
302 		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
303 	if (val & RING_WAIT_SEMAPHORE)
304 		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
305 
306 	/* No need to sample when busy stats are supported. */
307 	if (intel_engine_supports_stats(engine))
308 		return;
309 
310 	/*
311 	 * While waiting on a semaphore or event, MI_MODE reports the
312 	 * ring as idle. However, previously using the seqno, and with
313 	 * execlists sampling, we account for the ring waiting as the
314 	 * engine being busy. Therefore, we record the sample as being
315 	 * busy if either waiting or !idle.
316 	 */
317 	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
318 	if (!busy) {
319 		val = ENGINE_READ_FW(engine, RING_MI_MODE);
320 		busy = !(val & MODE_IDLE);
321 	}
322 	if (busy)
323 		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
324 }
325 
326 static void
327 engines_sample(struct intel_gt *gt, unsigned int period_ns)
328 {
329 	struct drm_i915_private *i915 = gt->i915;
330 	struct intel_engine_cs *engine;
331 	enum intel_engine_id id;
332 	unsigned long flags;
333 
334 	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
335 		return;
336 
337 	if (!intel_gt_pm_is_awake(gt))
338 		return;
339 
340 	for_each_engine(engine, gt, id) {
341 		if (!intel_engine_pm_get_if_awake(engine))
342 			continue;
343 
344 		if (exclusive_mmio_access(i915)) {
345 			spin_lock_irqsave(&engine->uncore->lock, flags);
346 			engine_sample(engine, period_ns);
347 			spin_unlock_irqrestore(&engine->uncore->lock, flags);
348 		} else {
349 			engine_sample(engine, period_ns);
350 		}
351 
352 		intel_engine_pm_put_async(engine);
353 	}
354 }
355 
356 static void
357 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
358 {
359 	sample->cur += mul_u32_u32(val, mul);
360 }
361 
362 static bool frequency_sampling_enabled(struct i915_pmu *pmu)
363 {
364 	return pmu->enable &
365 	       (config_mask(I915_PMU_ACTUAL_FREQUENCY) |
366 		config_mask(I915_PMU_REQUESTED_FREQUENCY));
367 }
368 
369 static void
370 frequency_sample(struct intel_gt *gt, unsigned int period_ns)
371 {
372 	struct drm_i915_private *i915 = gt->i915;
373 	struct i915_pmu *pmu = &i915->pmu;
374 	struct intel_rps *rps = &gt->rps;
375 
376 	if (!frequency_sampling_enabled(pmu))
377 		return;
378 
379 	/* Report 0/0 (actual/requested) frequency while parked. */
380 	if (!intel_gt_pm_get_if_awake(gt))
381 		return;
382 
383 	if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
384 		u32 val;
385 
386 		/*
387 		 * We take a quick peek here without using forcewake
388 		 * so that we don't perturb the system under observation
389 		 * (forcewake => !rc6 => increased power use). We expect
390 		 * that if the read fails because it is outside of the
391 		 * mmio power well, then it will return 0 -- in which
392 		 * case we assume the system is running at the intended
393 		 * frequency. Fortunately, the read should rarely fail!
394 		 */
395 		val = intel_rps_read_rpstat_fw(rps);
396 		if (val)
397 			val = intel_rps_get_cagf(rps, val);
398 		else
399 			val = rps->cur_freq;
400 
401 		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
402 				intel_gpu_freq(rps, val), period_ns / 1000);
403 	}
404 
405 	if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
406 		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
407 				intel_rps_get_requested_frequency(rps),
408 				period_ns / 1000);
409 	}
410 
411 	intel_gt_pm_put_async(gt);
412 }
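/*
 * Note on units: each tick above accumulates freq-in-MHz * period-in-usec,
 * i.e. MHz*usec.  The read side (__i915_pmu_event_read) divides by
 * USEC_PER_SEC, leaving MHz*sec, so a consumer dividing the counter delta by
 * elapsed seconds obtains the average frequency in MHz over that interval.
 */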
413 
414 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
415 {
416 	struct drm_i915_private *i915 =
417 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
418 	struct i915_pmu *pmu = &i915->pmu;
419 	struct intel_gt *gt = to_gt(i915);
420 	unsigned int period_ns;
421 	ktime_t now;
422 
423 	if (!READ_ONCE(pmu->timer_enabled))
424 		return HRTIMER_NORESTART;
425 
426 	now = ktime_get();
427 	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
428 	pmu->timer_last = now;
429 
430 	/*
431 	 * Strictly speaking the passed-in period may not be 100% accurate for
432 	 * all internal calculations, since some amount of time can be spent on
433 	 * grabbing the forcewake. However, the potential error from timer
434 	 * callback delay greatly dominates this, so we keep it simple.
435 	 */
436 	engines_sample(gt, period_ns);
437 	frequency_sample(gt, period_ns);
438 
439 	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
440 
441 	return HRTIMER_RESTART;
442 }
443 
444 static void i915_pmu_event_destroy(struct perf_event *event)
445 {
446 	struct drm_i915_private *i915 =
447 		container_of(event->pmu, typeof(*i915), pmu.base);
448 
449 	drm_WARN_ON(&i915->drm, event->parent);
450 
451 	drm_dev_put(&i915->drm);
452 }
453 
454 static int
455 engine_event_status(struct intel_engine_cs *engine,
456 		    enum drm_i915_pmu_engine_sample sample)
457 {
458 	switch (sample) {
459 	case I915_SAMPLE_BUSY:
460 	case I915_SAMPLE_WAIT:
461 		break;
462 	case I915_SAMPLE_SEMA:
463 		if (GRAPHICS_VER(engine->i915) < 6)
464 			return -ENODEV;
465 		break;
466 	default:
467 		return -ENOENT;
468 	}
469 
470 	return 0;
471 }
472 
473 static int
474 config_status(struct drm_i915_private *i915, u64 config)
475 {
476 	struct intel_gt *gt = to_gt(i915);
477 
478 	switch (config) {
479 	case I915_PMU_ACTUAL_FREQUENCY:
480 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
481 			/* Requires a mutex for sampling! */
482 			return -ENODEV;
483 		fallthrough;
484 	case I915_PMU_REQUESTED_FREQUENCY:
485 		if (GRAPHICS_VER(i915) < 6)
486 			return -ENODEV;
487 		break;
488 	case I915_PMU_INTERRUPTS:
489 		break;
490 	case I915_PMU_RC6_RESIDENCY:
491 		if (!gt->rc6.supported)
492 			return -ENODEV;
493 		break;
494 	case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
495 		break;
496 	default:
497 		return -ENOENT;
498 	}
499 
500 	return 0;
501 }
502 
503 static int engine_event_init(struct perf_event *event)
504 {
505 	struct drm_i915_private *i915 =
506 		container_of(event->pmu, typeof(*i915), pmu.base);
507 	struct intel_engine_cs *engine;
508 
509 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
510 					  engine_event_instance(event));
511 	if (!engine)
512 		return -ENODEV;
513 
514 	return engine_event_status(engine, engine_event_sample(event));
515 }
516 
517 static int i915_pmu_event_init(struct perf_event *event)
518 {
519 	struct drm_i915_private *i915 =
520 		container_of(event->pmu, typeof(*i915), pmu.base);
521 	struct i915_pmu *pmu = &i915->pmu;
522 	int ret;
523 
524 	if (pmu->closed)
525 		return -ENODEV;
526 
527 	if (event->attr.type != event->pmu->type)
528 		return -ENOENT;
529 
530 	/* unsupported modes and filters */
531 	if (event->attr.sample_period) /* no sampling */
532 		return -EINVAL;
533 
534 	if (has_branch_stack(event))
535 		return -EOPNOTSUPP;
536 
537 	if (event->cpu < 0)
538 		return -EINVAL;
539 
540 	/* only allow running on one cpu at a time */
541 	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
542 		return -EINVAL;
543 
544 	if (is_engine_event(event))
545 		ret = engine_event_init(event);
546 	else
547 		ret = config_status(i915, event->attr.config);
548 	if (ret)
549 		return ret;
550 
551 	if (!event->parent) {
552 		drm_dev_get(&i915->drm);
553 		event->destroy = i915_pmu_event_destroy;
554 	}
555 
556 	return 0;
557 }
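/*
 * Sketch of how userspace might open one of these events (error handling,
 * headers and the sysfs parsing elided).  The dynamic PMU type id has to be
 * read from /sys/bus/event_source/devices/i915/type and the event opened
 * system-wide (pid == -1) on a CPU from the exported cpumask, since
 * event->cpu < 0 is rejected above:
 *
 *	struct perf_event_attr attr = {
 *		.type = i915_pmu_type,			/* from the sysfs "type" file */
 *		.size = sizeof(attr),
 *		.config = I915_PMU_RC6_RESIDENCY,	/* from <drm/i915_drm.h> */
 *	};
 *	int fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
 *	uint64_t value;
 *	read(fd, &value, sizeof(value));		/* RC6 residency in ns */
 */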
558 
559 static u64 __i915_pmu_event_read(struct perf_event *event)
560 {
561 	struct drm_i915_private *i915 =
562 		container_of(event->pmu, typeof(*i915), pmu.base);
563 	struct i915_pmu *pmu = &i915->pmu;
564 	u64 val = 0;
565 
566 	if (is_engine_event(event)) {
567 		u8 sample = engine_event_sample(event);
568 		struct intel_engine_cs *engine;
569 
570 		engine = intel_engine_lookup_user(i915,
571 						  engine_event_class(event),
572 						  engine_event_instance(event));
573 
574 		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
575 			/* Do nothing */
576 		} else if (sample == I915_SAMPLE_BUSY &&
577 			   intel_engine_supports_stats(engine)) {
578 			ktime_t unused;
579 
580 			val = ktime_to_ns(intel_engine_get_busy_time(engine,
581 								     &unused));
582 		} else {
583 			val = engine->pmu.sample[sample].cur;
584 		}
585 	} else {
586 		switch (event->attr.config) {
587 		case I915_PMU_ACTUAL_FREQUENCY:
588 			val =
589 			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
590 				   USEC_PER_SEC /* to MHz */);
591 			break;
592 		case I915_PMU_REQUESTED_FREQUENCY:
593 			val =
594 			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
595 				   USEC_PER_SEC /* to MHz */);
596 			break;
597 		case I915_PMU_INTERRUPTS:
598 			val = READ_ONCE(pmu->irq_count);
599 			break;
600 		case I915_PMU_RC6_RESIDENCY:
601 			val = get_rc6(to_gt(i915));
602 			break;
603 		case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
604 			val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
605 			break;
606 		}
607 	}
608 
609 	return val;
610 }
611 
612 static void i915_pmu_event_read(struct perf_event *event)
613 {
614 	struct drm_i915_private *i915 =
615 		container_of(event->pmu, typeof(*i915), pmu.base);
616 	struct hw_perf_event *hwc = &event->hw;
617 	struct i915_pmu *pmu = &i915->pmu;
618 	u64 prev, new;
619 
620 	if (pmu->closed) {
621 		event->hw.state = PERF_HES_STOPPED;
622 		return;
623 	}
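	/*
	 * Lockless read: keep re-sampling until prev_count can be exchanged
	 * for the new value without racing another reader, then publish only
	 * the delta.
	 */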
624 again:
625 	prev = local64_read(&hwc->prev_count);
626 	new = __i915_pmu_event_read(event);
627 
628 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
629 		goto again;
630 
631 	local64_add(new - prev, &event->count);
632 }
633 
634 static void i915_pmu_enable(struct perf_event *event)
635 {
636 	struct drm_i915_private *i915 =
637 		container_of(event->pmu, typeof(*i915), pmu.base);
638 	struct i915_pmu *pmu = &i915->pmu;
639 	unsigned long flags;
640 	unsigned int bit;
641 
642 	bit = event_bit(event);
643 	if (bit == -1)
644 		goto update;
645 
646 	spin_lock_irqsave(&pmu->lock, flags);
647 
648 	/*
649 	 * Update the bitmask of enabled events and increment
650 	 * the event reference counter.
651 	 */
652 	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
653 	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
654 	GEM_BUG_ON(pmu->enable_count[bit] == ~0);
655 
656 	pmu->enable |= BIT_ULL(bit);
657 	pmu->enable_count[bit]++;
658 
659 	/*
660 	 * Start the sampling timer if needed and not already enabled.
661 	 */
662 	__i915_pmu_maybe_start_timer(pmu);
663 
664 	/*
665 	 * For per-engine events the bitmask and reference counting
666 	 * is stored per engine.
667 	 */
668 	if (is_engine_event(event)) {
669 		u8 sample = engine_event_sample(event);
670 		struct intel_engine_cs *engine;
671 
672 		engine = intel_engine_lookup_user(i915,
673 						  engine_event_class(event),
674 						  engine_event_instance(event));
675 
676 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
677 			     I915_ENGINE_SAMPLE_COUNT);
678 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
679 			     I915_ENGINE_SAMPLE_COUNT);
680 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
681 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
682 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
683 
684 		engine->pmu.enable |= BIT(sample);
685 		engine->pmu.enable_count[sample]++;
686 	}
687 
688 	spin_unlock_irqrestore(&pmu->lock, flags);
689 
690 update:
691 	/*
692 	 * Store the current counter value so we can report the correct delta
693 	 * for all listeners, even when the event was already enabled and has
694 	 * an existing non-zero value.
695 	 */
696 	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
697 }
698 
699 static void i915_pmu_disable(struct perf_event *event)
700 {
701 	struct drm_i915_private *i915 =
702 		container_of(event->pmu, typeof(*i915), pmu.base);
703 	unsigned int bit = event_bit(event);
704 	struct i915_pmu *pmu = &i915->pmu;
705 	unsigned long flags;
706 
707 	if (bit == -1)
708 		return;
709 
710 	spin_lock_irqsave(&pmu->lock, flags);
711 
712 	if (is_engine_event(event)) {
713 		u8 sample = engine_event_sample(event);
714 		struct intel_engine_cs *engine;
715 
716 		engine = intel_engine_lookup_user(i915,
717 						  engine_event_class(event),
718 						  engine_event_instance(event));
719 
720 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
721 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
722 		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
723 
724 		/*
725 		 * Decrement the reference count and clear the enabled
726 		 * bitmask when the last listener on an event goes away.
727 		 */
728 		if (--engine->pmu.enable_count[sample] == 0)
729 			engine->pmu.enable &= ~BIT(sample);
730 	}
731 
732 	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
733 	GEM_BUG_ON(pmu->enable_count[bit] == 0);
734 	/*
735 	 * Decrement the reference count and clear the enabled
736 	 * bitmask when the last listener on an event goes away.
737 	 */
738 	if (--pmu->enable_count[bit] == 0) {
739 		pmu->enable &= ~BIT_ULL(bit);
740 		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
741 	}
742 
743 	spin_unlock_irqrestore(&pmu->lock, flags);
744 }
745 
746 static void i915_pmu_event_start(struct perf_event *event, int flags)
747 {
748 	struct drm_i915_private *i915 =
749 		container_of(event->pmu, typeof(*i915), pmu.base);
750 	struct i915_pmu *pmu = &i915->pmu;
751 
752 	if (pmu->closed)
753 		return;
754 
755 	i915_pmu_enable(event);
756 	event->hw.state = 0;
757 }
758 
759 static void i915_pmu_event_stop(struct perf_event *event, int flags)
760 {
761 	if (flags & PERF_EF_UPDATE)
762 		i915_pmu_event_read(event);
763 	i915_pmu_disable(event);
764 	event->hw.state = PERF_HES_STOPPED;
765 }
766 
767 static int i915_pmu_event_add(struct perf_event *event, int flags)
768 {
769 	struct drm_i915_private *i915 =
770 		container_of(event->pmu, typeof(*i915), pmu.base);
771 	struct i915_pmu *pmu = &i915->pmu;
772 
773 	if (pmu->closed)
774 		return -ENODEV;
775 
776 	if (flags & PERF_EF_START)
777 		i915_pmu_event_start(event, flags);
778 
779 	return 0;
780 }
781 
782 static void i915_pmu_event_del(struct perf_event *event, int flags)
783 {
784 	i915_pmu_event_stop(event, PERF_EF_UPDATE);
785 }
786 
787 static int i915_pmu_event_event_idx(struct perf_event *event)
788 {
789 	return 0;
790 }
791 
792 struct i915_str_attribute {
793 	struct device_attribute attr;
794 	const char *str;
795 };
796 
797 static ssize_t i915_pmu_format_show(struct device *dev,
798 				    struct device_attribute *attr, char *buf)
799 {
800 	struct i915_str_attribute *eattr;
801 
802 	eattr = container_of(attr, struct i915_str_attribute, attr);
803 	return sprintf(buf, "%s\n", eattr->str);
804 }
805 
806 #define I915_PMU_FORMAT_ATTR(_name, _config) \
807 	(&((struct i915_str_attribute[]) { \
808 		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
809 		  .str = _config, } \
810 	})[0].attr.attr)
811 
812 static struct attribute *i915_pmu_format_attrs[] = {
813 	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
814 	NULL,
815 };
816 
817 static const struct attribute_group i915_pmu_format_attr_group = {
818 	.name = "format",
819 	.attrs = i915_pmu_format_attrs,
820 };
821 
822 struct i915_ext_attribute {
823 	struct device_attribute attr;
824 	unsigned long val;
825 };
826 
827 static ssize_t i915_pmu_event_show(struct device *dev,
828 				   struct device_attribute *attr, char *buf)
829 {
830 	struct i915_ext_attribute *eattr;
831 
832 	eattr = container_of(attr, struct i915_ext_attribute, attr);
833 	return sprintf(buf, "config=0x%lx\n", eattr->val);
834 }
835 
836 static ssize_t cpumask_show(struct device *dev,
837 			    struct device_attribute *attr, char *buf)
838 {
839 	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
840 }
841 
842 static DEVICE_ATTR_RO(cpumask);
843 
844 static struct attribute *i915_cpumask_attrs[] = {
845 	&dev_attr_cpumask.attr,
846 	NULL,
847 };
848 
849 static const struct attribute_group i915_pmu_cpumask_attr_group = {
850 	.attrs = i915_cpumask_attrs,
851 };
852 
853 #define __event(__config, __name, __unit) \
854 { \
855 	.config = (__config), \
856 	.name = (__name), \
857 	.unit = (__unit), \
858 }
859 
860 #define __engine_event(__sample, __name) \
861 { \
862 	.sample = (__sample), \
863 	.name = (__name), \
864 }
865 
866 static struct i915_ext_attribute *
867 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
868 {
869 	sysfs_attr_init(&attr->attr.attr);
870 	attr->attr.attr.name = name;
871 	attr->attr.attr.mode = 0444;
872 	attr->attr.show = i915_pmu_event_show;
873 	attr->val = config;
874 
875 	return ++attr;
876 }
877 
878 static struct perf_pmu_events_attr *
879 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
880 	     const char *str)
881 {
882 	sysfs_attr_init(&attr->attr.attr);
883 	attr->attr.attr.name = name;
884 	attr->attr.attr.mode = 0444;
885 	attr->attr.show = perf_event_sysfs_show;
886 	attr->event_str = str;
887 
888 	return ++attr;
889 }
890 
891 static struct attribute **
892 create_event_attributes(struct i915_pmu *pmu)
893 {
894 	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
895 	static const struct {
896 		u64 config;
897 		const char *name;
898 		const char *unit;
899 	} events[] = {
900 		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
901 		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
902 		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
903 		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
904 		__event(I915_PMU_SOFTWARE_GT_AWAKE_TIME, "software-gt-awake-time", "ns"),
905 	};
906 	static const struct {
907 		enum drm_i915_pmu_engine_sample sample;
908 		char *name;
909 	} engine_events[] = {
910 		__engine_event(I915_SAMPLE_BUSY, "busy"),
911 		__engine_event(I915_SAMPLE_SEMA, "sema"),
912 		__engine_event(I915_SAMPLE_WAIT, "wait"),
913 	};
914 	unsigned int count = 0;
915 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
916 	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
917 	struct attribute **attr = NULL, **attr_iter;
918 	struct intel_engine_cs *engine;
919 	unsigned int i;
920 
921 	/* Count how many counters we will be exposing. */
922 	for (i = 0; i < ARRAY_SIZE(events); i++) {
923 		if (!config_status(i915, events[i].config))
924 			count++;
925 	}
926 
927 	for_each_uabi_engine(engine, i915) {
928 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
929 			if (!engine_event_status(engine,
930 						 engine_events[i].sample))
931 				count++;
932 		}
933 	}
934 
935 	/* Allocate attribute objects and table. */
936 	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
937 	if (!i915_attr)
938 		goto err_alloc;
939 
940 	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
941 	if (!pmu_attr)
942 		goto err_alloc;
943 
944 	/* Max one pointer of each attribute type plus a termination entry. */
945 	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
946 	if (!attr)
947 		goto err_alloc;
948 
949 	i915_iter = i915_attr;
950 	pmu_iter = pmu_attr;
951 	attr_iter = attr;
952 
953 	/* Initialize supported non-engine counters. */
954 	for (i = 0; i < ARRAY_SIZE(events); i++) {
955 		char *str;
956 
957 		if (config_status(i915, events[i].config))
958 			continue;
959 
960 		str = kstrdup(events[i].name, GFP_KERNEL);
961 		if (!str)
962 			goto err;
963 
964 		*attr_iter++ = &i915_iter->attr.attr;
965 		i915_iter = add_i915_attr(i915_iter, str, events[i].config);
966 
967 		if (events[i].unit) {
968 			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
969 			if (!str)
970 				goto err;
971 
972 			*attr_iter++ = &pmu_iter->attr.attr;
973 			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
974 		}
975 	}
976 
977 	/* Initialize supported engine counters. */
978 	for_each_uabi_engine(engine, i915) {
979 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
980 			char *str;
981 
982 			if (engine_event_status(engine,
983 						engine_events[i].sample))
984 				continue;
985 
986 			str = kasprintf(GFP_KERNEL, "%s-%s",
987 					engine->name, engine_events[i].name);
988 			if (!str)
989 				goto err;
990 
991 			*attr_iter++ = &i915_iter->attr.attr;
992 			i915_iter =
993 				add_i915_attr(i915_iter, str,
994 					      __I915_PMU_ENGINE(engine->uabi_class,
995 								engine->uabi_instance,
996 								engine_events[i].sample));
997 
998 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
999 					engine->name, engine_events[i].name);
1000 			if (!str)
1001 				goto err;
1002 
1003 			*attr_iter++ = &pmu_iter->attr.attr;
1004 			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
1005 		}
1006 	}
1007 
1008 	pmu->i915_attr = i915_attr;
1009 	pmu->pmu_attr = pmu_attr;
1010 
1011 	return attr;
1012 
1013 err:;
1014 	for (attr_iter = attr; *attr_iter; attr_iter++)
1015 		kfree((*attr_iter)->name);
1016 
1017 err_alloc:
1018 	kfree(attr);
1019 	kfree(i915_attr);
1020 	kfree(pmu_attr);
1021 
1022 	return NULL;
1023 }
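/*
 * The result is the usual perf sysfs layout (paths illustrative, the PMU
 * directory name matches pmu->name chosen in i915_pmu_register()):
 *
 *	/sys/bus/event_source/devices/i915/events/rc6-residency
 *	/sys/bus/event_source/devices/i915/events/rc6-residency.unit
 *	/sys/bus/event_source/devices/i915/events/rcs0-busy
 *
 * where each event file contains "config=0x..." as printed by
 * i915_pmu_event_show() and each .unit file the corresponding unit string.
 */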
1024 
1025 static void free_event_attributes(struct i915_pmu *pmu)
1026 {
1027 	struct attribute **attr_iter = pmu->events_attr_group.attrs;
1028 
1029 	for (; *attr_iter; attr_iter++)
1030 		kfree((*attr_iter)->name);
1031 
1032 	kfree(pmu->events_attr_group.attrs);
1033 	kfree(pmu->i915_attr);
1034 	kfree(pmu->pmu_attr);
1035 
1036 	pmu->events_attr_group.attrs = NULL;
1037 	pmu->i915_attr = NULL;
1038 	pmu->pmu_attr = NULL;
1039 }
1040 
1041 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
1042 {
1043 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1044 
1045 	GEM_BUG_ON(!pmu->base.event_init);
1046 
1047 	/* Select the first online CPU as a designated reader. */
1048 	if (cpumask_empty(&i915_pmu_cpumask))
1049 		cpumask_set_cpu(cpu, &i915_pmu_cpumask);
1050 
1051 	return 0;
1052 }
1053 
1054 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
1055 {
1056 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1057 	unsigned int target = i915_pmu_target_cpu;
1058 
1059 	GEM_BUG_ON(!pmu->base.event_init);
1060 
1061 	/*
1062 	 * Unregistering an instance generates a CPU offline event which we must
1063 	 * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
1064 	 */
1065 	if (pmu->closed)
1066 		return 0;
1067 
1068 	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
1069 		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
1070 
1071 		/* Migrate events if there is a valid target */
1072 		if (target < nr_cpu_ids) {
1073 			cpumask_set_cpu(target, &i915_pmu_cpumask);
1074 			i915_pmu_target_cpu = target;
1075 		}
1076 	}
1077 
1078 	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
1079 		perf_pmu_migrate_context(&pmu->base, cpu, target);
1080 		pmu->cpuhp.cpu = target;
1081 	}
1082 
1083 	return 0;
1084 }
1085 
1086 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
1087 
1088 int i915_pmu_init(void)
1089 {
1090 	int ret;
1091 
1092 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1093 				      "perf/x86/intel/i915:online",
1094 				      i915_pmu_cpu_online,
1095 				      i915_pmu_cpu_offline);
1096 	if (ret < 0)
1097 		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
1098 			  ret);
1099 	else
1100 		cpuhp_slot = ret;
1101 
1102 	return 0;
1103 }
1104 
1105 void i915_pmu_exit(void)
1106 {
1107 	if (cpuhp_slot != CPUHP_INVALID)
1108 		cpuhp_remove_multi_state(cpuhp_slot);
1109 }
1110 
1111 static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
1112 {
1113 	if (cpuhp_slot == CPUHP_INVALID)
1114 		return -EINVAL;
1115 
1116 	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
1117 }
1118 
1119 static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
1120 {
1121 	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
1122 }
1123 
1124 static bool is_igp(struct drm_i915_private *i915)
1125 {
1126 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1127 
1128 	/* IGP is 0000:00:02.0 */
1129 	return pci_domain_nr(pdev->bus) == 0 &&
1130 	       pdev->bus->number == 0 &&
1131 	       PCI_SLOT(pdev->devfn) == 2 &&
1132 	       PCI_FUNC(pdev->devfn) == 0;
1133 }
1134 
1135 void i915_pmu_register(struct drm_i915_private *i915)
1136 {
1137 	struct i915_pmu *pmu = &i915->pmu;
1138 	const struct attribute_group *attr_groups[] = {
1139 		&i915_pmu_format_attr_group,
1140 		&pmu->events_attr_group,
1141 		&i915_pmu_cpumask_attr_group,
1142 		NULL
1143 	};
1144 
1145 	int ret = -ENOMEM;
1146 
1147 	if (GRAPHICS_VER(i915) <= 2) {
1148 		drm_info(&i915->drm, "PMU not supported for this GPU.");
1149 		return;
1150 	}
1151 
1152 	spin_lock_init(&pmu->lock);
1153 	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1154 	pmu->timer.function = i915_sample;
1155 	pmu->cpuhp.cpu = -1;
1156 	init_rc6(pmu);
1157 
1158 	if (!is_igp(i915)) {
1159 		pmu->name = kasprintf(GFP_KERNEL,
1160 				      "i915_%s",
1161 				      dev_name(i915->drm.dev));
1162 		if (pmu->name) {
1163 			/* tools/perf reserves colons as special. */
1164 			strreplace((char *)pmu->name, ':', '_');
1165 		}
1166 	} else {
1167 		pmu->name = "i915";
1168 	}
1169 	if (!pmu->name)
1170 		goto err;
1171 
1172 	pmu->events_attr_group.name = "events";
1173 	pmu->events_attr_group.attrs = create_event_attributes(pmu);
1174 	if (!pmu->events_attr_group.attrs)
1175 		goto err_name;
1176 
1177 	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
1178 					GFP_KERNEL);
1179 	if (!pmu->base.attr_groups)
1180 		goto err_attr;
1181 
1182 	pmu->base.module	= THIS_MODULE;
1183 	pmu->base.task_ctx_nr	= perf_invalid_context;
1184 	pmu->base.event_init	= i915_pmu_event_init;
1185 	pmu->base.add		= i915_pmu_event_add;
1186 	pmu->base.del		= i915_pmu_event_del;
1187 	pmu->base.start		= i915_pmu_event_start;
1188 	pmu->base.stop		= i915_pmu_event_stop;
1189 	pmu->base.read		= i915_pmu_event_read;
1190 	pmu->base.event_idx	= i915_pmu_event_event_idx;
1191 
1192 	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
1193 	if (ret)
1194 		goto err_groups;
1195 
1196 	ret = i915_pmu_register_cpuhp_state(pmu);
1197 	if (ret)
1198 		goto err_unreg;
1199 
1200 	return;
1201 
1202 err_unreg:
1203 	perf_pmu_unregister(&pmu->base);
1204 err_groups:
1205 	kfree(pmu->base.attr_groups);
1206 err_attr:
1207 	pmu->base.event_init = NULL;
1208 	free_event_attributes(pmu);
1209 err_name:
1210 	if (!is_igp(i915))
1211 		kfree(pmu->name);
1212 err:
1213 	drm_notice(&i915->drm, "Failed to register PMU!\n");
1214 }
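/*
 * Once registered, the counters are consumable with the stock perf tool, for
 * example (event and PMU names as exported above; on non-IGP devices the PMU
 * is named i915_<pci-address> instead of plain i915):
 *
 *	perf stat -e i915/rc6-residency/,i915/actual-frequency/ -a sleep 1
 *
 * or programmatically via perf_event_open() as sketched near
 * i915_pmu_event_init().
 */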
1215 
1216 void i915_pmu_unregister(struct drm_i915_private *i915)
1217 {
1218 	struct i915_pmu *pmu = &i915->pmu;
1219 
1220 	if (!pmu->base.event_init)
1221 		return;
1222 
1223 	/*
1224 	 * "Disconnect" the PMU callbacks - since all are atomic, a call to
1225 	 * synchronize_rcu() ensures all currently executing ones will have
1226 	 * exited before we proceed with unregistration.
1227 	 */
1228 	pmu->closed = true;
1229 	synchronize_rcu();
1230 
1231 	hrtimer_cancel(&pmu->timer);
1232 
1233 	i915_pmu_unregister_cpuhp_state(pmu);
1234 
1235 	perf_pmu_unregister(&pmu->base);
1236 	pmu->base.event_init = NULL;
1237 	kfree(pmu->base.attr_groups);
1238 	if (!is_igp(i915))
1239 		kfree(pmu->name);
1240 	free_event_attributes(pmu);
1241 }
1242