xref: /openbmc/linux/drivers/gpu/drm/i915/i915_pmu.c (revision c796f021)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6 
7 #include <linux/pm_runtime.h>
8 
9 #include "gt/intel_engine.h"
10 #include "gt/intel_engine_pm.h"
11 #include "gt/intel_engine_regs.h"
12 #include "gt/intel_engine_user.h"
13 #include "gt/intel_gt_pm.h"
14 #include "gt/intel_gt_regs.h"
15 #include "gt/intel_rc6.h"
16 #include "gt/intel_rps.h"
17 
18 #include "i915_drv.h"
19 #include "i915_pmu.h"
20 #include "intel_pm.h"
21 
22 /* Frequency for the sampling timer for events which need it. */
23 #define FREQUENCY 200
24 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
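/*
 * For example, with FREQUENCY == 200 the NSEC_PER_SEC / FREQUENCY term is
 * 5,000,000 ns, which is above the 10,000 ns floor, so PERIOD evaluates to
 * 5 ms and the sampling timer fires roughly 200 times per second.
 */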
25 
26 #define ENGINE_SAMPLE_MASK \
27 	(BIT(I915_SAMPLE_BUSY) | \
28 	 BIT(I915_SAMPLE_WAIT) | \
29 	 BIT(I915_SAMPLE_SEMA))
30 
31 static cpumask_t i915_pmu_cpumask;
32 static unsigned int i915_pmu_target_cpu = -1;
33 
34 static u8 engine_config_sample(u64 config)
35 {
36 	return config & I915_PMU_SAMPLE_MASK;
37 }
38 
39 static u8 engine_event_sample(struct perf_event *event)
40 {
41 	return engine_config_sample(event->attr.config);
42 }
43 
44 static u8 engine_event_class(struct perf_event *event)
45 {
46 	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
47 }
48 
49 static u8 engine_event_instance(struct perf_event *event)
50 {
51 	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
52 }
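/*
 * The event config for an engine event packs sample type, engine instance
 * and engine class into the low bits, as laid out by __I915_PMU_ENGINE() in
 * the uapi header (include/uapi/drm/i915_drm.h):
 *
 *   config = (class << I915_PMU_CLASS_SHIFT) |
 *            (instance << I915_PMU_SAMPLE_BITS) |
 *            sample
 *
 * The three helpers above simply undo that packing. Anything at or above
 * __I915_PMU_OTHER(0) is a non-engine event (see is_engine_config() below).
 */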
53 
54 static bool is_engine_config(u64 config)
55 {
56 	return config < __I915_PMU_OTHER(0);
57 }
58 
59 static unsigned int other_bit(const u64 config)
60 {
61 	unsigned int val;
62 
63 	switch (config) {
64 	case I915_PMU_ACTUAL_FREQUENCY:
65 		val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;

66 		break;
67 	case I915_PMU_REQUESTED_FREQUENCY:
68 		val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
69 		break;
70 	case I915_PMU_RC6_RESIDENCY:
71 		val = __I915_PMU_RC6_RESIDENCY_ENABLED;
72 		break;
73 	default:
74 		/*
75 		 * Events that do not require sampling, or tracking state
76 		 * transitions between enabled and disabled, can be ignored.
77 		 */
78 		return -1;
79 	}
80 
81 	return I915_ENGINE_SAMPLE_COUNT + val;
82 }
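/*
 * Note the returned index is offset by I915_ENGINE_SAMPLE_COUNT so that the
 * frequency/RC6 enable bits sit above the per-engine sample bits in
 * pmu->enable and the two groups never collide (see config_bit() below).
 */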
83 
84 static unsigned int config_bit(const u64 config)
85 {
86 	if (is_engine_config(config))
87 		return engine_config_sample(config);
88 	else
89 		return other_bit(config);
90 }
91 
92 static u64 config_mask(u64 config)
93 {
94 	return BIT_ULL(config_bit(config));
95 }
96 
97 static bool is_engine_event(struct perf_event *event)
98 {
99 	return is_engine_config(event->attr.config);
100 }
101 
102 static unsigned int event_bit(struct perf_event *event)
103 {
104 	return config_bit(event->attr.config);
105 }
106 
107 static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
108 {
109 	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
110 	u32 enable;
111 
112 	/*
113 	 * Only some counters need the sampling timer.
114 	 *
115 	 * We start with a bitmask of all currently enabled events.
116 	 */
117 	enable = pmu->enable;
118 
119 	/*
120 	 * Mask out all the ones which do not need the timer, or in
121 	 * other words keep all the ones that could need the timer.
122 	 */
123 	enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
124 		  config_mask(I915_PMU_REQUESTED_FREQUENCY) |
125 		  ENGINE_SAMPLE_MASK;
126 
127 	/*
128 	 * When the GPU is idle, per-engine counters do not need to be
129 	 * running, so clear those bits out.
130 	 */
131 	if (!gpu_active)
132 		enable &= ~ENGINE_SAMPLE_MASK;
133 	/*
134 	 * Also, when software busyness tracking is available we do not
135 	 * need the timer for the I915_SAMPLE_BUSY counter.
136 	 */
137 	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
138 		enable &= ~BIT(I915_SAMPLE_BUSY);
139 
140 	/*
141 	 * If some bits remain, it means we need the sampling timer running.
142 	 */
143 	return enable;
144 }
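/*
 * For example, if only I915_PMU_RC6_RESIDENCY is enabled the mask above
 * clears every bit and the timer stays off: RC6 residency is read on demand
 * in get_rc6() and estimated across runtime suspend, rather than sampled.
 */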
145 
146 static u64 __get_rc6(struct intel_gt *gt)
147 {
148 	struct drm_i915_private *i915 = gt->i915;
149 	u64 val;
150 
151 	val = intel_rc6_residency_ns(&gt->rc6,
152 				     IS_VALLEYVIEW(i915) ?
153 				     VLV_GT_RENDER_RC6 :
154 				     GEN6_GT_GFX_RC6);
155 
156 	if (HAS_RC6p(i915))
157 		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);
158 
159 	if (HAS_RC6pp(i915))
160 		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);
161 
162 	return val;
163 }
164 
165 static inline s64 ktime_since_raw(const ktime_t kt)
166 {
167 	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
168 }
169 
170 static u64 get_rc6(struct intel_gt *gt)
171 {
172 	struct drm_i915_private *i915 = gt->i915;
173 	struct i915_pmu *pmu = &i915->pmu;
174 	unsigned long flags;
175 	bool awake = false;
176 	u64 val;
177 
178 	if (intel_gt_pm_get_if_awake(gt)) {
179 		val = __get_rc6(gt);
180 		intel_gt_pm_put_async(gt);
181 		awake = true;
182 	}
183 
184 	spin_lock_irqsave(&pmu->lock, flags);
185 
186 	if (awake) {
187 		pmu->sample[__I915_SAMPLE_RC6].cur = val;
188 	} else {
189 		/*
190 		 * We think we are runtime suspended.
191 		 *
192 		 * Report the delta from when the device was suspended to now,
193 		 * on top of the last known real value, as the approximated RC6
194 		 * counter value.
195 		 */
196 		val = ktime_since_raw(pmu->sleep_last);
197 		val += pmu->sample[__I915_SAMPLE_RC6].cur;
198 	}
199 
200 	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
201 		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
202 	else
203 		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
204 
205 	spin_unlock_irqrestore(&pmu->lock, flags);
206 
207 	return val;
208 }
209 
210 static void init_rc6(struct i915_pmu *pmu)
211 {
212 	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
213 	intel_wakeref_t wakeref;
214 
215 	with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
216 		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
217 		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
218 					pmu->sample[__I915_SAMPLE_RC6].cur;
219 		pmu->sleep_last = ktime_get_raw();
220 	}
221 }
222 
223 static void park_rc6(struct drm_i915_private *i915)
224 {
225 	struct i915_pmu *pmu = &i915->pmu;
226 
227 	pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
228 	pmu->sleep_last = ktime_get_raw();
229 }
230 
231 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
232 {
233 	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
234 		pmu->timer_enabled = true;
235 		pmu->timer_last = ktime_get();
236 		hrtimer_start_range_ns(&pmu->timer,
237 				       ns_to_ktime(PERIOD), 0,
238 				       HRTIMER_MODE_REL_PINNED);
239 	}
240 }
241 
242 void i915_pmu_gt_parked(struct drm_i915_private *i915)
243 {
244 	struct i915_pmu *pmu = &i915->pmu;
245 
246 	if (!pmu->base.event_init)
247 		return;
248 
249 	spin_lock_irq(&pmu->lock);
250 
251 	park_rc6(i915);
252 
253 	/*
254 	 * Signal the sampling timer to stop if only engine events are enabled
255 	 * and the GPU has gone idle.
256 	 */
257 	pmu->timer_enabled = pmu_needs_timer(pmu, false);
258 
259 	spin_unlock_irq(&pmu->lock);
260 }
261 
262 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
263 {
264 	struct i915_pmu *pmu = &i915->pmu;
265 
266 	if (!pmu->base.event_init)
267 		return;
268 
269 	spin_lock_irq(&pmu->lock);
270 
271 	/*
272 	 * Re-enable the sampling timer when the GPU goes active.
273 	 */
274 	__i915_pmu_maybe_start_timer(pmu);
275 
276 	spin_unlock_irq(&pmu->lock);
277 }
278 
279 static void
280 add_sample(struct i915_pmu_sample *sample, u32 val)
281 {
282 	sample->cur += val;
283 }
284 
285 static bool exclusive_mmio_access(const struct drm_i915_private *i915)
286 {
287 	/*
288 	 * We have to avoid concurrent mmio cache line access on gen7 or
289 	 * risk a machine hang. For a fun history lesson dig out the old
290 	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
291 	 */
292 	return GRAPHICS_VER(i915) == 7;
293 }
294 
295 static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
296 {
297 	struct intel_engine_pmu *pmu = &engine->pmu;
298 	bool busy;
299 	u32 val;
300 
301 	val = ENGINE_READ_FW(engine, RING_CTL);
302 	if (val == 0) /* powerwell off => engine idle */
303 		return;
304 
305 	if (val & RING_WAIT)
306 		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
307 	if (val & RING_WAIT_SEMAPHORE)
308 		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
309 
310 	/* No need to sample when busy stats are supported. */
311 	if (intel_engine_supports_stats(engine))
312 		return;
313 
314 	/*
315 	 * While waiting on a semaphore or event, MI_MODE reports the
316 	 * ring as idle. However, previously using the seqno, and with
317 	 * execlists sampling, we account for the ring waiting as the
318 	 * engine being busy. Therefore, we record the sample as being
319 	 * busy if either waiting or !idle.
320 	 */
321 	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
322 	if (!busy) {
323 		val = ENGINE_READ_FW(engine, RING_MI_MODE);
324 		busy = !(val & MODE_IDLE);
325 	}
326 	if (busy)
327 		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
328 }
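/*
 * The sampled counters therefore behave like monotonically increasing
 * nanosecond counters of time spent busy/waiting/in-semaphore. Userspace
 * computes utilisation over an interval as a ratio of deltas, e.g. (a rough
 * sketch, not part of the driver):
 *
 *   busy_pct = 100 * (busy_ns_now - busy_ns_then) / (t_now_ns - t_then_ns);
 */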
329 
330 static void
331 engines_sample(struct intel_gt *gt, unsigned int period_ns)
332 {
333 	struct drm_i915_private *i915 = gt->i915;
334 	struct intel_engine_cs *engine;
335 	enum intel_engine_id id;
336 	unsigned long flags;
337 
338 	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
339 		return;
340 
341 	if (!intel_gt_pm_is_awake(gt))
342 		return;
343 
344 	for_each_engine(engine, gt, id) {
345 		if (!intel_engine_pm_get_if_awake(engine))
346 			continue;
347 
348 		if (exclusive_mmio_access(i915)) {
349 			spin_lock_irqsave(&engine->uncore->lock, flags);
350 			engine_sample(engine, period_ns);
351 			spin_unlock_irqrestore(&engine->uncore->lock, flags);
352 		} else {
353 			engine_sample(engine, period_ns);
354 		}
355 
356 		intel_engine_pm_put_async(engine);
357 	}
358 }
359 
360 static void
361 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
362 {
363 	sample->cur += mul_u32_u32(val, mul);
364 }
365 
366 static bool frequency_sampling_enabled(struct i915_pmu *pmu)
367 {
368 	return pmu->enable &
369 	       (config_mask(I915_PMU_ACTUAL_FREQUENCY) |
370 		config_mask(I915_PMU_REQUESTED_FREQUENCY));
371 }
372 
373 static void
374 frequency_sample(struct intel_gt *gt, unsigned int period_ns)
375 {
376 	struct drm_i915_private *i915 = gt->i915;
377 	struct intel_uncore *uncore = gt->uncore;
378 	struct i915_pmu *pmu = &i915->pmu;
379 	struct intel_rps *rps = &gt->rps;
380 
381 	if (!frequency_sampling_enabled(pmu))
382 		return;
383 
384 	/* Report 0/0 (actual/requested) frequency while parked. */
385 	if (!intel_gt_pm_get_if_awake(gt))
386 		return;
387 
388 	if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
389 		u32 val;
390 
391 		/*
392 		 * We take a quick peek here without using forcewake
393 		 * so that we don't perturb the system under observation
394 		 * (forcewake => !rc6 => increased power use). We expect
395 		 * that if the read fails because it is outside of the
396 		 * mmio power well, then it will return 0 -- in which
397 		 * case we assume the system is running at the intended
398 		 * frequency. Fortunately, the read should rarely fail!
399 		 */
400 		val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
401 		if (val)
402 			val = intel_rps_get_cagf(rps, val);
403 		else
404 			val = rps->cur_freq;
405 
406 		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
407 				intel_gpu_freq(rps, val), period_ns / 1000);
408 	}
409 
410 	if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
411 		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
412 				intel_rps_get_requested_frequency(rps),
413 				period_ns / 1000);
414 	}
415 
416 	intel_gt_pm_put_async(gt);
417 }
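/*
 * Unit bookkeeping: add_sample_mult() accumulates freq_MHz * period_us, so
 * the raw sample is in MHz*us. __i915_pmu_event_read() later divides by
 * USEC_PER_SEC, leaving MHz*s, and tools such as perf that divide the
 * counter delta by elapsed seconds end up displaying the average frequency
 * in MHz over the measurement interval.
 */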
418 
419 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
420 {
421 	struct drm_i915_private *i915 =
422 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
423 	struct i915_pmu *pmu = &i915->pmu;
424 	struct intel_gt *gt = to_gt(i915);
425 	unsigned int period_ns;
426 	ktime_t now;
427 
428 	if (!READ_ONCE(pmu->timer_enabled))
429 		return HRTIMER_NORESTART;
430 
431 	now = ktime_get();
432 	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
433 	pmu->timer_last = now;
434 
435 	/*
436 	 * Strictly speaking the passed in period may not be 100% accurate for
437 	 * all internal calculations, since some amount of time can be spent on
438 	 * grabbing the forcewake. However, the potential error from timer
439 	 * callback delay greatly dominates this, so we keep it simple.
440 	 */
441 	engines_sample(gt, period_ns);
442 	frequency_sample(gt, period_ns);
443 
444 	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
445 
446 	return HRTIMER_RESTART;
447 }
448 
449 static void i915_pmu_event_destroy(struct perf_event *event)
450 {
451 	struct drm_i915_private *i915 =
452 		container_of(event->pmu, typeof(*i915), pmu.base);
453 
454 	drm_WARN_ON(&i915->drm, event->parent);
455 
456 	drm_dev_put(&i915->drm);
457 }
458 
459 static int
460 engine_event_status(struct intel_engine_cs *engine,
461 		    enum drm_i915_pmu_engine_sample sample)
462 {
463 	switch (sample) {
464 	case I915_SAMPLE_BUSY:
465 	case I915_SAMPLE_WAIT:
466 		break;
467 	case I915_SAMPLE_SEMA:
468 		if (GRAPHICS_VER(engine->i915) < 6)
469 			return -ENODEV;
470 		break;
471 	default:
472 		return -ENOENT;
473 	}
474 
475 	return 0;
476 }
477 
478 static int
479 config_status(struct drm_i915_private *i915, u64 config)
480 {
481 	struct intel_gt *gt = to_gt(i915);
482 
483 	switch (config) {
484 	case I915_PMU_ACTUAL_FREQUENCY:
485 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
486 			/* Requires a mutex for sampling! */
487 			return -ENODEV;
488 		fallthrough;
489 	case I915_PMU_REQUESTED_FREQUENCY:
490 		if (GRAPHICS_VER(i915) < 6)
491 			return -ENODEV;
492 		break;
493 	case I915_PMU_INTERRUPTS:
494 		break;
495 	case I915_PMU_RC6_RESIDENCY:
496 		if (!gt->rc6.supported)
497 			return -ENODEV;
498 		break;
499 	case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
500 		break;
501 	default:
502 		return -ENOENT;
503 	}
504 
505 	return 0;
506 }
507 
508 static int engine_event_init(struct perf_event *event)
509 {
510 	struct drm_i915_private *i915 =
511 		container_of(event->pmu, typeof(*i915), pmu.base);
512 	struct intel_engine_cs *engine;
513 
514 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
515 					  engine_event_instance(event));
516 	if (!engine)
517 		return -ENODEV;
518 
519 	return engine_event_status(engine, engine_event_sample(event));
520 }
521 
522 static int i915_pmu_event_init(struct perf_event *event)
523 {
524 	struct drm_i915_private *i915 =
525 		container_of(event->pmu, typeof(*i915), pmu.base);
526 	struct i915_pmu *pmu = &i915->pmu;
527 	int ret;
528 
529 	if (pmu->closed)
530 		return -ENODEV;
531 
532 	if (event->attr.type != event->pmu->type)
533 		return -ENOENT;
534 
535 	/* unsupported modes and filters */
536 	if (event->attr.sample_period) /* no sampling */
537 		return -EINVAL;
538 
539 	if (has_branch_stack(event))
540 		return -EOPNOTSUPP;
541 
542 	if (event->cpu < 0)
543 		return -EINVAL;
544 
545 	/* only allow running on one cpu at a time */
546 	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
547 		return -EINVAL;
548 
549 	if (is_engine_event(event))
550 		ret = engine_event_init(event);
551 	else
552 		ret = config_status(i915, event->attr.config);
553 	if (ret)
554 		return ret;
555 
556 	if (!event->parent) {
557 		drm_dev_get(&i915->drm);
558 		event->destroy = i915_pmu_event_destroy;
559 	}
560 
561 	return 0;
562 }
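/*
 * A rough userspace sketch of what this accepts (not part of the driver;
 * paths and names assumed): read the dynamic PMU type from
 * /sys/bus/event_source/devices/i915/type, then
 *
 *   struct perf_event_attr attr = { .size = sizeof(attr) };
 *   attr.type = i915_type;                // value read from sysfs
 *   attr.config = I915_PMU_RC6_RESIDENCY; // from uapi i915_drm.h
 *   fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
 *
 * with cpu taken from .../i915/cpumask. Counting only - a non-zero
 * sample_period is rejected above with -EINVAL.
 */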
563 
564 static u64 __i915_pmu_event_read(struct perf_event *event)
565 {
566 	struct drm_i915_private *i915 =
567 		container_of(event->pmu, typeof(*i915), pmu.base);
568 	struct i915_pmu *pmu = &i915->pmu;
569 	u64 val = 0;
570 
571 	if (is_engine_event(event)) {
572 		u8 sample = engine_event_sample(event);
573 		struct intel_engine_cs *engine;
574 
575 		engine = intel_engine_lookup_user(i915,
576 						  engine_event_class(event),
577 						  engine_event_instance(event));
578 
579 		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
580 			/* Do nothing */
581 		} else if (sample == I915_SAMPLE_BUSY &&
582 			   intel_engine_supports_stats(engine)) {
583 			ktime_t unused;
584 
585 			val = ktime_to_ns(intel_engine_get_busy_time(engine,
586 								     &unused));
587 		} else {
588 			val = engine->pmu.sample[sample].cur;
589 		}
590 	} else {
591 		switch (event->attr.config) {
592 		case I915_PMU_ACTUAL_FREQUENCY:
593 			val =
594 			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
595 				   USEC_PER_SEC /* to MHz */);
596 			break;
597 		case I915_PMU_REQUESTED_FREQUENCY:
598 			val =
599 			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
600 				   USEC_PER_SEC /* to MHz */);
601 			break;
602 		case I915_PMU_INTERRUPTS:
603 			val = READ_ONCE(pmu->irq_count);
604 			break;
605 		case I915_PMU_RC6_RESIDENCY:
606 			val = get_rc6(to_gt(i915));
607 			break;
608 		case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
609 			val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
610 			break;
611 		}
612 	}
613 
614 	return val;
615 }
616 
617 static void i915_pmu_event_read(struct perf_event *event)
618 {
619 	struct drm_i915_private *i915 =
620 		container_of(event->pmu, typeof(*i915), pmu.base);
621 	struct hw_perf_event *hwc = &event->hw;
622 	struct i915_pmu *pmu = &i915->pmu;
623 	u64 prev, new;
624 
625 	if (pmu->closed) {
626 		event->hw.state = PERF_HES_STOPPED;
627 		return;
628 	}
629 again:
630 	prev = local64_read(&hwc->prev_count);
631 	new = __i915_pmu_event_read(event);
632 
633 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
634 		goto again;
635 
636 	local64_add(new - prev, &event->count);
637 }
638 
639 static void i915_pmu_enable(struct perf_event *event)
640 {
641 	struct drm_i915_private *i915 =
642 		container_of(event->pmu, typeof(*i915), pmu.base);
643 	struct i915_pmu *pmu = &i915->pmu;
644 	unsigned long flags;
645 	unsigned int bit;
646 
647 	bit = event_bit(event);
648 	if (bit == -1)
649 		goto update;
650 
651 	spin_lock_irqsave(&pmu->lock, flags);
652 
653 	/*
654 	 * Update the bitmask of enabled events and increment
655 	 * the event reference counter.
656 	 */
657 	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
658 	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
659 	GEM_BUG_ON(pmu->enable_count[bit] == ~0);
660 
661 	pmu->enable |= BIT_ULL(bit);
662 	pmu->enable_count[bit]++;
663 
664 	/*
665 	 * Start the sampling timer if needed and not already enabled.
666 	 */
667 	__i915_pmu_maybe_start_timer(pmu);
668 
669 	/*
670 	 * For per-engine events the bitmask and reference counting
671 	 * is stored per engine.
672 	 */
673 	if (is_engine_event(event)) {
674 		u8 sample = engine_event_sample(event);
675 		struct intel_engine_cs *engine;
676 
677 		engine = intel_engine_lookup_user(i915,
678 						  engine_event_class(event),
679 						  engine_event_instance(event));
680 
681 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
682 			     I915_ENGINE_SAMPLE_COUNT);
683 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
684 			     I915_ENGINE_SAMPLE_COUNT);
685 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
686 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
687 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
688 
689 		engine->pmu.enable |= BIT(sample);
690 		engine->pmu.enable_count[sample]++;
691 	}
692 
693 	spin_unlock_irqrestore(&pmu->lock, flags);
694 
695 update:
696 	/*
697 	 * Store the current counter value so we can report the correct delta
698 	 * for all listeners, even when the event was already enabled and has
699 	 * an existing non-zero value.
700 	 */
701 	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
702 }
703 
704 static void i915_pmu_disable(struct perf_event *event)
705 {
706 	struct drm_i915_private *i915 =
707 		container_of(event->pmu, typeof(*i915), pmu.base);
708 	unsigned int bit = event_bit(event);
709 	struct i915_pmu *pmu = &i915->pmu;
710 	unsigned long flags;
711 
712 	if (bit == -1)
713 		return;
714 
715 	spin_lock_irqsave(&pmu->lock, flags);
716 
717 	if (is_engine_event(event)) {
718 		u8 sample = engine_event_sample(event);
719 		struct intel_engine_cs *engine;
720 
721 		engine = intel_engine_lookup_user(i915,
722 						  engine_event_class(event),
723 						  engine_event_instance(event));
724 
725 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
726 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
727 		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
728 
729 		/*
730 		 * Decrement the reference count and clear the enabled
731 		 * bitmask when the last listener on an event goes away.
732 		 */
733 		if (--engine->pmu.enable_count[sample] == 0)
734 			engine->pmu.enable &= ~BIT(sample);
735 	}
736 
737 	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
738 	GEM_BUG_ON(pmu->enable_count[bit] == 0);
739 	/*
740 	 * Decrement the reference count and clear the enabled
741 	 * bitmask when the last listener on an event goes away.
742 	 */
743 	if (--pmu->enable_count[bit] == 0) {
744 		pmu->enable &= ~BIT_ULL(bit);
745 		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
746 	}
747 
748 	spin_unlock_irqrestore(&pmu->lock, flags);
749 }
750 
751 static void i915_pmu_event_start(struct perf_event *event, int flags)
752 {
753 	struct drm_i915_private *i915 =
754 		container_of(event->pmu, typeof(*i915), pmu.base);
755 	struct i915_pmu *pmu = &i915->pmu;
756 
757 	if (pmu->closed)
758 		return;
759 
760 	i915_pmu_enable(event);
761 	event->hw.state = 0;
762 }
763 
764 static void i915_pmu_event_stop(struct perf_event *event, int flags)
765 {
766 	if (flags & PERF_EF_UPDATE)
767 		i915_pmu_event_read(event);
768 	i915_pmu_disable(event);
769 	event->hw.state = PERF_HES_STOPPED;
770 }
771 
772 static int i915_pmu_event_add(struct perf_event *event, int flags)
773 {
774 	struct drm_i915_private *i915 =
775 		container_of(event->pmu, typeof(*i915), pmu.base);
776 	struct i915_pmu *pmu = &i915->pmu;
777 
778 	if (pmu->closed)
779 		return -ENODEV;
780 
781 	if (flags & PERF_EF_START)
782 		i915_pmu_event_start(event, flags);
783 
784 	return 0;
785 }
786 
787 static void i915_pmu_event_del(struct perf_event *event, int flags)
788 {
789 	i915_pmu_event_stop(event, PERF_EF_UPDATE);
790 }
791 
792 static int i915_pmu_event_event_idx(struct perf_event *event)
793 {
794 	return 0;
795 }
796 
797 struct i915_str_attribute {
798 	struct device_attribute attr;
799 	const char *str;
800 };
801 
802 static ssize_t i915_pmu_format_show(struct device *dev,
803 				    struct device_attribute *attr, char *buf)
804 {
805 	struct i915_str_attribute *eattr;
806 
807 	eattr = container_of(attr, struct i915_str_attribute, attr);
808 	return sprintf(buf, "%s\n", eattr->str);
809 }
810 
811 #define I915_PMU_FORMAT_ATTR(_name, _config) \
812 	(&((struct i915_str_attribute[]) { \
813 		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
814 		  .str = _config, } \
815 	})[0].attr.attr)
816 
817 static struct attribute *i915_pmu_format_attrs[] = {
818 	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
819 	NULL,
820 };
821 
822 static const struct attribute_group i915_pmu_format_attr_group = {
823 	.name = "format",
824 	.attrs = i915_pmu_format_attrs,
825 };
826 
827 struct i915_ext_attribute {
828 	struct device_attribute attr;
829 	unsigned long val;
830 };
831 
832 static ssize_t i915_pmu_event_show(struct device *dev,
833 				   struct device_attribute *attr, char *buf)
834 {
835 	struct i915_ext_attribute *eattr;
836 
837 	eattr = container_of(attr, struct i915_ext_attribute, attr);
838 	return sprintf(buf, "config=0x%lx\n", eattr->val);
839 }
840 
841 static ssize_t cpumask_show(struct device *dev,
842 			    struct device_attribute *attr, char *buf)
843 {
844 	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
845 }
846 
847 static DEVICE_ATTR_RO(cpumask);
848 
849 static struct attribute *i915_cpumask_attrs[] = {
850 	&dev_attr_cpumask.attr,
851 	NULL,
852 };
853 
854 static const struct attribute_group i915_pmu_cpumask_attr_group = {
855 	.attrs = i915_cpumask_attrs,
856 };
857 
858 #define __event(__config, __name, __unit) \
859 { \
860 	.config = (__config), \
861 	.name = (__name), \
862 	.unit = (__unit), \
863 }
864 
865 #define __engine_event(__sample, __name) \
866 { \
867 	.sample = (__sample), \
868 	.name = (__name), \
869 }
870 
871 static struct i915_ext_attribute *
872 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
873 {
874 	sysfs_attr_init(&attr->attr.attr);
875 	attr->attr.attr.name = name;
876 	attr->attr.attr.mode = 0444;
877 	attr->attr.show = i915_pmu_event_show;
878 	attr->val = config;
879 
880 	return ++attr;
881 }
882 
883 static struct perf_pmu_events_attr *
884 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
885 	     const char *str)
886 {
887 	sysfs_attr_init(&attr->attr.attr);
888 	attr->attr.attr.name = name;
889 	attr->attr.attr.mode = 0444;
890 	attr->attr.show = perf_event_sysfs_show;
891 	attr->event_str = str;
892 
893 	return ++attr;
894 }
895 
896 static struct attribute **
897 create_event_attributes(struct i915_pmu *pmu)
898 {
899 	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
900 	static const struct {
901 		u64 config;
902 		const char *name;
903 		const char *unit;
904 	} events[] = {
905 		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
906 		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
907 		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
908 		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
909 		__event(I915_PMU_SOFTWARE_GT_AWAKE_TIME, "software-gt-awake-time", "ns"),
910 	};
911 	static const struct {
912 		enum drm_i915_pmu_engine_sample sample;
913 		char *name;
914 	} engine_events[] = {
915 		__engine_event(I915_SAMPLE_BUSY, "busy"),
916 		__engine_event(I915_SAMPLE_SEMA, "sema"),
917 		__engine_event(I915_SAMPLE_WAIT, "wait"),
918 	};
919 	unsigned int count = 0;
920 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
921 	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
922 	struct attribute **attr = NULL, **attr_iter;
923 	struct intel_engine_cs *engine;
924 	unsigned int i;
925 
926 	/* Count how many counters we will be exposing. */
927 	for (i = 0; i < ARRAY_SIZE(events); i++) {
928 		if (!config_status(i915, events[i].config))
929 			count++;
930 	}
931 
932 	for_each_uabi_engine(engine, i915) {
933 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
934 			if (!engine_event_status(engine,
935 						 engine_events[i].sample))
936 				count++;
937 		}
938 	}
939 
940 	/* Allocate attribute objects and table. */
941 	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
942 	if (!i915_attr)
943 		goto err_alloc;
944 
945 	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
946 	if (!pmu_attr)
947 		goto err_alloc;
948 
949 	/* Max one pointer of each attribute type plus a termination entry. */
950 	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
951 	if (!attr)
952 		goto err_alloc;
953 
954 	i915_iter = i915_attr;
955 	pmu_iter = pmu_attr;
956 	attr_iter = attr;
957 
958 	/* Initialize supported non-engine counters. */
959 	for (i = 0; i < ARRAY_SIZE(events); i++) {
960 		char *str;
961 
962 		if (config_status(i915, events[i].config))
963 			continue;
964 
965 		str = kstrdup(events[i].name, GFP_KERNEL);
966 		if (!str)
967 			goto err;
968 
969 		*attr_iter++ = &i915_iter->attr.attr;
970 		i915_iter = add_i915_attr(i915_iter, str, events[i].config);
971 
972 		if (events[i].unit) {
973 			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
974 			if (!str)
975 				goto err;
976 
977 			*attr_iter++ = &pmu_iter->attr.attr;
978 			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
979 		}
980 	}
981 
982 	/* Initialize supported engine counters. */
983 	for_each_uabi_engine(engine, i915) {
984 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
985 			char *str;
986 
987 			if (engine_event_status(engine,
988 						engine_events[i].sample))
989 				continue;
990 
991 			str = kasprintf(GFP_KERNEL, "%s-%s",
992 					engine->name, engine_events[i].name);
993 			if (!str)
994 				goto err;
995 
996 			*attr_iter++ = &i915_iter->attr.attr;
997 			i915_iter =
998 				add_i915_attr(i915_iter, str,
999 					      __I915_PMU_ENGINE(engine->uabi_class,
1000 								engine->uabi_instance,
1001 								engine_events[i].sample));
1002 
1003 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
1004 					engine->name, engine_events[i].name);
1005 			if (!str)
1006 				goto err;
1007 
1008 			*attr_iter++ = &pmu_iter->attr.attr;
1009 			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
1010 		}
1011 	}
1012 
1013 	pmu->i915_attr = i915_attr;
1014 	pmu->pmu_attr = pmu_attr;
1015 
1016 	return attr;
1017 
1018 err:;
1019 	for (attr_iter = attr; *attr_iter; attr_iter++)
1020 		kfree((*attr_iter)->name);
1021 
1022 err_alloc:
1023 	kfree(attr);
1024 	kfree(i915_attr);
1025 	kfree(pmu_attr);
1026 
1027 	return NULL;
1028 }
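/*
 * The attributes built here end up under
 * /sys/bus/event_source/devices/<pmu name>/events/ ("i915" for the
 * integrated GPU, see i915_pmu_register() below). A usage sketch from the
 * shell, assuming the events were accepted by config_status() above:
 *
 *   perf stat -e i915/rc6-residency/ -e i915/actual-frequency/ -a sleep 1
 */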
1029 
1030 static void free_event_attributes(struct i915_pmu *pmu)
1031 {
1032 	struct attribute **attr_iter = pmu->events_attr_group.attrs;
1033 
1034 	for (; *attr_iter; attr_iter++)
1035 		kfree((*attr_iter)->name);
1036 
1037 	kfree(pmu->events_attr_group.attrs);
1038 	kfree(pmu->i915_attr);
1039 	kfree(pmu->pmu_attr);
1040 
1041 	pmu->events_attr_group.attrs = NULL;
1042 	pmu->i915_attr = NULL;
1043 	pmu->pmu_attr = NULL;
1044 }
1045 
1046 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
1047 {
1048 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1049 
1050 	GEM_BUG_ON(!pmu->base.event_init);
1051 
1052 	/* Select the first online CPU as a designated reader. */
1053 	if (!cpumask_weight(&i915_pmu_cpumask))
1054 		cpumask_set_cpu(cpu, &i915_pmu_cpumask);
1055 
1056 	return 0;
1057 }
1058 
1059 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
1060 {
1061 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
1062 	unsigned int target = i915_pmu_target_cpu;
1063 
1064 	GEM_BUG_ON(!pmu->base.event_init);
1065 
1066 	/*
1067 	 * Unregistering an instance generates a CPU offline event which we must
1068 	 * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
1069 	 */
1070 	if (pmu->closed)
1071 		return 0;
1072 
1073 	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
1074 		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
1075 
1076 		/* Migrate events if there is a valid target */
1077 		if (target < nr_cpu_ids) {
1078 			cpumask_set_cpu(target, &i915_pmu_cpumask);
1079 			i915_pmu_target_cpu = target;
1080 		}
1081 	}
1082 
1083 	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
1084 		perf_pmu_migrate_context(&pmu->base, cpu, target);
1085 		pmu->cpuhp.cpu = target;
1086 	}
1087 
1088 	return 0;
1089 }
1090 
1091 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
1092 
1093 int i915_pmu_init(void)
1094 {
1095 	int ret;
1096 
1097 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1098 				      "perf/x86/intel/i915:online",
1099 				      i915_pmu_cpu_online,
1100 				      i915_pmu_cpu_offline);
1101 	if (ret < 0)
1102 		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
1103 			  ret);
1104 	else
1105 		cpuhp_slot = ret;
1106 
1107 	return 0;
1108 }
1109 
1110 void i915_pmu_exit(void)
1111 {
1112 	if (cpuhp_slot != CPUHP_INVALID)
1113 		cpuhp_remove_multi_state(cpuhp_slot);
1114 }
1115 
1116 static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
1117 {
1118 	if (cpuhp_slot == CPUHP_INVALID)
1119 		return -EINVAL;
1120 
1121 	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
1122 }
1123 
1124 static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
1125 {
1126 	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
1127 }
1128 
1129 static bool is_igp(struct drm_i915_private *i915)
1130 {
1131 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1132 
1133 	/* IGP is 0000:00:02.0 */
1134 	return pci_domain_nr(pdev->bus) == 0 &&
1135 	       pdev->bus->number == 0 &&
1136 	       PCI_SLOT(pdev->devfn) == 2 &&
1137 	       PCI_FUNC(pdev->devfn) == 0;
1138 }
1139 
1140 void i915_pmu_register(struct drm_i915_private *i915)
1141 {
1142 	struct i915_pmu *pmu = &i915->pmu;
1143 	const struct attribute_group *attr_groups[] = {
1144 		&i915_pmu_format_attr_group,
1145 		&pmu->events_attr_group,
1146 		&i915_pmu_cpumask_attr_group,
1147 		NULL
1148 	};
1149 
1150 	int ret = -ENOMEM;
1151 
1152 	if (GRAPHICS_VER(i915) <= 2) {
1153 		drm_info(&i915->drm, "PMU not supported for this GPU.");
1154 		return;
1155 	}
1156 
1157 	spin_lock_init(&pmu->lock);
1158 	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1159 	pmu->timer.function = i915_sample;
1160 	pmu->cpuhp.cpu = -1;
1161 	init_rc6(pmu);
1162 
1163 	if (!is_igp(i915)) {
1164 		pmu->name = kasprintf(GFP_KERNEL,
1165 				      "i915_%s",
1166 				      dev_name(i915->drm.dev));
1167 		if (pmu->name) {
1168 			/* tools/perf reserves colons as special. */
1169 			strreplace((char *)pmu->name, ':', '_');
1170 		}
1171 	} else {
1172 		pmu->name = "i915";
1173 	}
1174 	if (!pmu->name)
1175 		goto err;
1176 
1177 	pmu->events_attr_group.name = "events";
1178 	pmu->events_attr_group.attrs = create_event_attributes(pmu);
1179 	if (!pmu->events_attr_group.attrs)
1180 		goto err_name;
1181 
1182 	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
1183 					GFP_KERNEL);
1184 	if (!pmu->base.attr_groups)
1185 		goto err_attr;
1186 
1187 	pmu->base.module	= THIS_MODULE;
1188 	pmu->base.task_ctx_nr	= perf_invalid_context;
1189 	pmu->base.event_init	= i915_pmu_event_init;
1190 	pmu->base.add		= i915_pmu_event_add;
1191 	pmu->base.del		= i915_pmu_event_del;
1192 	pmu->base.start		= i915_pmu_event_start;
1193 	pmu->base.stop		= i915_pmu_event_stop;
1194 	pmu->base.read		= i915_pmu_event_read;
1195 	pmu->base.event_idx	= i915_pmu_event_event_idx;
1196 
1197 	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
1198 	if (ret)
1199 		goto err_groups;
1200 
1201 	ret = i915_pmu_register_cpuhp_state(pmu);
1202 	if (ret)
1203 		goto err_unreg;
1204 
1205 	return;
1206 
1207 err_unreg:
1208 	perf_pmu_unregister(&pmu->base);
1209 err_groups:
1210 	kfree(pmu->base.attr_groups);
1211 err_attr:
1212 	pmu->base.event_init = NULL;
1213 	free_event_attributes(pmu);
1214 err_name:
1215 	if (!is_igp(i915))
1216 		kfree(pmu->name);
1217 err:
1218 	drm_notice(&i915->drm, "Failed to register PMU!\n");
1219 }
1220 
1221 void i915_pmu_unregister(struct drm_i915_private *i915)
1222 {
1223 	struct i915_pmu *pmu = &i915->pmu;
1224 
1225 	if (!pmu->base.event_init)
1226 		return;
1227 
1228 	/*
1229 	 * "Disconnect" the PMU callbacks - since all are atomic, synchronize_rcu()
1230 	 * ensures all currently executing ones will have exited before we
1231 	 * proceed with unregistration.
1232 	 */
1233 	pmu->closed = true;
1234 	synchronize_rcu();
1235 
1236 	hrtimer_cancel(&pmu->timer);
1237 
1238 	i915_pmu_unregister_cpuhp_state(pmu);
1239 
1240 	perf_pmu_unregister(&pmu->base);
1241 	pmu->base.event_init = NULL;
1242 	kfree(pmu->base.attr_groups);
1243 	if (!is_igp(i915))
1244 		kfree(pmu->name);
1245 	free_event_attributes(pmu);
1246 }
1247