xref: /openbmc/linux/drivers/gpu/drm/i915/i915_pmu.c (revision d6e0cbb1)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6 
7 #include <linux/irq.h>
8 #include <linux/pm_runtime.h>
9 
10 #include "gt/intel_engine.h"
11 
12 #include "i915_drv.h"
13 #include "i915_pmu.h"
14 #include "intel_pm.h"
15 
16 /* Frequency for the sampling timer for events which need it. */
17 #define FREQUENCY 200
18 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
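/*
 * Worked example, assuming the values above: with FREQUENCY == 200 the
 * nominal period is NSEC_PER_SEC / 200 == 5,000,000 ns, which is well
 * above the 10,000 ns floor, so PERIOD evaluates to 5 ms and the
 * sampling timer fires roughly 200 times per second.
 */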
19 
20 #define ENGINE_SAMPLE_MASK \
21 	(BIT(I915_SAMPLE_BUSY) | \
22 	 BIT(I915_SAMPLE_WAIT) | \
23 	 BIT(I915_SAMPLE_SEMA))
24 
25 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
26 
27 static cpumask_t i915_pmu_cpumask;
28 
29 static u8 engine_config_sample(u64 config)
30 {
31 	return config & I915_PMU_SAMPLE_MASK;
32 }
33 
34 static u8 engine_event_sample(struct perf_event *event)
35 {
36 	return engine_config_sample(event->attr.config);
37 }
38 
39 static u8 engine_event_class(struct perf_event *event)
40 {
41 	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
42 }
43 
44 static u8 engine_event_instance(struct perf_event *event)
45 {
46 	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
47 }
48 
49 static bool is_engine_config(u64 config)
50 {
51 	return config < __I915_PMU_OTHER(0);
52 }
53 
54 static unsigned int config_enabled_bit(u64 config)
55 {
56 	if (is_engine_config(config))
57 		return engine_config_sample(config);
58 	else
59 		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
60 }
61 
62 static u64 config_enabled_mask(u64 config)
63 {
64 	return BIT_ULL(config_enabled_bit(config));
65 }
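/*
 * Illustration of the enable bitmask layout, assuming the uapi encoding
 * (I915_PMU_SAMPLE_BITS == 4, I915_PMU_INTERRUPTS == __I915_PMU_OTHER(2)):
 *
 *   config_enabled_bit(I915_PMU_ENGINE_BUSY(0, 0)) == I915_SAMPLE_BUSY == 0
 *   config_enabled_bit(I915_PMU_INTERRUPTS)        == 16 + 2 == 18
 *
 * All engine events of a given sample type share one low bit, while each
 * "other" event gets its own bit above ENGINE_SAMPLE_BITS.
 */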
66 
67 static bool is_engine_event(struct perf_event *event)
68 {
69 	return is_engine_config(event->attr.config);
70 }
71 
72 static unsigned int event_enabled_bit(struct perf_event *event)
73 {
74 	return config_enabled_bit(event->attr.config);
75 }
76 
77 static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
78 {
79 	u64 enable;
80 
81 	/*
82 	 * Only some counters need the sampling timer.
83 	 *
84 	 * We start with a bitmask of all currently enabled events.
85 	 */
86 	enable = i915->pmu.enable;
87 
88 	/*
89 	 * Mask out all the ones which do not need the timer, or in
90 	 * other words keep all the ones that could need the timer.
91 	 */
92 	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
93 		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
94 		  ENGINE_SAMPLE_MASK;
95 
96 	/*
97 	 * When the GPU is idle, per-engine counters do not need to be
98 	 * running, so clear those bits out.
99 	 */
100 	if (!gpu_active)
101 		enable &= ~ENGINE_SAMPLE_MASK;
102 	/*
103 	 * Also, if software busyness tracking is available, we do not
104 	 * need the timer for the I915_SAMPLE_BUSY counter.
105 	 */
106 	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
107 		enable &= ~BIT(I915_SAMPLE_BUSY);
108 
109 	/*
110 	 * If some bits remain, it means we need the sampling timer running.
111 	 */
112 	return enable;
113 }
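/*
 * For example, under the logic above: with only I915_PMU_INTERRUPTS or
 * I915_PMU_RC6_RESIDENCY enabled this returns 0 and no timer runs, since
 * those counters are read on demand. A frequency counter always keeps
 * the timer running, while an engine busy/wait/sema counter keeps it
 * running only while the GPU is unparked (and, for busy, only when
 * software busyness stats are not available).
 */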
114 
115 void i915_pmu_gt_parked(struct drm_i915_private *i915)
116 {
117 	if (!i915->pmu.base.event_init)
118 		return;
119 
120 	spin_lock_irq(&i915->pmu.lock);
121 	/*
122 	 * Signal sampling timer to stop if only engine events are enabled and
123 	 * GPU went idle.
124 	 */
125 	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
126 	spin_unlock_irq(&i915->pmu.lock);
127 }
128 
129 static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
130 {
131 	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
132 		i915->pmu.timer_enabled = true;
133 		i915->pmu.timer_last = ktime_get();
134 		hrtimer_start_range_ns(&i915->pmu.timer,
135 				       ns_to_ktime(PERIOD), 0,
136 				       HRTIMER_MODE_REL_PINNED);
137 	}
138 }
139 
140 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
141 {
142 	if (!i915->pmu.base.event_init)
143 		return;
144 
145 	spin_lock_irq(&i915->pmu.lock);
146 	/*
147 	 * Re-enable sampling timer when GPU goes active.
148 	 */
149 	__i915_pmu_maybe_start_timer(i915);
150 	spin_unlock_irq(&i915->pmu.lock);
151 }
152 
153 static void
154 add_sample(struct i915_pmu_sample *sample, u32 val)
155 {
156 	sample->cur += val;
157 }
158 
159 static void
160 engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
161 {
162 	struct intel_engine_cs *engine;
163 	enum intel_engine_id id;
164 	intel_wakeref_t wakeref;
165 	unsigned long flags;
166 
167 	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
168 		return;
169 
170 	wakeref = 0;
171 	if (READ_ONCE(dev_priv->gt.awake))
172 		wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
173 	if (!wakeref)
174 		return;
175 
176 	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
177 	for_each_engine(engine, dev_priv, id) {
178 		struct intel_engine_pmu *pmu = &engine->pmu;
179 		bool busy;
180 		u32 val;
181 
182 		val = I915_READ_FW(RING_CTL(engine->mmio_base));
183 		if (val == 0) /* powerwell off => engine idle */
184 			continue;
185 
186 		if (val & RING_WAIT)
187 			add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
188 		if (val & RING_WAIT_SEMAPHORE)
189 			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
190 
191 		/*
192 		 * While waiting on a semaphore or event, MI_MODE reports the
193 		 * ring as idle. However, both previously when using the seqno
194 		 * and with execlists sampling, we account a waiting ring as the
195 		 * engine being busy. Therefore, we record the sample as being
196 		 * busy if either waiting or !idle.
197 		 */
198 		busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
199 		if (!busy) {
200 			val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
201 			busy = !(val & MODE_IDLE);
202 		}
203 		if (busy)
204 			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
205 	}
206 	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
207 
208 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
209 }
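/*
 * Net effect of the loop above: each enabled engine sample
 * (busy/wait/sema) accumulates the number of nanoseconds per sampling
 * period during which the condition was observed, so
 * engine->pmu.sample[].cur is a monotonically growing time-in-state
 * estimate with PERIOD granularity.
 */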
210 
211 static void
212 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
213 {
214 	sample->cur += mul_u32_u32(val, mul);
215 }
216 
217 static void
218 frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
219 {
220 	if (dev_priv->pmu.enable &
221 	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
222 		u32 val;
223 
224 		val = dev_priv->gt_pm.rps.cur_freq;
225 		if (dev_priv->gt.awake) {
226 			intel_wakeref_t wakeref;
227 
228 			with_intel_runtime_pm_if_in_use(&dev_priv->runtime_pm,
229 							wakeref) {
230 				val = intel_uncore_read_notrace(&dev_priv->uncore,
231 								GEN6_RPSTAT1);
232 				val = intel_get_cagf(dev_priv, val);
233 			}
234 		}
235 
236 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
237 				intel_gpu_freq(dev_priv, val),
238 				period_ns / 1000);
239 	}
240 
241 	if (dev_priv->pmu.enable &
242 	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
243 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
244 				intel_gpu_freq(dev_priv,
245 					       dev_priv->gt_pm.rps.cur_freq),
246 				period_ns / 1000);
247 	}
248 }
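/*
 * Unit bookkeeping for the frequency samples above: intel_gpu_freq()
 * returns MHz and period_ns / 1000 is microseconds, so sample->cur
 * accumulates MHz * usec. The read side (__i915_pmu_event_read) divides
 * by USEC_PER_SEC, exposing the counter in MHz * seconds; dividing the
 * delta by the elapsed time, as perf does, yields the time-averaged
 * frequency in MHz.
 */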
249 
250 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
251 {
252 	struct drm_i915_private *i915 =
253 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
254 	unsigned int period_ns;
255 	ktime_t now;
256 
257 	if (!READ_ONCE(i915->pmu.timer_enabled))
258 		return HRTIMER_NORESTART;
259 
260 	now = ktime_get();
261 	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
262 	i915->pmu.timer_last = now;
263 
264 	/*
265 	 * Strictly speaking the passed in period may not be 100% accurate for
266 	 * all internal calculations, since some amount of time can be spent on
267 	 * grabbing the forcewake. However, the potential error from timer
268 	 * callback delay greatly dominates this, so we keep it simple.
269 	 */
270 	engines_sample(i915, period_ns);
271 	frequency_sample(i915, period_ns);
272 
273 	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
274 
275 	return HRTIMER_RESTART;
276 }
277 
278 static u64 count_interrupts(struct drm_i915_private *i915)
279 {
280 	/* open-coded kstat_irqs() */
281 	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
282 	u64 sum = 0;
283 	int cpu;
284 
285 	if (!desc || !desc->kstat_irqs)
286 		return 0;
287 
288 	for_each_possible_cpu(cpu)
289 		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
290 
291 	return sum;
292 }
293 
294 static void engine_event_destroy(struct perf_event *event)
295 {
296 	struct drm_i915_private *i915 =
297 		container_of(event->pmu, typeof(*i915), pmu.base);
298 	struct intel_engine_cs *engine;
299 
300 	engine = intel_engine_lookup_user(i915,
301 					  engine_event_class(event),
302 					  engine_event_instance(event));
303 	if (WARN_ON_ONCE(!engine))
304 		return;
305 
306 	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
307 	    intel_engine_supports_stats(engine))
308 		intel_disable_engine_stats(engine);
309 }
310 
311 static void i915_pmu_event_destroy(struct perf_event *event)
312 {
313 	WARN_ON(event->parent);
314 
315 	if (is_engine_event(event))
316 		engine_event_destroy(event);
317 }
318 
319 static int
320 engine_event_status(struct intel_engine_cs *engine,
321 		    enum drm_i915_pmu_engine_sample sample)
322 {
323 	switch (sample) {
324 	case I915_SAMPLE_BUSY:
325 	case I915_SAMPLE_WAIT:
326 		break;
327 	case I915_SAMPLE_SEMA:
328 		if (INTEL_GEN(engine->i915) < 6)
329 			return -ENODEV;
330 		break;
331 	default:
332 		return -ENOENT;
333 	}
334 
335 	return 0;
336 }
337 
338 static int
339 config_status(struct drm_i915_private *i915, u64 config)
340 {
341 	switch (config) {
342 	case I915_PMU_ACTUAL_FREQUENCY:
343 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
344 			/* Requires a mutex for sampling! */
345 			return -ENODEV;
346 		/* Fall-through. */
347 	case I915_PMU_REQUESTED_FREQUENCY:
348 		if (INTEL_GEN(i915) < 6)
349 			return -ENODEV;
350 		break;
351 	case I915_PMU_INTERRUPTS:
352 		break;
353 	case I915_PMU_RC6_RESIDENCY:
354 		if (!HAS_RC6(i915))
355 			return -ENODEV;
356 		break;
357 	default:
358 		return -ENOENT;
359 	}
360 
361 	return 0;
362 }
363 
364 static int engine_event_init(struct perf_event *event)
365 {
366 	struct drm_i915_private *i915 =
367 		container_of(event->pmu, typeof(*i915), pmu.base);
368 	struct intel_engine_cs *engine;
369 	u8 sample;
370 	int ret;
371 
372 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
373 					  engine_event_instance(event));
374 	if (!engine)
375 		return -ENODEV;
376 
377 	sample = engine_event_sample(event);
378 	ret = engine_event_status(engine, sample);
379 	if (ret)
380 		return ret;
381 
382 	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
383 		ret = intel_enable_engine_stats(engine);
384 
385 	return ret;
386 }
387 
388 static int i915_pmu_event_init(struct perf_event *event)
389 {
390 	struct drm_i915_private *i915 =
391 		container_of(event->pmu, typeof(*i915), pmu.base);
392 	int ret;
393 
394 	if (event->attr.type != event->pmu->type)
395 		return -ENOENT;
396 
397 	/* unsupported modes and filters */
398 	if (event->attr.sample_period) /* no sampling */
399 		return -EINVAL;
400 
401 	if (has_branch_stack(event))
402 		return -EOPNOTSUPP;
403 
404 	if (event->cpu < 0)
405 		return -EINVAL;
406 
407 	/* only allow running on one cpu at a time */
408 	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
409 		return -EINVAL;
410 
411 	if (is_engine_event(event))
412 		ret = engine_event_init(event);
413 	else
414 		ret = config_status(i915, event->attr.config);
415 	if (ret)
416 		return ret;
417 
418 	if (!event->parent)
419 		event->destroy = i915_pmu_event_destroy;
420 
421 	return 0;
422 }
423 
424 static u64 __get_rc6(struct drm_i915_private *i915)
425 {
426 	u64 val;
427 
428 	val = intel_rc6_residency_ns(i915,
429 				     IS_VALLEYVIEW(i915) ?
430 				     VLV_GT_RENDER_RC6 :
431 				     GEN6_GT_GFX_RC6);
432 
433 	if (HAS_RC6p(i915))
434 		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
435 
436 	if (HAS_RC6pp(i915))
437 		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
438 
439 	return val;
440 }
441 
442 static u64 get_rc6(struct drm_i915_private *i915)
443 {
444 #if IS_ENABLED(CONFIG_PM)
445 	struct intel_runtime_pm *rpm = &i915->runtime_pm;
446 	intel_wakeref_t wakeref;
447 	unsigned long flags;
448 	u64 val;
449 
450 	wakeref = intel_runtime_pm_get_if_in_use(rpm);
451 	if (wakeref) {
452 		val = __get_rc6(i915);
453 		intel_runtime_pm_put(rpm, wakeref);
454 
455 		/*
456 		 * If we are coming back from being runtime suspended we must
457 		 * be careful not to report a larger value than returned
458 		 * previously.
459 		 */
460 
461 		spin_lock_irqsave(&i915->pmu.lock, flags);
462 
463 		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
464 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
465 			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
466 		} else {
467 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
468 		}
469 
470 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
471 	} else {
472 		struct device *kdev = rpm->kdev;
473 
474 		/*
475 		 * We are runtime suspended.
476 		 *
477 		 * Report the delta from when the device was suspended to now,
478 		 * on top of the last known real value, as the approximated RC6
479 		 * counter value.
480 		 */
481 		spin_lock_irqsave(&i915->pmu.lock, flags);
482 
483 		/*
484 		 * Even though intel_runtime_pm_get_if_in_use failed above to get
485 		 * the runtime PM reference, we cannot assume we are in runtime
486 		 * suspend, since we can either: a) race with coming out of it
487 		 * before we took the power.lock, or b) be in a state other than
488 		 * suspended which can bring us here.
489 		 *
490 		 * We need to double-check that we are indeed currently runtime
491 		 * suspended and, if not, we cannot do better than report the last
492 		 * known RC6 value.
493 		 */
494 		if (pm_runtime_status_suspended(kdev)) {
495 			val = pm_runtime_suspended_time(kdev);
496 
497 			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
498 				i915->pmu.suspended_time_last = val;
499 
500 			val -= i915->pmu.suspended_time_last;
501 			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
502 
503 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
504 		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
505 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
506 		} else {
507 			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
508 		}
509 
510 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
511 	}
512 
513 	return val;
514 #else
515 	return __get_rc6(i915);
516 #endif
517 }
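/*
 * Worked example of the estimation above, with illustrative numbers:
 * suppose the last real RC6 read was 10s and the device then runtime
 * suspends. The first read while suspended latches suspended_time_last
 * and reports 10s; a read 2s later reports 10s + 2s = 12s. When the
 * device is in use again and the next real read returns e.g. 13s (>= the
 * 12s estimate), the estimate is dropped and real values are reported
 * again; a smaller real value is instead clamped to the estimate so the
 * counter never appears to go backwards.
 */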
518 
519 static u64 __i915_pmu_event_read(struct perf_event *event)
520 {
521 	struct drm_i915_private *i915 =
522 		container_of(event->pmu, typeof(*i915), pmu.base);
523 	u64 val = 0;
524 
525 	if (is_engine_event(event)) {
526 		u8 sample = engine_event_sample(event);
527 		struct intel_engine_cs *engine;
528 
529 		engine = intel_engine_lookup_user(i915,
530 						  engine_event_class(event),
531 						  engine_event_instance(event));
532 
533 		if (WARN_ON_ONCE(!engine)) {
534 			/* Do nothing */
535 		} else if (sample == I915_SAMPLE_BUSY &&
536 			   intel_engine_supports_stats(engine)) {
537 			val = ktime_to_ns(intel_engine_get_busy_time(engine));
538 		} else {
539 			val = engine->pmu.sample[sample].cur;
540 		}
541 	} else {
542 		switch (event->attr.config) {
543 		case I915_PMU_ACTUAL_FREQUENCY:
544 			val =
545 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
546 				   USEC_PER_SEC /* to MHz */);
547 			break;
548 		case I915_PMU_REQUESTED_FREQUENCY:
549 			val =
550 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
551 				   USEC_PER_SEC /* to MHz */);
552 			break;
553 		case I915_PMU_INTERRUPTS:
554 			val = count_interrupts(i915);
555 			break;
556 		case I915_PMU_RC6_RESIDENCY:
557 			val = get_rc6(i915);
558 			break;
559 		}
560 	}
561 
562 	return val;
563 }
564 
565 static void i915_pmu_event_read(struct perf_event *event)
566 {
567 	struct hw_perf_event *hwc = &event->hw;
568 	u64 prev, new;
569 
570 again:
571 	prev = local64_read(&hwc->prev_count);
572 	new = __i915_pmu_event_read(event);
573 
574 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
575 		goto again;
576 
577 	local64_add(new - prev, &event->count);
578 }
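/*
 * The cmpxchg loop above is the usual pattern for a free-running
 * counter: snapshot the current value, publish it as the new prev_count
 * only if no other reader raced with us, then add the delta to
 * event->count so a sampling period is never accounted twice.
 */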
579 
580 static void i915_pmu_enable(struct perf_event *event)
581 {
582 	struct drm_i915_private *i915 =
583 		container_of(event->pmu, typeof(*i915), pmu.base);
584 	unsigned int bit = event_enabled_bit(event);
585 	unsigned long flags;
586 
587 	spin_lock_irqsave(&i915->pmu.lock, flags);
588 
589 	/*
590 	 * Update the bitmask of enabled events and increment
591 	 * the event reference counter.
592 	 */
593 	BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS);
594 	GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
595 	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
596 	i915->pmu.enable |= BIT_ULL(bit);
597 	i915->pmu.enable_count[bit]++;
598 
599 	/*
600 	 * Start the sampling timer if needed and not already enabled.
601 	 */
602 	__i915_pmu_maybe_start_timer(i915);
603 
604 	/*
605 	 * For per-engine events the bitmask and reference counting
606 	 * is stored per engine.
607 	 */
608 	if (is_engine_event(event)) {
609 		u8 sample = engine_event_sample(event);
610 		struct intel_engine_cs *engine;
611 
612 		engine = intel_engine_lookup_user(i915,
613 						  engine_event_class(event),
614 						  engine_event_instance(event));
615 
616 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
617 			     I915_ENGINE_SAMPLE_COUNT);
618 		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
619 			     I915_ENGINE_SAMPLE_COUNT);
620 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
621 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
622 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
623 
624 		engine->pmu.enable |= BIT(sample);
625 		engine->pmu.enable_count[sample]++;
626 	}
627 
628 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
629 
630 	/*
631 	 * Store the current counter value so we can report the correct delta
632 	 * for all listeners, even when the event was already enabled and has
633 	 * an existing non-zero value.
634 	 */
635 	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
636 }
637 
638 static void i915_pmu_disable(struct perf_event *event)
639 {
640 	struct drm_i915_private *i915 =
641 		container_of(event->pmu, typeof(*i915), pmu.base);
642 	unsigned int bit = event_enabled_bit(event);
643 	unsigned long flags;
644 
645 	spin_lock_irqsave(&i915->pmu.lock, flags);
646 
647 	if (is_engine_event(event)) {
648 		u8 sample = engine_event_sample(event);
649 		struct intel_engine_cs *engine;
650 
651 		engine = intel_engine_lookup_user(i915,
652 						  engine_event_class(event),
653 						  engine_event_instance(event));
654 
655 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
656 		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
657 		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
658 
659 		/*
660 		 * Decrement the reference count and clear the enabled
661 		 * bitmask when the last listener on an event goes away.
662 		 */
663 		if (--engine->pmu.enable_count[sample] == 0)
664 			engine->pmu.enable &= ~BIT(sample);
665 	}
666 
667 	GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
668 	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
669 	/*
670 	 * Decrement the reference count and clear the enabled
671 	 * bitmask when the last listener on an event goes away.
672 	 */
673 	if (--i915->pmu.enable_count[bit] == 0) {
674 		i915->pmu.enable &= ~BIT_ULL(bit);
675 		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
676 	}
677 
678 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
679 }
680 
681 static void i915_pmu_event_start(struct perf_event *event, int flags)
682 {
683 	i915_pmu_enable(event);
684 	event->hw.state = 0;
685 }
686 
687 static void i915_pmu_event_stop(struct perf_event *event, int flags)
688 {
689 	if (flags & PERF_EF_UPDATE)
690 		i915_pmu_event_read(event);
691 	i915_pmu_disable(event);
692 	event->hw.state = PERF_HES_STOPPED;
693 }
694 
695 static int i915_pmu_event_add(struct perf_event *event, int flags)
696 {
697 	if (flags & PERF_EF_START)
698 		i915_pmu_event_start(event, flags);
699 
700 	return 0;
701 }
702 
703 static void i915_pmu_event_del(struct perf_event *event, int flags)
704 {
705 	i915_pmu_event_stop(event, PERF_EF_UPDATE);
706 }
707 
708 static int i915_pmu_event_event_idx(struct perf_event *event)
709 {
710 	return 0;
711 }
712 
713 struct i915_str_attribute {
714 	struct device_attribute attr;
715 	const char *str;
716 };
717 
718 static ssize_t i915_pmu_format_show(struct device *dev,
719 				    struct device_attribute *attr, char *buf)
720 {
721 	struct i915_str_attribute *eattr;
722 
723 	eattr = container_of(attr, struct i915_str_attribute, attr);
724 	return sprintf(buf, "%s\n", eattr->str);
725 }
726 
727 #define I915_PMU_FORMAT_ATTR(_name, _config) \
728 	(&((struct i915_str_attribute[]) { \
729 		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
730 		  .str = _config, } \
731 	})[0].attr.attr)
732 
733 static struct attribute *i915_pmu_format_attrs[] = {
734 	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
735 	NULL,
736 };
737 
738 static const struct attribute_group i915_pmu_format_attr_group = {
739 	.name = "format",
740 	.attrs = i915_pmu_format_attrs,
741 };
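/*
 * With the single format attribute above, and following the usual
 * perf_pmu sysfs layout, tooling sees something like:
 *
 *   /sys/bus/event_source/devices/i915/format/i915_eventid: "config:0-20"
 *
 * i.e. the whole event selector is encoded in the low 21 bits of
 * perf_event_attr.config.
 */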
742 
743 struct i915_ext_attribute {
744 	struct device_attribute attr;
745 	unsigned long val;
746 };
747 
748 static ssize_t i915_pmu_event_show(struct device *dev,
749 				   struct device_attribute *attr, char *buf)
750 {
751 	struct i915_ext_attribute *eattr;
752 
753 	eattr = container_of(attr, struct i915_ext_attribute, attr);
754 	return sprintf(buf, "config=0x%lx\n", eattr->val);
755 }
756 
757 static struct attribute_group i915_pmu_events_attr_group = {
758 	.name = "events",
759 	/* Patch in attrs at runtime. */
760 };
761 
762 static ssize_t
763 i915_pmu_get_attr_cpumask(struct device *dev,
764 			  struct device_attribute *attr,
765 			  char *buf)
766 {
767 	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
768 }
769 
770 static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);
771 
772 static struct attribute *i915_cpumask_attrs[] = {
773 	&dev_attr_cpumask.attr,
774 	NULL,
775 };
776 
777 static const struct attribute_group i915_pmu_cpumask_attr_group = {
778 	.attrs = i915_cpumask_attrs,
779 };
780 
781 static const struct attribute_group *i915_pmu_attr_groups[] = {
782 	&i915_pmu_format_attr_group,
783 	&i915_pmu_events_attr_group,
784 	&i915_pmu_cpumask_attr_group,
785 	NULL
786 };
787 
788 #define __event(__config, __name, __unit) \
789 { \
790 	.config = (__config), \
791 	.name = (__name), \
792 	.unit = (__unit), \
793 }
794 
795 #define __engine_event(__sample, __name) \
796 { \
797 	.sample = (__sample), \
798 	.name = (__name), \
799 }
800 
801 static struct i915_ext_attribute *
802 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
803 {
804 	sysfs_attr_init(&attr->attr.attr);
805 	attr->attr.attr.name = name;
806 	attr->attr.attr.mode = 0444;
807 	attr->attr.show = i915_pmu_event_show;
808 	attr->val = config;
809 
810 	return ++attr;
811 }
812 
813 static struct perf_pmu_events_attr *
814 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
815 	     const char *str)
816 {
817 	sysfs_attr_init(&attr->attr.attr);
818 	attr->attr.attr.name = name;
819 	attr->attr.attr.mode = 0444;
820 	attr->attr.show = perf_event_sysfs_show;
821 	attr->event_str = str;
822 
823 	return ++attr;
824 }
825 
826 static struct attribute **
827 create_event_attributes(struct drm_i915_private *i915)
828 {
829 	static const struct {
830 		u64 config;
831 		const char *name;
832 		const char *unit;
833 	} events[] = {
834 		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
835 		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
836 		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
837 		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
838 	};
839 	static const struct {
840 		enum drm_i915_pmu_engine_sample sample;
841 		char *name;
842 	} engine_events[] = {
843 		__engine_event(I915_SAMPLE_BUSY, "busy"),
844 		__engine_event(I915_SAMPLE_SEMA, "sema"),
845 		__engine_event(I915_SAMPLE_WAIT, "wait"),
846 	};
847 	unsigned int count = 0;
848 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
849 	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
850 	struct attribute **attr = NULL, **attr_iter;
851 	struct intel_engine_cs *engine;
852 	enum intel_engine_id id;
853 	unsigned int i;
854 
855 	/* Count how many counters we will be exposing. */
856 	for (i = 0; i < ARRAY_SIZE(events); i++) {
857 		if (!config_status(i915, events[i].config))
858 			count++;
859 	}
860 
861 	for_each_engine(engine, i915, id) {
862 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
863 			if (!engine_event_status(engine,
864 						 engine_events[i].sample))
865 				count++;
866 		}
867 	}
868 
869 	/* Allocate attribute objects and table. */
870 	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
871 	if (!i915_attr)
872 		goto err_alloc;
873 
874 	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
875 	if (!pmu_attr)
876 		goto err_alloc;
877 
878 	/* Max one pointer of each attribute type plus a termination entry. */
879 	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
880 	if (!attr)
881 		goto err_alloc;
882 
883 	i915_iter = i915_attr;
884 	pmu_iter = pmu_attr;
885 	attr_iter = attr;
886 
887 	/* Initialize supported non-engine counters. */
888 	for (i = 0; i < ARRAY_SIZE(events); i++) {
889 		char *str;
890 
891 		if (config_status(i915, events[i].config))
892 			continue;
893 
894 		str = kstrdup(events[i].name, GFP_KERNEL);
895 		if (!str)
896 			goto err;
897 
898 		*attr_iter++ = &i915_iter->attr.attr;
899 		i915_iter = add_i915_attr(i915_iter, str, events[i].config);
900 
901 		if (events[i].unit) {
902 			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
903 			if (!str)
904 				goto err;
905 
906 			*attr_iter++ = &pmu_iter->attr.attr;
907 			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
908 		}
909 	}
910 
911 	/* Initialize supported engine counters. */
912 	for_each_engine(engine, i915, id) {
913 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
914 			char *str;
915 
916 			if (engine_event_status(engine,
917 						engine_events[i].sample))
918 				continue;
919 
920 			str = kasprintf(GFP_KERNEL, "%s-%s",
921 					engine->name, engine_events[i].name);
922 			if (!str)
923 				goto err;
924 
925 			*attr_iter++ = &i915_iter->attr.attr;
926 			i915_iter =
927 				add_i915_attr(i915_iter, str,
928 					      __I915_PMU_ENGINE(engine->uabi_class,
929 								engine->instance,
930 								engine_events[i].sample));
931 
932 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
933 					engine->name, engine_events[i].name);
934 			if (!str)
935 				goto err;
936 
937 			*attr_iter++ = &pmu_iter->attr.attr;
938 			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
939 		}
940 	}
941 
942 	i915->pmu.i915_attr = i915_attr;
943 	i915->pmu.pmu_attr = pmu_attr;
944 
945 	return attr;
946 
947 err:;
948 	for (attr_iter = attr; *attr_iter; attr_iter++)
949 		kfree((*attr_iter)->name);
950 
951 err_alloc:
952 	kfree(attr);
953 	kfree(i915_attr);
954 	kfree(pmu_attr);
955 
956 	return NULL;
957 }
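/*
 * On a typical part the function above results in sysfs event entries
 * such as (names are illustrative and depend on which engines and
 * counters probed successfully):
 *
 *   events/actual-frequency        "config=0x..."
 *   events/actual-frequency.unit   "MHz"
 *   events/rcs0-busy               "config=0x0"
 *   events/rcs0-busy.unit          "ns"
 *
 * all under /sys/bus/event_source/devices/i915/.
 */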
958 
959 static void free_event_attributes(struct drm_i915_private *i915)
960 {
961 	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
962 
963 	for (; *attr_iter; attr_iter++)
964 		kfree((*attr_iter)->name);
965 
966 	kfree(i915_pmu_events_attr_group.attrs);
967 	kfree(i915->pmu.i915_attr);
968 	kfree(i915->pmu.pmu_attr);
969 
970 	i915_pmu_events_attr_group.attrs = NULL;
971 	i915->pmu.i915_attr = NULL;
972 	i915->pmu.pmu_attr = NULL;
973 }
974 
975 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
976 {
977 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
978 
979 	GEM_BUG_ON(!pmu->base.event_init);
980 
981 	/* Select the first online CPU as a designated reader. */
982 	if (!cpumask_weight(&i915_pmu_cpumask))
983 		cpumask_set_cpu(cpu, &i915_pmu_cpumask);
984 
985 	return 0;
986 }
987 
988 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
989 {
990 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
991 	unsigned int target;
992 
993 	GEM_BUG_ON(!pmu->base.event_init);
994 
995 	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
996 		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
997 		/* Migrate events if there is a valid target */
998 		if (target < nr_cpu_ids) {
999 			cpumask_set_cpu(target, &i915_pmu_cpumask);
1000 			perf_pmu_migrate_context(&pmu->base, cpu, target);
1001 		}
1002 	}
1003 
1004 	return 0;
1005 }
1006 
1007 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
1008 
1009 static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
1010 {
1011 	enum cpuhp_state slot;
1012 	int ret;
1013 
1014 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1015 				      "perf/x86/intel/i915:online",
1016 				      i915_pmu_cpu_online,
1017 				      i915_pmu_cpu_offline);
1018 	if (ret < 0)
1019 		return ret;
1020 
1021 	slot = ret;
1022 	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
1023 	if (ret) {
1024 		cpuhp_remove_multi_state(slot);
1025 		return ret;
1026 	}
1027 
1028 	cpuhp_slot = slot;
1029 	return 0;
1030 }
1031 
1032 static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
1033 {
1034 	WARN_ON(cpuhp_slot == CPUHP_INVALID);
1035 	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
1036 	cpuhp_remove_multi_state(cpuhp_slot);
1037 }
1038 
1039 void i915_pmu_register(struct drm_i915_private *i915)
1040 {
1041 	int ret;
1042 
1043 	if (INTEL_GEN(i915) <= 2) {
1044 		DRM_INFO("PMU not supported for this GPU.");
1045 		return;
1046 	}
1047 
1048 	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
1049 	if (!i915_pmu_events_attr_group.attrs) {
1050 		ret = -ENOMEM;
1051 		goto err;
1052 	}
1053 
1054 	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
1055 	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
1056 	i915->pmu.base.event_init	= i915_pmu_event_init;
1057 	i915->pmu.base.add		= i915_pmu_event_add;
1058 	i915->pmu.base.del		= i915_pmu_event_del;
1059 	i915->pmu.base.start		= i915_pmu_event_start;
1060 	i915->pmu.base.stop		= i915_pmu_event_stop;
1061 	i915->pmu.base.read		= i915_pmu_event_read;
1062 	i915->pmu.base.event_idx	= i915_pmu_event_event_idx;
1063 
1064 	spin_lock_init(&i915->pmu.lock);
1065 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1066 	i915->pmu.timer.function = i915_sample;
1067 
1068 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
1069 	if (ret)
1070 		goto err;
1071 
1072 	ret = i915_pmu_register_cpuhp_state(i915);
1073 	if (ret)
1074 		goto err_unreg;
1075 
1076 	return;
1077 
1078 err_unreg:
1079 	perf_pmu_unregister(&i915->pmu.base);
1080 err:
1081 	i915->pmu.base.event_init = NULL;
1082 	free_event_attributes(i915);
1083 	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
1084 }
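/*
 * Once registered, the counters are usable from ordinary perf tooling,
 * e.g. (event names as created by create_event_attributes above; the
 * i915 PMU counts system-wide, hence -a):
 *
 *   perf stat -a -e i915/rcs0-busy/ -e i915/actual-frequency/ sleep 1
 */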
1085 
1086 void i915_pmu_unregister(struct drm_i915_private *i915)
1087 {
1088 	if (!i915->pmu.base.event_init)
1089 		return;
1090 
1091 	WARN_ON(i915->pmu.enable);
1092 
1093 	hrtimer_cancel(&i915->pmu.timer);
1094 
1095 	i915_pmu_unregister_cpuhp_state(i915);
1096 
1097 	perf_pmu_unregister(&i915->pmu.base);
1098 	i915->pmu.base.event_init = NULL;
1099 	free_event_attributes(i915);
1100 }
1101