xref: /openbmc/linux/drivers/gpu/drm/i915/i915_pmu.c (revision ba61bb17496d1664bf7c5c2fd650d5fd78bd0a92)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017-2018 Intel Corporation
5  */
6 
7 #include "i915_pmu.h"
8 #include "intel_ringbuffer.h"
9 #include "i915_drv.h"
10 
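/*
 * This file implements the i915 PMU: GPU counters (per-engine busy/wait/sema
 * sampling, actual and requested frequency, interrupts and RC6 residency)
 * exported to userspace through the core perf event framework. With a perf
 * tool that resolves the sysfs aliases under
 * /sys/bus/event_source/devices/i915/, the counters can typically be read
 * system wide with something like (illustrative example; the exact event
 * names are generated by create_event_attributes() below):
 *
 *	perf stat -e i915/actual-frequency/ -e i915/rc6-residency/ -a sleep 1
 */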
11 /* Frequency for the sampling timer for events which need it. */
12 #define FREQUENCY 200
13 #define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)
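/*
 * Note: with FREQUENCY at 200Hz this works out to a 5ms sampling period;
 * the 10us lower bound only matters for much higher frequencies.
 */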
14 
15 #define ENGINE_SAMPLE_MASK \
16 	(BIT(I915_SAMPLE_BUSY) | \
17 	 BIT(I915_SAMPLE_WAIT) | \
18 	 BIT(I915_SAMPLE_SEMA))
19 
20 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
21 
22 static cpumask_t i915_pmu_cpumask;
23 
24 static u8 engine_config_sample(u64 config)
25 {
26 	return config & I915_PMU_SAMPLE_MASK;
27 }
28 
29 static u8 engine_event_sample(struct perf_event *event)
30 {
31 	return engine_config_sample(event->attr.config);
32 }
33 
34 static u8 engine_event_class(struct perf_event *event)
35 {
36 	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
37 }
38 
39 static u8 engine_event_instance(struct perf_event *event)
40 {
41 	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
42 }
43 
44 static bool is_engine_config(u64 config)
45 {
46 	return config < __I915_PMU_OTHER(0);
47 }
48 
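/*
 * The global pmu.enable bitmask packs the per-engine sample types into the
 * low ENGINE_SAMPLE_BITS bits and the remaining ("other") events above them,
 * as encoded by config_enabled_bit() below.
 */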
49 static unsigned int config_enabled_bit(u64 config)
50 {
51 	if (is_engine_config(config))
52 		return engine_config_sample(config);
53 	else
54 		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
55 }
56 
57 static u64 config_enabled_mask(u64 config)
58 {
59 	return BIT_ULL(config_enabled_bit(config));
60 }
61 
62 static bool is_engine_event(struct perf_event *event)
63 {
64 	return is_engine_config(event->attr.config);
65 }
66 
67 static unsigned int event_enabled_bit(struct perf_event *event)
68 {
69 	return config_enabled_bit(event->attr.config);
70 }
71 
72 static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
73 {
74 	u64 enable;
75 
76 	/*
77 	 * Only some counters need the sampling timer.
78 	 *
79 	 * We start with a bitmask of all currently enabled events.
80 	 */
81 	enable = i915->pmu.enable;
82 
83 	/*
84 	 * Mask out all the ones which do not need the timer, or in
85 	 * other words keep all the ones that could need the timer.
86 	 */
87 	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
88 		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
89 		  ENGINE_SAMPLE_MASK;
90 
91 	/*
92 	 * When the GPU is idle, per-engine counters do not need to be
93 	 * running, so clear those bits out.
94 	 */
95 	if (!gpu_active)
96 		enable &= ~ENGINE_SAMPLE_MASK;
97 	/*
98 	 * Also, when software busyness tracking is available we do not
99 	 * need the timer for the I915_SAMPLE_BUSY counter.
100 	 *
101 	 * Use RCS as proxy for all engines.
102 	 */
103 	else if (intel_engine_supports_stats(i915->engine[RCS]))
104 		enable &= ~BIT(I915_SAMPLE_BUSY);
105 
106 	/*
107 	 * If some bits remain it means we need the sampling timer running.
108 	 */
109 	return enable;
110 }
111 
112 void i915_pmu_gt_parked(struct drm_i915_private *i915)
113 {
114 	if (!i915->pmu.base.event_init)
115 		return;
116 
117 	spin_lock_irq(&i915->pmu.lock);
118 	/*
119 	 * Signal sampling timer to stop if only engine events are enabled and
120 	 * GPU went idle.
121 	 */
122 	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
123 	spin_unlock_irq(&i915->pmu.lock);
124 }
125 
126 static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
127 {
128 	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
129 		i915->pmu.timer_enabled = true;
130 		i915->pmu.timer_last = ktime_get();
131 		hrtimer_start_range_ns(&i915->pmu.timer,
132 				       ns_to_ktime(PERIOD), 0,
133 				       HRTIMER_MODE_REL_PINNED);
134 	}
135 }
136 
137 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
138 {
139 	if (!i915->pmu.base.event_init)
140 		return;
141 
142 	spin_lock_irq(&i915->pmu.lock);
143 	/*
144 	 * Re-enable sampling timer when GPU goes active.
145 	 */
146 	__i915_pmu_maybe_start_timer(i915);
147 	spin_unlock_irq(&i915->pmu.lock);
148 }
149 
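/*
 * Take forcewake at most once per sampling pass: callers feed the returned
 * value back in as the fw flag, so only the first invocation actually grabs
 * forcewake and the matching put happens once at the end of engines_sample().
 */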
150 static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
151 {
152 	if (!fw)
153 		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
154 
155 	return true;
156 }
157 
158 static void
159 add_sample(struct i915_pmu_sample *sample, u32 val)
160 {
161 	sample->cur += val;
162 }
163 
164 static void
165 engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
166 {
167 	struct intel_engine_cs *engine;
168 	enum intel_engine_id id;
169 	bool fw = false;
170 
171 	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
172 		return;
173 
174 	if (!dev_priv->gt.awake)
175 		return;
176 
177 	if (!intel_runtime_pm_get_if_in_use(dev_priv))
178 		return;
179 
180 	for_each_engine(engine, dev_priv, id) {
181 		u32 current_seqno = intel_engine_get_seqno(engine);
182 		u32 last_seqno = intel_engine_last_submit(engine);
183 		u32 val;
184 
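		/*
		 * The engine is considered busy while the seqno of the last
		 * submitted request has not yet been reported as completed,
		 * i.e. the hardware seqno still trails the last submission.
		 */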
185 		val = !i915_seqno_passed(current_seqno, last_seqno);
186 
187 		if (val)
188 			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
189 				   period_ns);
190 
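		/*
		 * Only read the ring control register (which requires
		 * forcewake) when the engine is busy and WAIT/SEMA sampling
		 * was actually requested for it.
		 */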
191 		if (val && (engine->pmu.enable &
192 		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
193 			fw = grab_forcewake(dev_priv, fw);
194 
195 			val = I915_READ_FW(RING_CTL(engine->mmio_base));
196 		} else {
197 			val = 0;
198 		}
199 
200 		if (val & RING_WAIT)
201 			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
202 				   period_ns);
203 
204 		if (val & RING_WAIT_SEMAPHORE)
205 			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
206 				   period_ns);
207 	}
208 
209 	if (fw)
210 		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
211 
212 	intel_runtime_pm_put(dev_priv);
213 }
214 
215 static void
216 add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
217 {
218 	sample->cur += mul_u32_u32(val, mul);
219 }
220 
221 static void
222 frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
223 {
224 	if (dev_priv->pmu.enable &
225 	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
226 		u32 val;
227 
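		/*
		 * Prefer the frequency reported by the hardware, but if the
		 * GPU is parked (or the runtime PM reference cannot be taken
		 * without resuming) fall back to the last requested frequency
		 * instead of waking the device up.
		 */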
228 		val = dev_priv->gt_pm.rps.cur_freq;
229 		if (dev_priv->gt.awake &&
230 		    intel_runtime_pm_get_if_in_use(dev_priv)) {
231 			val = intel_get_cagf(dev_priv,
232 					     I915_READ_NOTRACE(GEN6_RPSTAT1));
233 			intel_runtime_pm_put(dev_priv);
234 		}
235 
236 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
237 				intel_gpu_freq(dev_priv, val),
238 				period_ns / 1000);
239 	}
240 
241 	if (dev_priv->pmu.enable &
242 	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
243 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
244 				intel_gpu_freq(dev_priv,
245 					       dev_priv->gt_pm.rps.cur_freq),
246 				period_ns / 1000);
247 	}
248 }
249 
250 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
251 {
252 	struct drm_i915_private *i915 =
253 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
254 	unsigned int period_ns;
255 	ktime_t now;
256 
257 	if (!READ_ONCE(i915->pmu.timer_enabled))
258 		return HRTIMER_NORESTART;
259 
260 	now = ktime_get();
261 	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
262 	i915->pmu.timer_last = now;
263 
264 	/*
265 	 * Strictly speaking the passed-in period may not be 100% accurate for
266 	 * all internal calculations, since some amount of time can be spent on
267 	 * grabbing the forcewake. However, the potential error from the timer
268 	 * callback delay greatly dominates this, so we keep it simple.
269 	 */
270 	engines_sample(i915, period_ns);
271 	frequency_sample(i915, period_ns);
272 
273 	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
274 
275 	return HRTIMER_RESTART;
276 }
277 
278 static u64 count_interrupts(struct drm_i915_private *i915)
279 {
280 	/* open-coded kstat_irqs() */
281 	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
282 	u64 sum = 0;
283 	int cpu;
284 
285 	if (!desc || !desc->kstat_irqs)
286 		return 0;
287 
288 	for_each_possible_cpu(cpu)
289 		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
290 
291 	return sum;
292 }
293 
294 static void engine_event_destroy(struct perf_event *event)
295 {
296 	struct drm_i915_private *i915 =
297 		container_of(event->pmu, typeof(*i915), pmu.base);
298 	struct intel_engine_cs *engine;
299 
300 	engine = intel_engine_lookup_user(i915,
301 					  engine_event_class(event),
302 					  engine_event_instance(event));
303 	if (WARN_ON_ONCE(!engine))
304 		return;
305 
306 	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
307 	    intel_engine_supports_stats(engine))
308 		intel_disable_engine_stats(engine);
309 }
310 
311 static void i915_pmu_event_destroy(struct perf_event *event)
312 {
313 	WARN_ON(event->parent);
314 
315 	if (is_engine_event(event))
316 		engine_event_destroy(event);
317 }
318 
319 static int
320 engine_event_status(struct intel_engine_cs *engine,
321 		    enum drm_i915_pmu_engine_sample sample)
322 {
323 	switch (sample) {
324 	case I915_SAMPLE_BUSY:
325 	case I915_SAMPLE_WAIT:
326 		break;
327 	case I915_SAMPLE_SEMA:
328 		if (INTEL_GEN(engine->i915) < 6)
329 			return -ENODEV;
330 		break;
331 	default:
332 		return -ENOENT;
333 	}
334 
335 	return 0;
336 }
337 
338 static int
339 config_status(struct drm_i915_private *i915, u64 config)
340 {
341 	switch (config) {
342 	case I915_PMU_ACTUAL_FREQUENCY:
343 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
344 			/* Requires a mutex for sampling! */
345 			return -ENODEV;
346 		/* Fall-through. */
347 	case I915_PMU_REQUESTED_FREQUENCY:
348 		if (INTEL_GEN(i915) < 6)
349 			return -ENODEV;
350 		break;
351 	case I915_PMU_INTERRUPTS:
352 		break;
353 	case I915_PMU_RC6_RESIDENCY:
354 		if (!HAS_RC6(i915))
355 			return -ENODEV;
356 		break;
357 	default:
358 		return -ENOENT;
359 	}
360 
361 	return 0;
362 }
363 
364 static int engine_event_init(struct perf_event *event)
365 {
366 	struct drm_i915_private *i915 =
367 		container_of(event->pmu, typeof(*i915), pmu.base);
368 	struct intel_engine_cs *engine;
369 	u8 sample;
370 	int ret;
371 
372 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
373 					  engine_event_instance(event));
374 	if (!engine)
375 		return -ENODEV;
376 
377 	sample = engine_event_sample(event);
378 	ret = engine_event_status(engine, sample);
379 	if (ret)
380 		return ret;
381 
382 	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
383 		ret = intel_enable_engine_stats(engine);
384 
385 	return ret;
386 }
387 
388 static int i915_pmu_event_init(struct perf_event *event)
389 {
390 	struct drm_i915_private *i915 =
391 		container_of(event->pmu, typeof(*i915), pmu.base);
392 	int ret;
393 
394 	if (event->attr.type != event->pmu->type)
395 		return -ENOENT;
396 
397 	/* unsupported modes and filters */
398 	if (event->attr.sample_period) /* no sampling */
399 		return -EINVAL;
400 
401 	if (has_branch_stack(event))
402 		return -EOPNOTSUPP;
403 
404 	if (event->cpu < 0)
405 		return -EINVAL;
406 
407 	/* only allow running on one cpu at a time */
408 	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
409 		return -EINVAL;
410 
411 	if (is_engine_event(event))
412 		ret = engine_event_init(event);
413 	else
414 		ret = config_status(i915, event->attr.config);
415 	if (ret)
416 		return ret;
417 
418 	if (!event->parent)
419 		event->destroy = i915_pmu_event_destroy;
420 
421 	return 0;
422 }
423 
424 static u64 __get_rc6(struct drm_i915_private *i915)
425 {
426 	u64 val;
427 
428 	val = intel_rc6_residency_ns(i915,
429 				     IS_VALLEYVIEW(i915) ?
430 				     VLV_GT_RENDER_RC6 :
431 				     GEN6_GT_GFX_RC6);
432 
433 	if (HAS_RC6p(i915))
434 		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
435 
436 	if (HAS_RC6pp(i915))
437 		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
438 
439 	return val;
440 }
441 
442 static u64 get_rc6(struct drm_i915_private *i915)
443 {
444 #if IS_ENABLED(CONFIG_PM)
445 	unsigned long flags;
446 	u64 val;
447 
448 	if (intel_runtime_pm_get_if_in_use(i915)) {
449 		val = __get_rc6(i915);
450 		intel_runtime_pm_put(i915);
451 
452 		/*
453 		 * If we are coming back from being runtime suspended we must
454 		 * be careful not to report a larger value than returned
455 		 * previously.
456 		 */
457 
458 		spin_lock_irqsave(&i915->pmu.lock, flags);
459 
460 		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
461 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
462 			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
463 		} else {
464 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
465 		}
466 
467 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
468 	} else {
469 		struct pci_dev *pdev = i915->drm.pdev;
470 		struct device *kdev = &pdev->dev;
471 
472 		/*
473 		 * We are runtime suspended.
474 		 *
475 		 * Report the delta from when the device was suspended to now,
476 		 * on top of the last known real value, as the approximated RC6
477 		 * counter value.
478 		 */
479 		spin_lock_irqsave(&i915->pmu.lock, flags);
480 		spin_lock(&kdev->power.lock);
481 
482 		/*
483 		 * Even though intel_runtime_pm_get_if_in_use above failed to
484 		 * get the runtime PM reference, we cannot assume we are in
485 		 * runtime suspend, since we can either: a) race with coming
486 		 * out of it before we took the power.lock, or b) be in one
487 		 * of the states other than suspended which can bring us here.
488 		 *
489 		 * We need to double-check that we are indeed currently runtime
490 		 * suspended and if not we cannot do better than report the last
491 		 * known RC6 value.
492 		 */
493 		if (kdev->power.runtime_status == RPM_SUSPENDED) {
494 			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
495 				i915->pmu.suspended_jiffies_last =
496 						  kdev->power.suspended_jiffies;
497 
498 			val = kdev->power.suspended_jiffies -
499 			      i915->pmu.suspended_jiffies_last;
500 			val += jiffies - kdev->power.accounting_timestamp;
501 
502 			val = jiffies_to_nsecs(val);
503 			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
504 
505 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
506 		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
507 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
508 		} else {
509 			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
510 		}
511 
512 		spin_unlock(&kdev->power.lock);
513 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
514 	}
515 
516 	return val;
517 #else
518 	return __get_rc6(i915);
519 #endif
520 }
521 
522 static u64 __i915_pmu_event_read(struct perf_event *event)
523 {
524 	struct drm_i915_private *i915 =
525 		container_of(event->pmu, typeof(*i915), pmu.base);
526 	u64 val = 0;
527 
528 	if (is_engine_event(event)) {
529 		u8 sample = engine_event_sample(event);
530 		struct intel_engine_cs *engine;
531 
532 		engine = intel_engine_lookup_user(i915,
533 						  engine_event_class(event),
534 						  engine_event_instance(event));
535 
536 		if (WARN_ON_ONCE(!engine)) {
537 			/* Do nothing */
538 		} else if (sample == I915_SAMPLE_BUSY &&
539 			   intel_engine_supports_stats(engine)) {
540 			val = ktime_to_ns(intel_engine_get_busy_time(engine));
541 		} else {
542 			val = engine->pmu.sample[sample].cur;
543 		}
544 	} else {
545 		switch (event->attr.config) {
546 		case I915_PMU_ACTUAL_FREQUENCY:
547 			val =
548 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
549 				   USEC_PER_SEC /* to MHz */);
550 			break;
551 		case I915_PMU_REQUESTED_FREQUENCY:
552 			val =
553 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
554 				   USEC_PER_SEC /* to MHz */);
555 			break;
556 		case I915_PMU_INTERRUPTS:
557 			val = count_interrupts(i915);
558 			break;
559 		case I915_PMU_RC6_RESIDENCY:
560 			val = get_rc6(i915);
561 			break;
562 		}
563 	}
564 
565 	return val;
566 }
567 
568 static void i915_pmu_event_read(struct perf_event *event)
569 {
570 	struct hw_perf_event *hwc = &event->hw;
571 	u64 prev, new;
572 
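	/*
	 * Lock-free read: retry until prev_count can be exchanged for the
	 * freshly read value, then publish only the delta so concurrent
	 * readers never account the same interval twice.
	 */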
573 again:
574 	prev = local64_read(&hwc->prev_count);
575 	new = __i915_pmu_event_read(event);
576 
577 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
578 		goto again;
579 
580 	local64_add(new - prev, &event->count);
581 }
582 
583 static void i915_pmu_enable(struct perf_event *event)
584 {
585 	struct drm_i915_private *i915 =
586 		container_of(event->pmu, typeof(*i915), pmu.base);
587 	unsigned int bit = event_enabled_bit(event);
588 	unsigned long flags;
589 
590 	spin_lock_irqsave(&i915->pmu.lock, flags);
591 
592 	/*
593 	 * Update the bitmask of enabled events and increment
594 	 * the event reference counter.
595 	 */
596 	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
597 	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
598 	i915->pmu.enable |= BIT_ULL(bit);
599 	i915->pmu.enable_count[bit]++;
600 
601 	/*
602 	 * Start the sampling timer if needed and not already enabled.
603 	 */
604 	__i915_pmu_maybe_start_timer(i915);
605 
606 	/*
607 	 * For per-engine events the bitmask and reference counting
608 	 * is stored per engine.
609 	 */
610 	if (is_engine_event(event)) {
611 		u8 sample = engine_event_sample(event);
612 		struct intel_engine_cs *engine;
613 
614 		engine = intel_engine_lookup_user(i915,
615 						  engine_event_class(event),
616 						  engine_event_instance(event));
617 		GEM_BUG_ON(!engine);
618 		engine->pmu.enable |= BIT(sample);
619 
620 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
621 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
622 		engine->pmu.enable_count[sample]++;
623 	}
624 
625 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
626 
627 	/*
628 	 * Store the current counter value so we can report the correct delta
629 	 * for all listeners. Even when the event was already enabled and has
630 	 * an existing non-zero value.
631 	 */
632 	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
633 }
634 
635 static void i915_pmu_disable(struct perf_event *event)
636 {
637 	struct drm_i915_private *i915 =
638 		container_of(event->pmu, typeof(*i915), pmu.base);
639 	unsigned int bit = event_enabled_bit(event);
640 	unsigned long flags;
641 
642 	spin_lock_irqsave(&i915->pmu.lock, flags);
643 
644 	if (is_engine_event(event)) {
645 		u8 sample = engine_event_sample(event);
646 		struct intel_engine_cs *engine;
647 
648 		engine = intel_engine_lookup_user(i915,
649 						  engine_event_class(event),
650 						  engine_event_instance(event));
651 		GEM_BUG_ON(!engine);
652 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
653 		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
654 		/*
655 		 * Decrement the reference count and clear the enabled
656 		 * bitmask when the last listener on an event goes away.
657 		 */
658 		if (--engine->pmu.enable_count[sample] == 0)
659 			engine->pmu.enable &= ~BIT(sample);
660 	}
661 
662 	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
663 	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
664 	/*
665 	 * Decrement the reference count and clear the enabled
666 	 * bitmask when the last listener on an event goes away.
667 	 */
668 	if (--i915->pmu.enable_count[bit] == 0) {
669 		i915->pmu.enable &= ~BIT_ULL(bit);
670 		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
671 	}
672 
673 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
674 }
675 
676 static void i915_pmu_event_start(struct perf_event *event, int flags)
677 {
678 	i915_pmu_enable(event);
679 	event->hw.state = 0;
680 }
681 
682 static void i915_pmu_event_stop(struct perf_event *event, int flags)
683 {
684 	if (flags & PERF_EF_UPDATE)
685 		i915_pmu_event_read(event);
686 	i915_pmu_disable(event);
687 	event->hw.state = PERF_HES_STOPPED;
688 }
689 
690 static int i915_pmu_event_add(struct perf_event *event, int flags)
691 {
692 	if (flags & PERF_EF_START)
693 		i915_pmu_event_start(event, flags);
694 
695 	return 0;
696 }
697 
698 static void i915_pmu_event_del(struct perf_event *event, int flags)
699 {
700 	i915_pmu_event_stop(event, PERF_EF_UPDATE);
701 }
702 
703 static int i915_pmu_event_event_idx(struct perf_event *event)
704 {
705 	return 0;
706 }
707 
708 struct i915_str_attribute {
709 	struct device_attribute attr;
710 	const char *str;
711 };
712 
713 static ssize_t i915_pmu_format_show(struct device *dev,
714 				    struct device_attribute *attr, char *buf)
715 {
716 	struct i915_str_attribute *eattr;
717 
718 	eattr = container_of(attr, struct i915_str_attribute, attr);
719 	return sprintf(buf, "%s\n", eattr->str);
720 }
721 
722 #define I915_PMU_FORMAT_ATTR(_name, _config) \
723 	(&((struct i915_str_attribute[]) { \
724 		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
725 		  .str = _config, } \
726 	})[0].attr.attr)
727 
728 static struct attribute *i915_pmu_format_attrs[] = {
729 	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
730 	NULL,
731 };
732 
733 static const struct attribute_group i915_pmu_format_attr_group = {
734 	.name = "format",
735 	.attrs = i915_pmu_format_attrs,
736 };
737 
738 struct i915_ext_attribute {
739 	struct device_attribute attr;
740 	unsigned long val;
741 };
742 
743 static ssize_t i915_pmu_event_show(struct device *dev,
744 				   struct device_attribute *attr, char *buf)
745 {
746 	struct i915_ext_attribute *eattr;
747 
748 	eattr = container_of(attr, struct i915_ext_attribute, attr);
749 	return sprintf(buf, "config=0x%lx\n", eattr->val);
750 }
751 
752 static struct attribute_group i915_pmu_events_attr_group = {
753 	.name = "events",
754 	/* Patch in attrs at runtime. */
755 };
756 
757 static ssize_t
758 i915_pmu_get_attr_cpumask(struct device *dev,
759 			  struct device_attribute *attr,
760 			  char *buf)
761 {
762 	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
763 }
764 
765 static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);
766 
767 static struct attribute *i915_cpumask_attrs[] = {
768 	&dev_attr_cpumask.attr,
769 	NULL,
770 };
771 
772 static const struct attribute_group i915_pmu_cpumask_attr_group = {
773 	.attrs = i915_cpumask_attrs,
774 };
775 
776 static const struct attribute_group *i915_pmu_attr_groups[] = {
777 	&i915_pmu_format_attr_group,
778 	&i915_pmu_events_attr_group,
779 	&i915_pmu_cpumask_attr_group,
780 	NULL
781 };
782 
783 #define __event(__config, __name, __unit) \
784 { \
785 	.config = (__config), \
786 	.name = (__name), \
787 	.unit = (__unit), \
788 }
789 
790 #define __engine_event(__sample, __name) \
791 { \
792 	.sample = (__sample), \
793 	.name = (__name), \
794 }
795 
796 static struct i915_ext_attribute *
797 add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
798 {
799 	sysfs_attr_init(&attr->attr.attr);
800 	attr->attr.attr.name = name;
801 	attr->attr.attr.mode = 0444;
802 	attr->attr.show = i915_pmu_event_show;
803 	attr->val = config;
804 
805 	return ++attr;
806 }
807 
808 static struct perf_pmu_events_attr *
809 add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
810 	     const char *str)
811 {
812 	sysfs_attr_init(&attr->attr.attr);
813 	attr->attr.attr.name = name;
814 	attr->attr.attr.mode = 0444;
815 	attr->attr.show = perf_event_sysfs_show;
816 	attr->event_str = str;
817 
818 	return ++attr;
819 }
820 
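/*
 * Build the "events" sysfs group at runtime: each supported counter gets an
 * events/<name> attribute exposing its config value, plus an optional
 * events/<name>.unit attribute, with the per-engine counters named
 * "<engine>-busy", "<engine>-sema" and "<engine>-wait".
 */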
821 static struct attribute **
822 create_event_attributes(struct drm_i915_private *i915)
823 {
824 	static const struct {
825 		u64 config;
826 		const char *name;
827 		const char *unit;
828 	} events[] = {
829 		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
830 		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
831 		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
832 		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
833 	};
834 	static const struct {
835 		enum drm_i915_pmu_engine_sample sample;
836 		char *name;
837 	} engine_events[] = {
838 		__engine_event(I915_SAMPLE_BUSY, "busy"),
839 		__engine_event(I915_SAMPLE_SEMA, "sema"),
840 		__engine_event(I915_SAMPLE_WAIT, "wait"),
841 	};
842 	unsigned int count = 0;
843 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
844 	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
845 	struct attribute **attr = NULL, **attr_iter;
846 	struct intel_engine_cs *engine;
847 	enum intel_engine_id id;
848 	unsigned int i;
849 
850 	/* Count how many counters we will be exposing. */
851 	for (i = 0; i < ARRAY_SIZE(events); i++) {
852 		if (!config_status(i915, events[i].config))
853 			count++;
854 	}
855 
856 	for_each_engine(engine, i915, id) {
857 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
858 			if (!engine_event_status(engine,
859 						 engine_events[i].sample))
860 				count++;
861 		}
862 	}
863 
864 	/* Allocate attribute objects and table. */
865 	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
866 	if (!i915_attr)
867 		goto err_alloc;
868 
869 	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
870 	if (!pmu_attr)
871 		goto err_alloc;
872 
873 	/* Max one pointer of each attribute type plus a termination entry. */
874 	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
875 	if (!attr)
876 		goto err_alloc;
877 
878 	i915_iter = i915_attr;
879 	pmu_iter = pmu_attr;
880 	attr_iter = attr;
881 
882 	/* Initialize supported non-engine counters. */
883 	for (i = 0; i < ARRAY_SIZE(events); i++) {
884 		char *str;
885 
886 		if (config_status(i915, events[i].config))
887 			continue;
888 
889 		str = kstrdup(events[i].name, GFP_KERNEL);
890 		if (!str)
891 			goto err;
892 
893 		*attr_iter++ = &i915_iter->attr.attr;
894 		i915_iter = add_i915_attr(i915_iter, str, events[i].config);
895 
896 		if (events[i].unit) {
897 			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
898 			if (!str)
899 				goto err;
900 
901 			*attr_iter++ = &pmu_iter->attr.attr;
902 			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
903 		}
904 	}
905 
906 	/* Initialize supported engine counters. */
907 	for_each_engine(engine, i915, id) {
908 		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
909 			char *str;
910 
911 			if (engine_event_status(engine,
912 						engine_events[i].sample))
913 				continue;
914 
915 			str = kasprintf(GFP_KERNEL, "%s-%s",
916 					engine->name, engine_events[i].name);
917 			if (!str)
918 				goto err;
919 
920 			*attr_iter++ = &i915_iter->attr.attr;
921 			i915_iter =
922 				add_i915_attr(i915_iter, str,
923 					      __I915_PMU_ENGINE(engine->uabi_class,
924 								engine->instance,
925 								engine_events[i].sample));
926 
927 			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
928 					engine->name, engine_events[i].name);
929 			if (!str)
930 				goto err;
931 
932 			*attr_iter++ = &pmu_iter->attr.attr;
933 			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
934 		}
935 	}
936 
937 	i915->pmu.i915_attr = i915_attr;
938 	i915->pmu.pmu_attr = pmu_attr;
939 
940 	return attr;
941 
942 err:
943 	for (attr_iter = attr; *attr_iter; attr_iter++)
944 		kfree((*attr_iter)->name);
945 
946 err_alloc:
947 	kfree(attr);
948 	kfree(i915_attr);
949 	kfree(pmu_attr);
950 
951 	return NULL;
952 }
953 
954 static void free_event_attributes(struct drm_i915_private *i915)
955 {
956 	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
957 
958 	for (; *attr_iter; attr_iter++)
959 		kfree((*attr_iter)->name);
960 
961 	kfree(i915_pmu_events_attr_group.attrs);
962 	kfree(i915->pmu.i915_attr);
963 	kfree(i915->pmu.pmu_attr);
964 
965 	i915_pmu_events_attr_group.attrs = NULL;
966 	i915->pmu.i915_attr = NULL;
967 	i915->pmu.pmu_attr = NULL;
968 }
969 
970 static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
971 {
972 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
973 
974 	GEM_BUG_ON(!pmu->base.event_init);
975 
976 	/* Select the first online CPU as a designated reader. */
977 	if (!cpumask_weight(&i915_pmu_cpumask))
978 		cpumask_set_cpu(cpu, &i915_pmu_cpumask);
979 
980 	return 0;
981 }
982 
983 static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
984 {
985 	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
986 	unsigned int target;
987 
988 	GEM_BUG_ON(!pmu->base.event_init);
989 
990 	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
991 		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
992 		/* Migrate events if there is a valid target */
993 		if (target < nr_cpu_ids) {
994 			cpumask_set_cpu(target, &i915_pmu_cpumask);
995 			perf_pmu_migrate_context(&pmu->base, cpu, target);
996 		}
997 	}
998 
999 	return 0;
1000 }
1001 
1002 static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
1003 
1004 static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
1005 {
1006 	enum cpuhp_state slot;
1007 	int ret;
1008 
1009 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
1010 				      "perf/x86/intel/i915:online",
1011 				      i915_pmu_cpu_online,
1012 				      i915_pmu_cpu_offline);
1013 	if (ret < 0)
1014 		return ret;
1015 
1016 	slot = ret;
1017 	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
1018 	if (ret) {
1019 		cpuhp_remove_multi_state(slot);
1020 		return ret;
1021 	}
1022 
1023 	cpuhp_slot = slot;
1024 	return 0;
1025 }
1026 
1027 static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
1028 {
1029 	WARN_ON(cpuhp_slot == CPUHP_INVALID);
1030 	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
1031 	cpuhp_remove_multi_state(cpuhp_slot);
1032 }
1033 
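/*
 * Register the "i915" PMU with perf. Once registered, userspace can open the
 * counters via perf_event_open(). A minimal sketch (userspace code, shown
 * purely for illustration; the dynamic PMU type must be read from the sysfs
 * "type" file and the event opened on a CPU from the "cpumask" file):
 *
 *	struct perf_event_attr attr = { };
 *	__u64 count;
 *	int fd;
 *
 *	attr.size = sizeof(attr);
 *	attr.type = i915_pmu_type;		// /sys/bus/event_source/devices/i915/type
 *	attr.config = I915_PMU_RC6_RESIDENCY;	// from <drm/i915_drm.h>
 *	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);	// pid=-1, cpu from "cpumask"
 *	read(fd, &count, sizeof(count));	// RC6 residency in nanoseconds
 */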
1034 void i915_pmu_register(struct drm_i915_private *i915)
1035 {
1036 	int ret;
1037 
1038 	if (INTEL_GEN(i915) <= 2) {
1039 		DRM_INFO("PMU not supported for this GPU.\n");
1040 		return;
1041 	}
1042 
1043 	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
1044 	if (!i915_pmu_events_attr_group.attrs) {
1045 		ret = -ENOMEM;
1046 		goto err;
1047 	}
1048 
1049 	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
1050 	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
1051 	i915->pmu.base.event_init	= i915_pmu_event_init;
1052 	i915->pmu.base.add		= i915_pmu_event_add;
1053 	i915->pmu.base.del		= i915_pmu_event_del;
1054 	i915->pmu.base.start		= i915_pmu_event_start;
1055 	i915->pmu.base.stop		= i915_pmu_event_stop;
1056 	i915->pmu.base.read		= i915_pmu_event_read;
1057 	i915->pmu.base.event_idx	= i915_pmu_event_event_idx;
1058 
1059 	spin_lock_init(&i915->pmu.lock);
1060 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1061 	i915->pmu.timer.function = i915_sample;
1062 
1063 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
1064 	if (ret)
1065 		goto err;
1066 
1067 	ret = i915_pmu_register_cpuhp_state(i915);
1068 	if (ret)
1069 		goto err_unreg;
1070 
1071 	return;
1072 
1073 err_unreg:
1074 	perf_pmu_unregister(&i915->pmu.base);
1075 err:
1076 	i915->pmu.base.event_init = NULL;
1077 	free_event_attributes(i915);
1078 	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
1079 }
1080 
1081 void i915_pmu_unregister(struct drm_i915_private *i915)
1082 {
1083 	if (!i915->pmu.base.event_init)
1084 		return;
1085 
1086 	WARN_ON(i915->pmu.enable);
1087 
1088 	hrtimer_cancel(&i915->pmu.timer);
1089 
1090 	i915_pmu_unregister_cpuhp_state(i915);
1091 
1092 	perf_pmu_unregister(&i915->pmu.base);
1093 	i915->pmu.base.event_init = NULL;
1094 	free_event_attributes(i915);
1095 }
1096