1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Performance event support for s390x - CPU-measurement Counter Facility
4 *
5 * Copyright IBM Corp. 2012, 2023
6 * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
7 * Thomas Richter <tmricht@linux.ibm.com>
8 */
9 #define KMSG_COMPONENT "cpum_cf"
10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
11
12 #include <linux/kernel.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/percpu.h>
15 #include <linux/notifier.h>
16 #include <linux/init.h>
17 #include <linux/export.h>
18 #include <linux/miscdevice.h>
19 #include <linux/perf_event.h>
20
21 #include <asm/cpu_mf.h>
22 #include <asm/hwctrset.h>
23 #include <asm/debug.h>
24
25 enum cpumf_ctr_set {
26 CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
27 CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
28 CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
29 CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
30 CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
31
32 /* Maximum number of counter sets */
33 CPUMF_CTR_SET_MAX,
34 };
35
36 #define CPUMF_LCCTL_ENABLE_SHIFT 16
37 #define CPUMF_LCCTL_ACTCTL_SHIFT 0
38
39 static inline void ctr_set_enable(u64 *state, u64 ctrsets)
40 {
41 *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
42 }
43
44 static inline void ctr_set_disable(u64 *state, u64 ctrsets)
45 {
46 *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
47 }
48
49 static inline void ctr_set_start(u64 *state, u64 ctrsets)
50 {
51 *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
52 }
53
54 static inline void ctr_set_stop(u64 *state, u64 ctrsets)
55 {
56 *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
57 }
58
59 static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
60 {
61 switch (set) {
62 case CPUMF_CTR_SET_BASIC:
63 return stcctm(BASIC, range, dest);
64 case CPUMF_CTR_SET_USER:
65 return stcctm(PROBLEM_STATE, range, dest);
66 case CPUMF_CTR_SET_CRYPTO:
67 return stcctm(CRYPTO_ACTIVITY, range, dest);
68 case CPUMF_CTR_SET_EXT:
69 return stcctm(EXTENDED, range, dest);
70 case CPUMF_CTR_SET_MT_DIAG:
71 return stcctm(MT_DIAG_CLEARING, range, dest);
72 case CPUMF_CTR_SET_MAX:
73 return 3;
74 }
75 return 3;
76 }
77
78 struct cpu_cf_events {
79 refcount_t refcnt; /* Reference count */
80 atomic_t ctr_set[CPUMF_CTR_SET_MAX];
81 u64 state; /* For perf_event_open SVC */
82 u64 dev_state; /* For /dev/hwctr */
83 unsigned int flags;
84 size_t used; /* Bytes used in data */
85 size_t usedss; /* Bytes used in start/stop */
86 unsigned char start[PAGE_SIZE]; /* Counter set at event add */
87 unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */
88 unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */
89 unsigned int sets; /* # Counter set saved in memory */
90 };
91
92 static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
93 static debug_info_t *cf_dbg;
94
95 /*
96 * The CPU Measurement query counter information instruction contains
97 * information which varies per machine generation, but is constant and
98 * does not change when running on a particular machine, such as counter
99 * first and second version number. This is needed to determine the size
100 * of counter sets. Extract this information at device driver initialization.
101 */
102 static struct cpumf_ctr_info cpumf_ctr_info;
103
104 struct cpu_cf_ptr {
105 struct cpu_cf_events *cpucf;
106 };
107
108 static struct cpu_cf_root { /* Anchor to per CPU data */
109 refcount_t refcnt; /* Overall active events */
110 struct cpu_cf_ptr __percpu *cfptr;
111 } cpu_cf_root;
112
113 /*
114 * Serialize event initialization and event removal. Both are called from
115 * user space in task context with perf_event_open() and close()
116 * system calls.
117 *
118 * This mutex serializes functions cpum_cf_alloc_cpu() called at event
119 * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
120 * called at event removal via call back function hw_perf_event_destroy()
121 * when the event is deleted. They are serialized to enforce correct
122 * bookkeeping of pointer and reference counts anchored by
123 * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
124 * per CPU pointers stored in cpu_cf_root::cfptr.
125 */
126 static DEFINE_MUTEX(pmc_reserve_mutex);
127
128 /*
129 * Get pointer to per-cpu structure.
130 *
131 * Function get_cpu_cfhw() is called from
132 * - cfset_copy_all(): This function is protected by cpus_read_lock(), so
133 * CPU hot plug remove can not happen. Event removal requires a close()
134 * first.
135 *
136 * Function this_cpu_cfhw() is called from perf common code functions:
137 * - pmu_{en|dis}able(), pmu_{add|del}() and pmu_{start|stop}():
138 * All functions execute with interrupts disabled on that particular CPU.
139 * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all().
140 *
141 * Therefore it is safe to access the CPU specific pointer to the event.
142 */
143 static struct cpu_cf_events *get_cpu_cfhw(int cpu)
144 {
145 struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;
146
147 if (p) {
148 struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);
149
150 return q->cpucf;
151 }
152 return NULL;
153 }
154
155 static struct cpu_cf_events *this_cpu_cfhw(void)
156 {
157 return get_cpu_cfhw(smp_processor_id());
158 }
159
160 /* Disable counter sets on dedicated CPU */
161 static void cpum_cf_reset_cpu(void *flags)
162 {
163 lcctl(0);
164 }
165
166 /* Free per CPU data when the last event is removed. */
167 static void cpum_cf_free_root(void)
168 {
169 if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
170 return;
171 free_percpu(cpu_cf_root.cfptr);
172 cpu_cf_root.cfptr = NULL;
173 irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
174 on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
175 debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n",
176 __func__, refcount_read(&cpu_cf_root.refcnt),
177 !cpu_cf_root.cfptr);
178 }
179
180 /*
181 * On initialization of first event also allocate per CPU data dynamically.
182 * Start with an array of pointers, the array size is the maximum number of
183 * CPUs possible, which might be larger than the number of CPUs currently
184 * online.
185 */
186 static int cpum_cf_alloc_root(void)
187 {
188 int rc = 0;
189
190 if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
191 return rc;
192
193 /* The memory is already zeroed. */
194 cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
195 if (cpu_cf_root.cfptr) {
196 refcount_set(&cpu_cf_root.refcnt, 1);
197 on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
198 irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
199 } else {
200 rc = -ENOMEM;
201 }
202
203 return rc;
204 }
205
206 /* Free CPU counter data structure for a PMU */
207 static void cpum_cf_free_cpu(int cpu)
208 {
209 struct cpu_cf_events *cpuhw;
210 struct cpu_cf_ptr *p;
211
212 mutex_lock(&pmc_reserve_mutex);
213 /*
214 * When invoked via CPU hotplug handler, there might be no events
215 * installed or that particular CPU might not have an
216 * event installed. This anchor pointer can be NULL!
217 */
218 if (!cpu_cf_root.cfptr)
219 goto out;
220 p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
221 cpuhw = p->cpucf;
222 /*
223 * Might be zero when called from CPU hotplug handler and no event
224 * installed on that CPU, but on different CPUs.
225 */
226 if (!cpuhw)
227 goto out;
228
229 if (refcount_dec_and_test(&cpuhw->refcnt)) {
230 kfree(cpuhw);
231 p->cpucf = NULL;
232 }
233 cpum_cf_free_root();
234 out:
235 mutex_unlock(&pmc_reserve_mutex);
236 }
237
238 /* Allocate CPU counter data structure for a PMU. Called under mutex lock. */
239 static int cpum_cf_alloc_cpu(int cpu)
240 {
241 struct cpu_cf_events *cpuhw;
242 struct cpu_cf_ptr *p;
243 int rc;
244
245 mutex_lock(&pmc_reserve_mutex);
246 rc = cpum_cf_alloc_root();
247 if (rc)
248 goto unlock;
249 p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
250 cpuhw = p->cpucf;
251
252 if (!cpuhw) {
253 cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
254 if (cpuhw) {
255 p->cpucf = cpuhw;
256 refcount_set(&cpuhw->refcnt, 1);
257 } else {
258 rc = -ENOMEM;
259 }
260 } else {
261 refcount_inc(&cpuhw->refcnt);
262 }
263 if (rc) {
264 /*
265 * Error in allocation of event, decrement anchor. Since
266 * cpu_cf_event is not created, its destroy() function is not
267 * invoked. Adjust the reference counter for the anchor.
268 */
269 cpum_cf_free_root();
270 }
271 unlock:
272 mutex_unlock(&pmc_reserve_mutex);
273 return rc;
274 }
275
276 /*
277 * Create/delete per CPU data structures for /dev/hwctr interface and events
278 * created by perf_event_open().
279 * If cpu is -1, track task on all available CPUs. This requires
280 * allocation of hardware data structures for all CPUs. This setup handles
281 * perf_event_open() with task context and /dev/hwctr interface.
282 * If cpu is not -1, install the event on this CPU only. This setup handles
283 * perf_event_open() with CPU context.
284 */
285 static int cpum_cf_alloc(int cpu)
286 {
287 cpumask_var_t mask;
288 int rc;
289
290 if (cpu == -1) {
291 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
292 return -ENOMEM;
293 for_each_online_cpu(cpu) {
294 rc = cpum_cf_alloc_cpu(cpu);
295 if (rc) {
296 for_each_cpu(cpu, mask)
297 cpum_cf_free_cpu(cpu);
298 break;
299 }
300 cpumask_set_cpu(cpu, mask);
301 }
302 free_cpumask_var(mask);
303 } else {
304 rc = cpum_cf_alloc_cpu(cpu);
305 }
306 return rc;
307 }
308
309 static void cpum_cf_free(int cpu)
310 {
311 if (cpu == -1) {
312 for_each_online_cpu(cpu)
313 cpum_cf_free_cpu(cpu);
314 } else {
315 cpum_cf_free_cpu(cpu);
316 }
317 }
318
319 #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
320 /* interval in seconds */
321
322 /* Counter sets are stored as data stream in a page sized memory buffer and
323 * exported to user space via raw data attached to the event sample data.
324 * Each counter set starts with an eight byte header consisting of:
325 * - a two byte eye catcher (0xfeef)
326 * - a one byte counter set number
327 * - a two byte counter set size (indicates the number of counters in this set)
328 * - a three byte reserved value (must be zero) to make the header the same
329 * size as a counter value.
330 * All counter values are eight byte in size.
331 *
332 * All counter sets are followed by a 64 byte trailer.
333 * The trailer consists of a:
334 * - flag field indicating valid fields when corresponding bit set
335 * - the counter facility first and second version number
336 * - the CPU speed if nonzero
337 * - the time stamp the counter sets have been collected
338 * - the time of day (TOD) base value
339 * - the machine type.
340 *
341 * The counter sets are saved when the process is prepared to be executed on a
342 * CPU and saved again when the process is going to be removed from a CPU.
343 * The difference of both counter sets are calculated and stored in the event
344 * sample data area.
345 */
346 struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
347 unsigned int def:16; /* 0-15 Data Entry Format */
348 unsigned int set:16; /* 16-31 Counter set identifier */
349 unsigned int ctr:16; /* 32-47 Number of stored counters */
350 unsigned int res1:16; /* 48-63 Reserved */
351 };
352
353 struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */
354 /* 0 - 7 */
355 union {
356 struct {
357 unsigned int clock_base:1; /* TOD clock base set */
358 unsigned int speed:1; /* CPU speed set */
359 /* Measurement alerts */
360 unsigned int mtda:1; /* Loss of MT ctr. data alert */
361 unsigned int caca:1; /* Counter auth. change alert */
362 unsigned int lcda:1; /* Loss of counter data alert */
363 };
364 unsigned long flags; /* 0-63 All indicators */
365 };
366 /* 8 - 15 */
367 unsigned int cfvn:16; /* 64-79 Ctr First Version */
368 unsigned int csvn:16; /* 80-95 Ctr Second Version */
369 unsigned int cpu_speed:32; /* 96-127 CPU speed */
370 /* 16 - 23 */
371 unsigned long timestamp; /* 128-191 Timestamp (TOD) */
372 /* 24 - 55 */
373 union {
374 struct {
375 unsigned long progusage1;
376 unsigned long progusage2;
377 unsigned long progusage3;
378 unsigned long tod_base;
379 };
380 unsigned long progusage[4];
381 };
382 /* 56 - 63 */
383 unsigned int mach_type:16; /* Machine type */
384 unsigned int res1:16; /* Reserved */
385 unsigned int res2:32; /* Reserved */
386 };
387
388 /* Create the trailer data at the end of a page. */
389 static void cfdiag_trailer(struct cf_trailer_entry *te)
390 {
391 struct cpuid cpuid;
392
393 te->cfvn = cpumf_ctr_info.cfvn; /* Counter version numbers */
394 te->csvn = cpumf_ctr_info.csvn;
395
396 get_cpu_id(&cpuid); /* Machine type */
397 te->mach_type = cpuid.machine;
398 te->cpu_speed = cfdiag_cpu_speed;
399 if (te->cpu_speed)
400 te->speed = 1;
401 te->clock_base = 1; /* Save clock base */
402 te->tod_base = tod_clock_base.tod;
403 te->timestamp = get_tod_clock_fast();
404 }
405
406 /*
407 * The number of counters per counter set varies between machine generations,
408 * but is constant when running on a particular machine generation.
409 * Determine each counter set size at device driver initialization and
410 * retrieve it later.
411 */
412 static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX];
413 static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
414 {
415 size_t ctrset_size = 0;
416
417 switch (ctrset) {
418 case CPUMF_CTR_SET_BASIC:
419 if (cpumf_ctr_info.cfvn >= 1)
420 ctrset_size = 6;
421 break;
422 case CPUMF_CTR_SET_USER:
423 if (cpumf_ctr_info.cfvn == 1)
424 ctrset_size = 6;
425 else if (cpumf_ctr_info.cfvn >= 3)
426 ctrset_size = 2;
427 break;
428 case CPUMF_CTR_SET_CRYPTO:
429 if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5)
430 ctrset_size = 16;
431 else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
432 ctrset_size = 20;
433 break;
434 case CPUMF_CTR_SET_EXT:
435 if (cpumf_ctr_info.csvn == 1)
436 ctrset_size = 32;
437 else if (cpumf_ctr_info.csvn == 2)
438 ctrset_size = 48;
439 else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
440 ctrset_size = 128;
441 else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
442 ctrset_size = 160;
443 break;
444 case CPUMF_CTR_SET_MT_DIAG:
445 if (cpumf_ctr_info.csvn > 3)
446 ctrset_size = 48;
447 break;
448 case CPUMF_CTR_SET_MAX:
449 break;
450 }
451 cpumf_ctr_setsizes[ctrset] = ctrset_size;
452 }
453
454 /*
455 * Return the maximum possible counter set size (in number of 8 byte counters)
456 * depending on type and model number.
457 */
458 static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset)
459 {
460 return cpumf_ctr_setsizes[ctrset];
461 }
462
463 /* Read a counter set. The counter set number determines the counter set and
464 * the CPUM-CF first and second version number determine the number of
465 * available counters in each counter set.
466 * Each counter set starts with a header containing the counter set number and
467 * the number of eight byte counters.
468 *
469 * The function returns the number of bytes occupied by this counter set
470 * including the header.
471 * If there is no counter in the counter set, this counter set is useless and
472 * zero is returned in this case.
473 *
474 * Note that the counter sets may not be enabled or active and the stcctm
475 * instruction might return error 3. Depending on error_ok value this is ok,
476 * for example when called from cpumf_pmu_start() call back function.
477 */
478 static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
479 size_t room, bool error_ok)
480 {
481 size_t ctrset_size, need = 0;
482 int rc = 3; /* Assume write failure */
483
484 ctrdata->def = CF_DIAG_CTRSET_DEF;
485 ctrdata->set = ctrset;
486 ctrdata->res1 = 0;
487 ctrset_size = cpum_cf_read_setsize(ctrset);
488
489 if (ctrset_size) { /* Save data */
490 need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
491 if (need <= room) {
492 rc = ctr_stcctm(ctrset, ctrset_size,
493 (u64 *)(ctrdata + 1));
494 }
495 if (rc != 3 || error_ok)
496 ctrdata->ctr = ctrset_size;
497 else
498 need = 0;
499 }
500
501 return need;
502 }
503
504 static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
505 [CPUMF_CTR_SET_BASIC] = 0x02,
506 [CPUMF_CTR_SET_USER] = 0x04,
507 [CPUMF_CTR_SET_CRYPTO] = 0x08,
508 [CPUMF_CTR_SET_EXT] = 0x01,
509 [CPUMF_CTR_SET_MT_DIAG] = 0x20,
510 };
511
512 /* Read out all counter sets and save them in the provided data buffer.
513 * The last 64 bytes host an artificial trailer entry.
514 */
515 static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth,
516 bool error_ok)
517 {
518 struct cf_trailer_entry *trailer;
519 size_t offset = 0, done;
520 int i;
521
522 memset(data, 0, sz);
523 sz -= sizeof(*trailer); /* Always room for trailer */
524 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
525 struct cf_ctrset_entry *ctrdata = data + offset;
526
527 if (!(auth & cpumf_ctr_ctl[i]))
528 continue; /* Counter set not authorized */
529
530 done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok);
531 offset += done;
532 }
533 trailer = data + offset;
534 cfdiag_trailer(trailer);
535 return offset + sizeof(*trailer);
536 }
537
538 /* Calculate the difference for each counter in a counter set. */
539 static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters)
540 {
541 for (; --counters >= 0; ++pstart, ++pstop)
542 if (*pstop >= *pstart)
543 *pstop -= *pstart;
544 else
545 *pstop = *pstart - *pstop + 1;
546 }
547
548 /* Scan the counter sets and calculate the difference of each counter
549 * in each set. The result is the increment of each counter during the
550 * period the counter set has been activated.
551 *
552 * Return true on success.
553 */
554 static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
555 {
556 struct cf_trailer_entry *trailer_start, *trailer_stop;
557 struct cf_ctrset_entry *ctrstart, *ctrstop;
558 size_t offset = 0;
559 int i;
560
561 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
562 ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
563 ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
564
565 /* Counter set not authorized */
566 if (!(auth & cpumf_ctr_ctl[i]))
567 continue;
568 /* Counter set size zero was not saved */
569 if (!cpum_cf_read_setsize(i))
570 continue;
571
572 if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
573 pr_err_once("cpum_cf_diag counter set compare error "
574 "in set %i\n", ctrstart->set);
575 return 0;
576 }
577 if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
578 cfdiag_diffctrset((u64 *)(ctrstart + 1),
579 (u64 *)(ctrstop + 1), ctrstart->ctr);
580 offset += ctrstart->ctr * sizeof(u64) +
581 sizeof(*ctrstart);
582 }
583 }
584
585 /* Save time_stamp from start of event in stop's trailer */
586 trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
587 trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset);
588 trailer_stop->progusage[0] = trailer_start->timestamp;
589
590 return 1;
591 }
592
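/* Map a counter number to the counter set it belongs to. The counter number
 * ranges are architected; a number outside all ranges yields
 * CPUMF_CTR_SET_MAX, which callers treat as "no counter set".
 */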
593 static enum cpumf_ctr_set get_counter_set(u64 event)
594 {
595 int set = CPUMF_CTR_SET_MAX;
596
597 if (event < 32)
598 set = CPUMF_CTR_SET_BASIC;
599 else if (event < 64)
600 set = CPUMF_CTR_SET_USER;
601 else if (event < 128)
602 set = CPUMF_CTR_SET_CRYPTO;
603 else if (event < 288)
604 set = CPUMF_CTR_SET_EXT;
605 else if (event >= 448 && event < 496)
606 set = CPUMF_CTR_SET_MT_DIAG;
607
608 return set;
609 }
610
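/* Check that the counter (config) exists for the given counter set on this
 * machine, based on the counter facility first/second version numbers and,
 * for the MT-diagnostic set, on its authorization/enable/activation state.
 */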
611 static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
612 {
613 u16 mtdiag_ctl;
614 int err = 0;
615
616 /* check required version for counter sets */
617 switch (set) {
618 case CPUMF_CTR_SET_BASIC:
619 case CPUMF_CTR_SET_USER:
620 if (cpumf_ctr_info.cfvn < 1)
621 err = -EOPNOTSUPP;
622 break;
623 case CPUMF_CTR_SET_CRYPTO:
624 if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 &&
625 config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83))
626 err = -EOPNOTSUPP;
627 break;
628 case CPUMF_CTR_SET_EXT:
629 if (cpumf_ctr_info.csvn < 1)
630 err = -EOPNOTSUPP;
631 if ((cpumf_ctr_info.csvn == 1 && config > 159) ||
632 (cpumf_ctr_info.csvn == 2 && config > 175) ||
633 (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 &&
634 config > 255) ||
635 (cpumf_ctr_info.csvn >= 6 && config > 287))
636 err = -EOPNOTSUPP;
637 break;
638 case CPUMF_CTR_SET_MT_DIAG:
639 if (cpumf_ctr_info.csvn <= 3)
640 err = -EOPNOTSUPP;
641 /*
642 * MT-diagnostic counters are read-only. The counter set
643 * is automatically enabled and activated on all CPUs with
644 * multithreading (SMT). Deactivation of multithreading
645 * also disables the counter set. State changes are ignored
646 * by lcctl(). Because Linux controls SMT enablement through
647 * a kernel parameter only, the counter set is either disabled
648 * or enabled and active.
649 *
650 * Thus, the counters can only be used if SMT is on and the
651 * counter set is enabled and active.
652 */
653 mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
654 if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) &&
655 (cpumf_ctr_info.enable_ctl & mtdiag_ctl) &&
656 (cpumf_ctr_info.act_ctl & mtdiag_ctl)))
657 err = -EOPNOTSUPP;
658 break;
659 case CPUMF_CTR_SET_MAX:
660 err = -EOPNOTSUPP;
661 }
662
663 return err;
664 }
665
666 /*
667 * Change the CPUMF state to active.
668 * Enable and activate the CPU-counter sets according
669 * to the per-cpu control state.
670 */
671 static void cpumf_pmu_enable(struct pmu *pmu)
672 {
673 struct cpu_cf_events *cpuhw = this_cpu_cfhw();
674 int err;
675
676 if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
677 return;
678
679 err = lcctl(cpuhw->state | cpuhw->dev_state);
680 if (err)
681 pr_err("Enabling the performance measuring unit failed with rc=%x\n", err);
682 else
683 cpuhw->flags |= PMU_F_ENABLED;
684 }
685
686 /*
687 * Change the CPUMF state to inactive.
688 * Disable and enable (inactive) the CPU-counter sets according
689 * to the per-cpu control state.
690 */
691 static void cpumf_pmu_disable(struct pmu *pmu)
692 {
693 struct cpu_cf_events *cpuhw = this_cpu_cfhw();
694 u64 inactive;
695 int err;
696
697 if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED))
698 return;
699
700 inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
701 inactive |= cpuhw->dev_state;
702 err = lcctl(inactive);
703 if (err)
704 pr_err("Disabling the performance measuring unit failed with rc=%x\n", err);
705 else
706 cpuhw->flags &= ~PMU_F_ENABLED;
707 }
708
709 /* Release the PMU if event is the last perf event */
710 static void hw_perf_event_destroy(struct perf_event *event)
711 {
712 cpum_cf_free(event->cpu);
713 }
714
715 /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
716 static const int cpumf_generic_events_basic[] = {
717 [PERF_COUNT_HW_CPU_CYCLES] = 0,
718 [PERF_COUNT_HW_INSTRUCTIONS] = 1,
719 [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
720 [PERF_COUNT_HW_CACHE_MISSES] = -1,
721 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
722 [PERF_COUNT_HW_BRANCH_MISSES] = -1,
723 [PERF_COUNT_HW_BUS_CYCLES] = -1,
724 };
725 /* CPUMF <-> perf event mappings for userspace (problem-state set) */
726 static const int cpumf_generic_events_user[] = {
727 [PERF_COUNT_HW_CPU_CYCLES] = 32,
728 [PERF_COUNT_HW_INSTRUCTIONS] = 33,
729 [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
730 [PERF_COUNT_HW_CACHE_MISSES] = -1,
731 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
732 [PERF_COUNT_HW_BRANCH_MISSES] = -1,
733 [PERF_COUNT_HW_BUS_CYCLES] = -1,
734 };
735
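/* Return true if the event number is one of the two problem-state counters
 * (32: CPU cycles, 33: instructions) of the problem-state counter set.
 */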
736 static int is_userspace_event(u64 ev)
737 {
738 return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
739 cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev;
740 }
741
742 static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
743 {
744 struct perf_event_attr *attr = &event->attr;
745 struct hw_perf_event *hwc = &event->hw;
746 enum cpumf_ctr_set set;
747 u64 ev;
748
749 switch (type) {
750 case PERF_TYPE_RAW:
751 /* Raw events are used to access counters directly,
752 * hence do not permit excludes */
753 if (attr->exclude_kernel || attr->exclude_user ||
754 attr->exclude_hv)
755 return -EOPNOTSUPP;
756 ev = attr->config;
757 break;
758
759 case PERF_TYPE_HARDWARE:
760 if (is_sampling_event(event)) /* No sampling support */
761 return -ENOENT;
762 ev = attr->config;
763 if (!attr->exclude_user && attr->exclude_kernel) {
764 /*
765 * Count user space (problem-state) only
766 * Handle events 32 and 33 as 0:u and 1:u
767 */
768 if (!is_userspace_event(ev)) {
769 if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
770 return -EOPNOTSUPP;
771 ev = cpumf_generic_events_user[ev];
772 }
773 } else if (!attr->exclude_kernel && attr->exclude_user) {
774 /* No support for kernel space counters only */
775 return -EOPNOTSUPP;
776 } else {
777 /* Count user and kernel space, incl. events 32 + 33 */
778 if (!is_userspace_event(ev)) {
779 if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
780 return -EOPNOTSUPP;
781 ev = cpumf_generic_events_basic[ev];
782 }
783 }
784 break;
785
786 default:
787 return -ENOENT;
788 }
789
790 if (ev == -1)
791 return -ENOENT;
792
793 if (ev > PERF_CPUM_CF_MAX_CTR)
794 return -ENOENT;
795
796 /* Obtain the counter set to which the specified counter belongs */
797 set = get_counter_set(ev);
798 switch (set) {
799 case CPUMF_CTR_SET_BASIC:
800 case CPUMF_CTR_SET_USER:
801 case CPUMF_CTR_SET_CRYPTO:
802 case CPUMF_CTR_SET_EXT:
803 case CPUMF_CTR_SET_MT_DIAG:
804 /*
805 * Use the hardware perf event structure to store the
806 * counter number in the 'config' member and the counter
807 * set number in the 'config_base' as bit mask.
808 * It is later used to enable/disable the counter(s).
809 */
810 hwc->config = ev;
811 hwc->config_base = cpumf_ctr_ctl[set];
812 break;
813 case CPUMF_CTR_SET_MAX:
814 /* The counter could not be associated to a counter set */
815 return -EINVAL;
816 }
817
818 /* Initialize for using the CPU-measurement counter facility */
819 if (cpum_cf_alloc(event->cpu))
820 return -ENOMEM;
821 event->destroy = hw_perf_event_destroy;
822
823 /*
824 * Finally, validate version and authorization of the counter set.
825 * If the particular CPU counter set is not authorized,
826 * return with -ENOENT in order to fall back to other
827 * PMUs that might suffice the event request.
828 */
829 if (!(hwc->config_base & cpumf_ctr_info.auth_ctl))
830 return -ENOENT;
831 return validate_ctr_version(hwc->config, set);
832 }
833
834 /* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
835 * attribute::type values:
836 * - PERF_TYPE_HARDWARE:
837 * - pmu->type:
838 * Handle both types of invocation identically. They address the same hardware.
839 * The result is different when event modifiers exclude_kernel and/or
840 * exclude_user are also set.
841 */
842 static int cpumf_pmu_event_type(struct perf_event *event)
843 {
844 u64 ev = event->attr.config;
845
846 if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev ||
847 cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev ||
848 cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
849 cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev)
850 return PERF_TYPE_HARDWARE;
851 return PERF_TYPE_RAW;
852 }
853
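/* perf event_init callback. Accept PERF_TYPE_HARDWARE, PERF_TYPE_RAW and the
 * dynamically assigned PMU type; anything else is rejected with -ENOENT so
 * the perf core can try other PMUs. On error an already installed destroy()
 * callback is invoked to undo the resource allocation.
 */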
854 static int cpumf_pmu_event_init(struct perf_event *event)
855 {
856 unsigned int type = event->attr.type;
857 int err;
858
859 if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
860 err = __hw_perf_event_init(event, type);
861 else if (event->pmu->type == type)
862 /* Registered as unknown PMU */
863 err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
864 else
865 return -ENOENT;
866
867 if (unlikely(err) && event->destroy)
868 event->destroy(event);
869
870 return err;
871 }
872
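/* Set the event's prev_count to the current hardware counter value. Used when
 * a counter (set) is (re-)started; a counter in a not yet active set starts
 * from zero.
 */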
873 static int hw_perf_event_reset(struct perf_event *event)
874 {
875 u64 prev, new;
876 int err;
877
878 do {
879 prev = local64_read(&event->hw.prev_count);
880 err = ecctr(event->hw.config, &new);
881 if (err) {
882 if (err != 3)
883 break;
884 /* The counter is not (yet) available. This
885 * might happen if the counter set to which
886 * this counter belongs is in the disabled
887 * state.
888 */
889 new = 0;
890 }
891 } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
892
893 return err;
894 }
895
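/* Read the current counter value and add the delta since the last read to the
 * event count, taking a possible counter wraparound into account.
 */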
896 static void hw_perf_event_update(struct perf_event *event)
897 {
898 u64 prev, new, delta;
899 int err;
900
901 do {
902 prev = local64_read(&event->hw.prev_count);
903 err = ecctr(event->hw.config, &new);
904 if (err)
905 return;
906 } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
907
908 delta = (prev <= new) ? new - prev
909 : (-1ULL - prev) + new + 1; /* overflow */
910 local64_add(delta, &event->count);
911 }
912
913 static void cpumf_pmu_read(struct perf_event *event)
914 {
915 if (event->hw.state & PERF_HES_STOPPED)
916 return;
917
918 hw_perf_event_update(event);
919 }
920
921 static void cpumf_pmu_start(struct perf_event *event, int flags)
922 {
923 struct cpu_cf_events *cpuhw = this_cpu_cfhw();
924 struct hw_perf_event *hwc = &event->hw;
925 int i;
926
927 if (!(hwc->state & PERF_HES_STOPPED))
928 return;
929
930 hwc->state = 0;
931
932 /* (Re-)enable and activate the counter set */
933 ctr_set_enable(&cpuhw->state, hwc->config_base);
934 ctr_set_start(&cpuhw->state, hwc->config_base);
935
936 /* The counter set to which this counter belongs can be already active.
937 * Because all counters in a set are active, the event->hw.prev_count
938 * needs to be synchronized. At this point, the counter set can be in
939 * the inactive or disabled state.
940 */
941 if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
942 cpuhw->usedss = cfdiag_getctr(cpuhw->start,
943 sizeof(cpuhw->start),
944 hwc->config_base, true);
945 } else {
946 hw_perf_event_reset(event);
947 }
948
949 /* Increment refcount for counter sets */
950 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
951 if ((hwc->config_base & cpumf_ctr_ctl[i]))
952 atomic_inc(&cpuhw->ctr_set[i]);
953 }
954
955 /* Create perf event sample with the counter sets as raw data. The sample
956 * is then pushed to the event subsystem and the function checks for
957 * possible event overflows. If an event overflow occurs, the PMU is
958 * stopped.
959 *
960 * Return non-zero if an event overflow occurred.
961 */
962 static int cfdiag_push_sample(struct perf_event *event,
963 struct cpu_cf_events *cpuhw)
964 {
965 struct perf_sample_data data;
966 struct perf_raw_record raw;
967 struct pt_regs regs;
968 int overflow;
969
970 /* Setup perf sample */
971 perf_sample_data_init(&data, 0, event->hw.last_period);
972 memset(&regs, 0, sizeof(regs));
973 memset(&raw, 0, sizeof(raw));
974
975 if (event->attr.sample_type & PERF_SAMPLE_CPU)
976 data.cpu_entry.cpu = event->cpu;
977 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
978 raw.frag.size = cpuhw->usedss;
979 raw.frag.data = cpuhw->stop;
980 perf_sample_save_raw_data(&data, &raw);
981 }
982
983 overflow = perf_event_overflow(event, &data, &regs);
984 if (overflow)
985 event->pmu->stop(event, 0);
986
987 perf_event_update_userpage(event);
988 return overflow;
989 }
990
991 static void cpumf_pmu_stop(struct perf_event *event, int flags)
992 {
993 struct cpu_cf_events *cpuhw = this_cpu_cfhw();
994 struct hw_perf_event *hwc = &event->hw;
995 int i;
996
997 if (!(hwc->state & PERF_HES_STOPPED)) {
998 /* Decrement reference count for this counter set and if this
999 * is the last used counter in the set, clear activation
1000 * control and set the counter set state to inactive.
1001 */
1002 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
1003 if (!(hwc->config_base & cpumf_ctr_ctl[i]))
1004 continue;
1005 if (!atomic_dec_return(&cpuhw->ctr_set[i]))
1006 ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]);
1007 }
1008 hwc->state |= PERF_HES_STOPPED;
1009 }
1010
1011 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1012 if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
1013 local64_inc(&event->count);
1014 cpuhw->usedss = cfdiag_getctr(cpuhw->stop,
1015 sizeof(cpuhw->stop),
1016 event->hw.config_base,
1017 false);
1018 if (cfdiag_diffctr(cpuhw, event->hw.config_base))
1019 cfdiag_push_sample(event, cpuhw);
1020 } else {
1021 hw_perf_event_update(event);
1022 }
1023 hwc->state |= PERF_HES_UPTODATE;
1024 }
1025 }
1026
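/* Add the event to the PMU on this CPU: enable the counter set the event
 * belongs to and, with PERF_EF_START, start it right away.
 */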
1027 static int cpumf_pmu_add(struct perf_event *event, int flags)
1028 {
1029 struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1030
1031 ctr_set_enable(&cpuhw->state, event->hw.config_base);
1032 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1033
1034 if (flags & PERF_EF_START)
1035 cpumf_pmu_start(event, PERF_EF_RELOAD);
1036
1037 return 0;
1038 }
1039
1040 static void cpumf_pmu_del(struct perf_event *event, int flags)
1041 {
1042 struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1043 int i;
1044
1045 cpumf_pmu_stop(event, PERF_EF_UPDATE);
1046
1047 /* Check if any counter in the counter set is still used. If not used,
1048 * change the counter set to the disabled state. This also clears the
1049 * content of all counters in the set.
1050 *
1051 * When a new perf event has been added but not yet started, this can
1052 * clear enable control and reset all counters in a set. Therefore,
1053 * cpumf_pmu_start() always has to reenable a counter set.
1054 */
1055 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
1056 if (!atomic_read(&cpuhw->ctr_set[i]))
1057 ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]);
1058 }
1059
1060 /* Performance monitoring unit for s390x */
1061 static struct pmu cpumf_pmu = {
1062 .task_ctx_nr = perf_sw_context,
1063 .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
1064 .pmu_enable = cpumf_pmu_enable,
1065 .pmu_disable = cpumf_pmu_disable,
1066 .event_init = cpumf_pmu_event_init,
1067 .add = cpumf_pmu_add,
1068 .del = cpumf_pmu_del,
1069 .start = cpumf_pmu_start,
1070 .stop = cpumf_pmu_stop,
1071 .read = cpumf_pmu_read,
1072 };
1073
1074 static struct cfset_session { /* CPUs and counter set bit mask */
1075 struct list_head head; /* Head of list of active processes */
1076 } cfset_session = {
1077 .head = LIST_HEAD_INIT(cfset_session.head)
1078 };
1079
1080 static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */
1081 /*
1082 * Synchronize access to device /dev/hwctr. This mutex protects against
1083 * concurrent access to functions cfset_open() and cfset_release().
1084 * Same for CPU hotplug add and remove events triggering
1085 * cpum_cf_online_cpu() and cpum_cf_offline_cpu().
1086 * It also serializes concurrent device ioctl access from multiple
1087 * processes accessing /dev/hwctr.
1088 *
1089 * The mutex protects concurrent access to the /dev/hwctr session management
1090 * struct cfset_session and reference counting variable cfset_opencnt.
1091 */
1092 static DEFINE_MUTEX(cfset_ctrset_mutex);
1093
1094 /*
1095 * CPU hotplug handles only /dev/hwctr device.
1096 * For perf_event_open() the CPU hotplug handling is done on kernel common
1097 * code:
1098 * - CPU add: Nothing is done since a file descriptor can not be created
1099 * and returned to the user.
1100 * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
1101 * pmu_delete(). The event itself is removed when the file descriptor is
1102 * closed.
1103 */
1104 static int cfset_online_cpu(unsigned int cpu);
1105
1106 static int cpum_cf_online_cpu(unsigned int cpu)
1107 {
1108 int rc = 0;
1109
1110 /*
1111 * Ignore notification for perf_event_open().
1112 * Handle only /dev/hwctr device sessions.
1113 */
1114 mutex_lock(&cfset_ctrset_mutex);
1115 if (refcount_read(&cfset_opencnt)) {
1116 rc = cpum_cf_alloc_cpu(cpu);
1117 if (!rc)
1118 cfset_online_cpu(cpu);
1119 }
1120 mutex_unlock(&cfset_ctrset_mutex);
1121 return rc;
1122 }
1123
1124 static int cfset_offline_cpu(unsigned int cpu);
1125
1126 static int cpum_cf_offline_cpu(unsigned int cpu)
1127 {
1128 /*
1129 * During task exit processing of grouped perf events triggered by CPU
1130 * hotplug processing, pmu_disable() is called as part of perf context
1131 * removal process. Therefore do not trigger event removal now for
1132 * perf_event_open() created events. Perf common code triggers event
1133 * destruction when the event file descriptor is closed.
1134 *
1135 * Handle only /dev/hwctr device sessions.
1136 */
1137 mutex_lock(&cfset_ctrset_mutex);
1138 if (refcount_read(&cfset_opencnt)) {
1139 cfset_offline_cpu(cpu);
1140 cpum_cf_free_cpu(cpu);
1141 }
1142 mutex_unlock(&cfset_ctrset_mutex);
1143 return 0;
1144 }
1145
1146 /* Return true if store counter set multiple instruction is available */
1147 static inline int stccm_avail(void)
1148 {
1149 return test_facility(142);
1150 }
1151
1152 /* CPU-measurement alerts for the counter facility */
1153 static void cpumf_measurement_alert(struct ext_code ext_code,
1154 unsigned int alert, unsigned long unused)
1155 {
1156 struct cpu_cf_events *cpuhw;
1157
1158 if (!(alert & CPU_MF_INT_CF_MASK))
1159 return;
1160
1161 inc_irq_stat(IRQEXT_CMC);
1162
1163 /*
1164 * Measurement alerts are shared and might happen when the PMU
1165 * is not reserved. Ignore these alerts in this case.
1166 */
1167 cpuhw = this_cpu_cfhw();
1168 if (!cpuhw)
1169 return;
1170
1171 /* counter authorization change alert */
1172 if (alert & CPU_MF_INT_CF_CACA)
1173 qctri(&cpumf_ctr_info);
1174
1175 /* loss of counter data alert */
1176 if (alert & CPU_MF_INT_CF_LCDA)
1177 pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
1178
1179 /* loss of MT counter data alert */
1180 if (alert & CPU_MF_INT_CF_MTDA)
1181 pr_warn("CPU[%i] MT counter data was lost\n",
1182 smp_processor_id());
1183 }
1184
1185 static int cfset_init(void);
1186 static int __init cpumf_pmu_init(void)
1187 {
1188 int rc;
1189
1190 /* Extract counter measurement facility information */
1191 if (!cpum_cf_avail() || qctri(&cpumf_ctr_info))
1192 return -ENODEV;
1193
1194 /* Determine and store counter set sizes for later reference */
1195 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
1196 cpum_cf_make_setsize(rc);
1197
1198 /*
1199 * Clear bit 15 of cr0 to unauthorize problem-state to
1200 * extract measurement counters
1201 */
1202 ctl_clear_bit(0, 48);
1203
1204 /* register handler for measurement-alert interruptions */
1205 rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
1206 cpumf_measurement_alert);
1207 if (rc) {
1208 pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc);
1209 return rc;
1210 }
1211
1212 /* Setup s390dbf facility */
1213 cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
1214 if (!cf_dbg) {
1215 pr_err("Registration of s390dbf(cpum_cf) failed\n");
1216 rc = -ENOMEM;
1217 goto out1;
1218 }
1219 debug_register_view(cf_dbg, &debug_sprintf_view);
1220
1221 cpumf_pmu.attr_groups = cpumf_cf_event_group();
1222 rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
1223 if (rc) {
1224 pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
1225 goto out2;
1226 } else if (stccm_avail()) { /* Setup counter set device */
1227 cfset_init();
1228 }
1229
1230 rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
1231 "perf/s390/cf:online",
1232 cpum_cf_online_cpu, cpum_cf_offline_cpu);
1233 return rc;
1234
1235 out2:
1236 debug_unregister_view(cf_dbg, &debug_sprintf_view);
1237 debug_unregister(cf_dbg);
1238 out1:
1239 unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert);
1240 return rc;
1241 }
1242
1243 /* Support for the CPU Measurement Facility counter set extraction using
1244 * device /dev/hwctr. This allows user space programs to extract complete
1245 * counter set via normal file operations.
1246 */
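/* Illustrative user space usage (a minimal sketch, not part of this file;
 * the variable names are made up, only the ioctl commands and structures
 * from asm/hwctrset.h are real):
 *
 *	struct s390_ctrset_start start = { .version = S390_HWCTR_START_VERSION };
 *	int fd = open("/dev/hwctr", O_RDWR);
 *
 *	start.counter_sets = ...;	// bit mask of counter sets to start
 *	start.cpumask_len = ...;	// CPU mask of CPUs to count on
 *	start.cpumask = ...;
 *	ioctl(fd, S390_HWCTR_START, &start);	// sets start.data_bytes to the
 *						// buffer size needed for READ
 *	ioctl(fd, S390_HWCTR_READ, buf);	// buf: struct s390_ctrset_read + data
 *	ioctl(fd, S390_HWCTR_STOP, 0);
 *	close(fd);
 */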
1247
1248 struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
1249 unsigned int sets; /* Counter set bit mask */
1250 atomic_t cpus_ack; /* # CPUs successfully executed func */
1251 };
1252
1253 struct cfset_request { /* CPUs and counter set bit mask */
1254 unsigned long ctrset; /* Bit mask of counter set to read */
1255 cpumask_t mask; /* CPU mask to read from */
1256 struct list_head node; /* Chain to cfset_session.head */
1257 };
1258
1259 static void cfset_session_init(void)
1260 {
1261 INIT_LIST_HEAD(&cfset_session.head);
1262 }
1263
1264 /* Remove current request from global bookkeeping. Maintain a counter set bit
1265 * mask on a per CPU basis.
1266 * Done in process context under mutex protection.
1267 */
1268 static void cfset_session_del(struct cfset_request *p)
1269 {
1270 list_del(&p->node);
1271 }
1272
1273 /* Add current request to global bookkeeping. Maintain a counter set bit mask
1274 * on a per CPU basis.
1275 * Done in process context under mutex protection.
1276 */
1277 static void cfset_session_add(struct cfset_request *p)
1278 {
1279 list_add(&p->node, &cfset_session.head);
1280 }
1281
1282 /* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access
1283 * path is currently used.
1284 * The cpu_cf_events::dev_state is used to denote counter sets in use by this
1285 * interface. It is always or'ed in. If this interface is not active, its
1286 * value is zero and no additional counter sets will be included.
1287 *
1288 * The cpu_cf_events::state is used by the perf_event_open SVC and remains
1289 * unchanged.
1290 *
1291 * perf_pmu_enable() and perf_pmu_disable() and their call backs
1292 * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the
1293 * performance measurement subsystem to enable per process
1294 * CPU Measurement counter facility.
1295 * The XXX_enable() and XXX_disable() functions are used on x86 to turn
1296 * off the performance monitoring interrupt (PMI) during scheduling.
1297 * s390 uses these calls to temporarily stop and resume the active CPU
1298 * counters sets during scheduling.
1299 *
1300 * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr
1301 * device access. The perf_event_open() SVC interface makes a lot of effort
1302 * to only run the counters while the calling process is actively scheduled
1303 * to run.
1304 * When /dev/hwctr interface is also used at the same time, the counter sets
1305 * will keep running, even when the process is scheduled off a CPU.
1306 * However this is not a problem and does not lead to wrong counter values
1307 * for the perf_event_open() SVC. The current counter value will be recorded
1308 * during schedule-in. At schedule-out time the current counter value is
1309 * extracted again and the delta is calculated and added to the event.
1310 */
1311 /* Stop all counter sets via ioctl interface */
1312 static void cfset_ioctl_off(void *parm)
1313 {
1314 struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1315 struct cfset_call_on_cpu_parm *p = parm;
1316 int rc;
1317
1318 /* Check if any counter set used by /dev/hwctr */
1319 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
1320 if ((p->sets & cpumf_ctr_ctl[rc])) {
1321 if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
1322 ctr_set_disable(&cpuhw->dev_state,
1323 cpumf_ctr_ctl[rc]);
1324 ctr_set_stop(&cpuhw->dev_state,
1325 cpumf_ctr_ctl[rc]);
1326 }
1327 }
1328 /* Keep perf_event_open counter sets */
1329 rc = lcctl(cpuhw->dev_state | cpuhw->state);
1330 if (rc)
1331 pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n",
1332 cpuhw->state, S390_HWCTR_DEVICE, rc);
1333 if (!cpuhw->dev_state)
1334 cpuhw->flags &= ~PMU_F_IN_USE;
1335 }
1336
1337 /* Start counter sets on particular CPU */
1338 static void cfset_ioctl_on(void *parm)
1339 {
1340 struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1341 struct cfset_call_on_cpu_parm *p = parm;
1342 int rc;
1343
1344 cpuhw->flags |= PMU_F_IN_USE;
1345 ctr_set_enable(&cpuhw->dev_state, p->sets);
1346 ctr_set_start(&cpuhw->dev_state, p->sets);
1347 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
1348 if ((p->sets & cpumf_ctr_ctl[rc]))
1349 atomic_inc(&cpuhw->ctr_set[rc]);
1350 rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */
1351 if (!rc)
1352 atomic_inc(&p->cpus_ack);
1353 else
1354 pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n",
1355 cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc);
1356 }
1357
1358 static void cfset_release_cpu(void *p)
1359 {
1360 struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1361 int rc;
1362
1363 cpuhw->dev_state = 0;
1364 rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */
1365 if (rc)
1366 pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n",
1367 cpuhw->state, S390_HWCTR_DEVICE, rc);
1368 }
1369
1370 /* This modifies the process CPU mask to adopt it to the currently online
1371 * CPUs. Offline CPUs can not be addressed. This call terminates the access
1372 * and is usually followed by close() or a new ioctl(..., START, ...) which
1373 * creates a new request structure.
1374 */
1375 static void cfset_all_stop(struct cfset_request *req)
1376 {
1377 struct cfset_call_on_cpu_parm p = {
1378 .sets = req->ctrset,
1379 };
1380
1381 cpumask_and(&req->mask, &req->mask, cpu_online_mask);
1382 on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1);
1383 }
1384
1385 /* Release function is also called when application gets terminated without
1386 * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
1387 */
1388 static int cfset_release(struct inode *inode, struct file *file)
1389 {
1390 mutex_lock(&cfset_ctrset_mutex);
1391 /* Open followed by close/exit has no private_data */
1392 if (file->private_data) {
1393 cfset_all_stop(file->private_data);
1394 cfset_session_del(file->private_data);
1395 kfree(file->private_data);
1396 file->private_data = NULL;
1397 }
1398 if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */
1399 on_each_cpu(cfset_release_cpu, NULL, 1);
1400 cpum_cf_free(-1);
1401 }
1402 mutex_unlock(&cfset_ctrset_mutex);
1403 return 0;
1404 }
1405
1406 /*
1407 * Open via /dev/hwctr device. Allocate all per CPU resources on the first
1408 * open of the device. The last close releases all per CPU resources.
1409 * Parallel perf_event_open system calls also use per CPU resources.
1410 * These invocations are handled via reference counting on the per CPU data
1411 * structures.
1412 */
1413 static int cfset_open(struct inode *inode, struct file *file)
1414 {
1415 int rc = 0;
1416
1417 if (!perfmon_capable())
1418 return -EPERM;
1419 file->private_data = NULL;
1420
1421 mutex_lock(&cfset_ctrset_mutex);
1422 if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */
1423 rc = cpum_cf_alloc(-1);
1424 if (!rc) {
1425 cfset_session_init();
1426 refcount_set(&cfset_opencnt, 1);
1427 }
1428 }
1429 mutex_unlock(&cfset_ctrset_mutex);
1430
1431 /* nonseekable_open() never fails */
1432 return rc ?: nonseekable_open(inode, file);
1433 }
1434
1435 static int cfset_all_start(struct cfset_request *req)
1436 {
1437 struct cfset_call_on_cpu_parm p = {
1438 .sets = req->ctrset,
1439 .cpus_ack = ATOMIC_INIT(0),
1440 };
1441 cpumask_var_t mask;
1442 int rc = 0;
1443
1444 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
1445 return -ENOMEM;
1446 cpumask_and(mask, &req->mask, cpu_online_mask);
1447 on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1);
1448 if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
1449 on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
1450 rc = -EIO;
1451 }
1452 free_cpumask_var(mask);
1453 return rc;
1454 }
1455
1456 /* Return the maximum required space for all possible CPUs in case one
1457 * CPU will be onlined during the START, READ, STOP cycles.
1458 * To find out the size of the counter sets, any one CPU will do. They
1459 * all have the same counter sets.
1460 */
1461 static size_t cfset_needspace(unsigned int sets)
1462 {
1463 size_t bytes = 0;
1464 int i;
1465
1466 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
1467 if (!(sets & cpumf_ctr_ctl[i]))
1468 continue;
1469 bytes += cpum_cf_read_setsize(i) * sizeof(u64) +
1470 sizeof(((struct s390_ctrset_setdata *)0)->set) +
1471 sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
1472 }
1473 bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
1474 (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
1475 sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
1476 return bytes;
1477 }
1478
1479 static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
1480 {
1481 struct s390_ctrset_read __user *ctrset_read;
1482 unsigned int cpu, cpus, rc = 0;
1483 void __user *uptr;
1484
1485 ctrset_read = (struct s390_ctrset_read __user *)arg;
1486 uptr = ctrset_read->data;
1487 for_each_cpu(cpu, mask) {
1488 struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
1489 struct s390_ctrset_cpudata __user *ctrset_cpudata;
1490
1491 ctrset_cpudata = uptr;
1492 rc = put_user(cpu, &ctrset_cpudata->cpu_nr);
1493 rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets);
1494 rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data,
1495 cpuhw->used);
1496 if (rc) {
1497 rc = -EFAULT;
1498 goto out;
1499 }
1500 uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used;
1501 cond_resched();
1502 }
1503 cpus = cpumask_weight(mask);
1504 if (put_user(cpus, &ctrset_read->no_cpus))
1505 rc = -EFAULT;
1506 out:
1507 return rc;
1508 }
1509
1510 static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
1511 int ctrset_size, size_t room)
1512 {
1513 size_t need = 0;
1514 int rc = -1;
1515
1516 need = sizeof(*p) + sizeof(u64) * ctrset_size;
1517 if (need <= room) {
1518 p->set = cpumf_ctr_ctl[ctrset];
1519 p->no_cnts = ctrset_size;
1520 rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
1521 if (rc == 3) /* Nothing stored */
1522 need = 0;
1523 }
1524 return need;
1525 }
1526
1527 /* Read all counter sets. */
1528 static void cfset_cpu_read(void *parm)
1529 {
1530 struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1531 struct cfset_call_on_cpu_parm *p = parm;
1532 int set, set_size;
1533 size_t space;
1534
1535 /* No data saved yet */
1536 cpuhw->used = 0;
1537 cpuhw->sets = 0;
1538 memset(cpuhw->data, 0, sizeof(cpuhw->data));
1539
1540 /* Scan the counter sets */
1541 for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
1542 struct s390_ctrset_setdata *sp = (void *)cpuhw->data +
1543 cpuhw->used;
1544
1545 if (!(p->sets & cpumf_ctr_ctl[set]))
1546 continue; /* Counter set not in list */
1547 set_size = cpum_cf_read_setsize(set);
1548 space = sizeof(cpuhw->data) - cpuhw->used;
1549 space = cfset_cpuset_read(sp, set, set_size, space);
1550 if (space) {
1551 cpuhw->used += space;
1552 cpuhw->sets += 1;
1553 }
1554 }
1555 }
1556
1557 static int cfset_all_read(unsigned long arg, struct cfset_request *req)
1558 {
1559 struct cfset_call_on_cpu_parm p;
1560 cpumask_var_t mask;
1561 int rc;
1562
1563 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
1564 return -ENOMEM;
1565
1566 p.sets = req->ctrset;
1567 cpumask_and(mask, &req->mask, cpu_online_mask);
1568 on_each_cpu_mask(mask, cfset_cpu_read, &p, 1);
1569 rc = cfset_all_copy(arg, mask);
1570 free_cpumask_var(mask);
1571 return rc;
1572 }
1573
1574 static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req)
1575 {
1576 int ret = -ENODATA;
1577
1578 if (req && req->ctrset)
1579 ret = cfset_all_read(arg, req);
1580 return ret;
1581 }
1582
1583 static long cfset_ioctl_stop(struct file *file)
1584 {
1585 struct cfset_request *req = file->private_data;
1586 int ret = -ENXIO;
1587
1588 if (req) {
1589 cfset_all_stop(req);
1590 cfset_session_del(req);
1591 kfree(req);
1592 file->private_data = NULL;
1593 ret = 0;
1594 }
1595 return ret;
1596 }
1597
1598 static long cfset_ioctl_start(unsigned long arg, struct file *file)
1599 {
1600 struct s390_ctrset_start __user *ustart;
1601 struct s390_ctrset_start start;
1602 struct cfset_request *preq;
1603 void __user *umask;
1604 unsigned int len;
1605 int ret = 0;
1606 size_t need;
1607
1608 if (file->private_data)
1609 return -EBUSY;
1610 ustart = (struct s390_ctrset_start __user *)arg;
1611 if (copy_from_user(&start, ustart, sizeof(start)))
1612 return -EFAULT;
1613 if (start.version != S390_HWCTR_START_VERSION)
1614 return -EINVAL;
1615 if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
1616 cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
1617 cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
1618 cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
1619 cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
1620 return -EINVAL; /* Invalid counter set */
1621 if (!start.counter_sets)
1622 return -EINVAL; /* No counter set at all? */
1623
1624 preq = kzalloc(sizeof(*preq), GFP_KERNEL);
1625 if (!preq)
1626 return -ENOMEM;
1627 cpumask_clear(&preq->mask);
1628 len = min_t(u64, start.cpumask_len, cpumask_size());
1629 umask = (void __user *)start.cpumask;
1630 if (copy_from_user(&preq->mask, umask, len)) {
1631 kfree(preq);
1632 return -EFAULT;
1633 }
1634 if (cpumask_empty(&preq->mask)) {
1635 kfree(preq);
1636 return -EINVAL;
1637 }
1638 need = cfset_needspace(start.counter_sets);
1639 if (put_user(need, &ustart->data_bytes)) {
1640 kfree(preq);
1641 return -EFAULT;
1642 }
1643 preq->ctrset = start.counter_sets;
1644 ret = cfset_all_start(preq);
1645 if (!ret) {
1646 cfset_session_add(preq);
1647 file->private_data = preq;
1648 } else {
1649 kfree(preq);
1650 }
1651 return ret;
1652 }
1653
1654 /* Entry point to the /dev/hwctr device interface.
1655 * The ioctl system call supports three subcommands:
1656 * S390_HWCTR_START: Start the specified counter sets on a CPU list. The
1657 * counter set keeps running until explicitly stopped. Returns the number
1658 * of bytes needed to store the counter values. If another S390_HWCTR_START
1659 * ioctl subcommand is called without a previous S390_HWCTR_STOP stop
1660 * command on the same file descriptor, -EBUSY is returned.
1661 * S390_HWCTR_READ: Read the counter set values from specified CPU list given
1662 * with the S390_HWCTR_START command.
1663 * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the
1664 * previous S390_HWCTR_START subcommand.
1665 */
1666 static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1667 {
1668 int ret;
1669
1670 cpus_read_lock();
1671 mutex_lock(&cfset_ctrset_mutex);
1672 switch (cmd) {
1673 case S390_HWCTR_START:
1674 ret = cfset_ioctl_start(arg, file);
1675 break;
1676 case S390_HWCTR_STOP:
1677 ret = cfset_ioctl_stop(file);
1678 break;
1679 case S390_HWCTR_READ:
1680 ret = cfset_ioctl_read(arg, file->private_data);
1681 break;
1682 default:
1683 ret = -ENOTTY;
1684 break;
1685 }
1686 mutex_unlock(&cfset_ctrset_mutex);
1687 cpus_read_unlock();
1688 return ret;
1689 }
1690
1691 static const struct file_operations cfset_fops = {
1692 .owner = THIS_MODULE,
1693 .open = cfset_open,
1694 .release = cfset_release,
1695 .unlocked_ioctl = cfset_ioctl,
1696 .compat_ioctl = cfset_ioctl,
1697 .llseek = no_llseek
1698 };
1699
1700 static struct miscdevice cfset_dev = {
1701 .name = S390_HWCTR_DEVICE,
1702 .minor = MISC_DYNAMIC_MINOR,
1703 .fops = &cfset_fops,
1704 .mode = 0666,
1705 };
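
/*
 * Illustrative sketch (not part of the driver): a minimal user-space
 * sequence against /dev/hwctr, assuming "start" was set up as in the
 * sketch after cfset_ioctl_start() and "buf" is at least start.data_bytes
 * bytes large (needs <fcntl.h>, <sys/ioctl.h> and asm/hwctrset.h). The
 * layout of the READ result is described by the structures in
 * asm/hwctrset.h and is not decoded here.
 *
 *	fd = open("/dev/" S390_HWCTR_DEVICE, O_RDWR);
 *	rc = ioctl(fd, S390_HWCTR_START, &start);	// returns needed size
 *	...						// workload runs
 *	rc = ioctl(fd, S390_HWCTR_READ, buf);		// snapshot counter sets
 *	rc = ioctl(fd, S390_HWCTR_STOP);		// stop and release
 *	close(fd);
 */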

/* Hotplug add of a CPU. Scan through all active processes and add
 * that CPU to the list of CPUs supplied with ioctl(..., START, ...).
 */
static int cfset_online_cpu(unsigned int cpu)
{
	struct cfset_call_on_cpu_parm p;
	struct cfset_request *rp;

	if (!list_empty(&cfset_session.head)) {
		list_for_each_entry(rp, &cfset_session.head, node) {
			p.sets = rp->ctrset;
			cfset_ioctl_on(&p);
			cpumask_set_cpu(cpu, &rp->mask);
		}
	}
	return 0;
}

/* Hotplug remove of a CPU. Scan through all active processes and clear
 * that CPU from the list of CPUs supplied with ioctl(..., START, ...).
 * Adjust reference counts.
 */
static int cfset_offline_cpu(unsigned int cpu)
{
	struct cfset_call_on_cpu_parm p;
	struct cfset_request *rp;

	if (!list_empty(&cfset_session.head)) {
		list_for_each_entry(rp, &cfset_session.head, node) {
			p.sets = rp->ctrset;
			cfset_ioctl_off(&p);
			cpumask_clear_cpu(cpu, &rp->mask);
		}
	}
	return 0;
}
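
/*
 * Note (sketch, not part of this file's code flow): callbacks like the two
 * above are typically wired into CPU hotplug during driver initialization,
 * for example via
 *
 *	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "perf/s390/cfd:online",
 *			  cfset_online_cpu, cfset_offline_cpu);
 *
 * The hotplug state and name shown here are placeholders; the actual
 * registration for this driver happens in its init code elsewhere.
 */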

static void cfdiag_read(struct perf_event *event)
{
}

static int get_authctrsets(void)
{
	unsigned long auth = 0;
	enum cpumf_ctr_set i;

	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
		if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i])
			auth |= cpumf_ctr_ctl[i];
	}
	return auth;
}

/* Setup the event. Test for authorized counter sets and only include counter
 * sets which are authorized at the time of the setup. Including unauthorized
 * counter sets results in a specification exception (and panic).
 */
static int cfdiag_event_init2(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	int err = 0;

	/* Set sample_period to indicate sampling */
	event->hw.config = attr->config;
	event->hw.sample_period = attr->sample_period;
	local64_set(&event->hw.period_left, event->hw.sample_period);
	local64_set(&event->count, 0);
	event->hw.last_period = event->hw.sample_period;

	/* Add all authorized counter sets to config_base. The
	 * hardware init function is either called per CPU or just once
	 * for all CPUs (event->cpu == -1). This depends on whether
	 * counting is started for all CPUs or on a per-workload basis,
	 * where the perf event moves from one CPU to another.
	 * Checking the authorization on any CPU is fine as the hardware
	 * applies the same authorization settings to all CPUs.
	 */
	event->hw.config_base = get_authctrsets();

	/* No authorized counter sets, nothing to count/sample */
	if (!event->hw.config_base)
		err = -EINVAL;

	return err;
}

static int cfdiag_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	int err = -ENOENT;

	if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
	    event->attr.type != event->pmu->type)
		goto out;

	/* Raw events are used to access counters directly,
	 * hence do not permit excludes.
	 * This event is useless without PERF_SAMPLE_RAW to return counter set
	 * values as raw data.
	 */
	if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv ||
	    !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) {
		err = -EOPNOTSUPP;
		goto out;
	}

	/* Initialize for using the CPU-measurement counter facility */
	if (cpum_cf_alloc(event->cpu))
		return -ENOMEM;
	event->destroy = hw_perf_event_destroy;

	err = cfdiag_event_init2(event);
	if (unlikely(err))
		event->destroy(event);
out:
	return err;
}
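
/*
 * Illustrative sketch (not part of the driver): a user-space perf_event_attr
 * that satisfies the checks in cfdiag_event_init() above. The PMU type and
 * event config are assumed to be read from sysfs
 * (/sys/bus/event_source/devices/cpum_cf_diag/{type,events/CF_DIAG}) rather
 * than hard-coded; cf_diag_pmu_type and cf_diag_event_config are placeholder
 * variables holding those values.
 *
 *	struct perf_event_attr attr = {
 *		.size = sizeof(attr),
 *		.type = cf_diag_pmu_type,		// from sysfs "type" file
 *		.config = cf_diag_event_config,		// from events/CF_DIAG
 *		.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CPU,
 *		.sample_period = 1,
 *		// exclude_user/exclude_kernel/exclude_hv must stay zero
 *	};
 *	fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
 */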

/* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used
 * to collect the complete counter sets for a scheduled process. The
 * complete counter sets are attached as raw data to the artificial event.
 * This way the complete counter sets are available when a process is
 * scheduled and contain the delta of every counter while the process
 * was running.
 */
CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);

static struct attribute *cfdiag_events_attr[] = {
	CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG),
	NULL,
};

PMU_FORMAT_ATTR(event, "config:0-63");

static struct attribute *cfdiag_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group cfdiag_events_group = {
	.name = "events",
	.attrs = cfdiag_events_attr,
};
static struct attribute_group cfdiag_format_group = {
	.name = "format",
	.attrs = cfdiag_format_attr,
};
static const struct attribute_group *cfdiag_attr_groups[] = {
	&cfdiag_events_group,
	&cfdiag_format_group,
	NULL,
};
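
/*
 * With the attribute groups above and the PMU registered as "cpum_cf_diag"
 * (see cfset_init() below), perf exposes the event and format descriptions
 * in sysfs roughly as follows (paths shown for illustration):
 *
 *	/sys/bus/event_source/devices/cpum_cf_diag/events/CF_DIAG
 *	/sys/bus/event_source/devices/cpum_cf_diag/format/event
 */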

/* Performance monitoring unit for the CF_DIAG event. Since this event
 * is also started and stopped via the perf_event_open() system call, use
 * the same event enable/disable call back functions. They do not
 * have a pointer to the perf_event structure as first parameter.
 *
 * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
 * Reuse them and distinguish the event (always the first parameter) via the
 * 'config' member.
 */
static struct pmu cf_diag = {
	.task_ctx_nr = perf_sw_context,
	.event_init = cfdiag_event_init,
	.pmu_enable = cpumf_pmu_enable,
	.pmu_disable = cpumf_pmu_disable,
	.add = cpumf_pmu_add,
	.del = cpumf_pmu_del,
	.start = cpumf_pmu_start,
	.stop = cpumf_pmu_stop,
	.read = cfdiag_read,

	.attr_groups = cfdiag_attr_groups
};

/* Calculate memory needed to store all counter sets together with header and
 * trailer data. This is independent of the counter set authorization which
 * can vary depending on the configuration.
 */
static size_t cfdiag_maxsize(struct cpumf_ctr_info *info)
{
	size_t max_size = sizeof(struct cf_trailer_entry);
	enum cpumf_ctr_set i;

	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
		size_t size = cpum_cf_read_setsize(i);

		if (size)
			max_size += size * sizeof(u64) +
				    sizeof(struct cf_ctrset_entry);
	}
	return max_size;
}
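
/*
 * For reference, the space computed above covers a per-CPU buffer that is
 * laid out roughly as:
 *
 *	[cf_ctrset_entry][counter values ...]	counter set 1
 *	[cf_ctrset_entry][counter values ...]	counter set 2
 *	...
 *	[cf_trailer_entry]			trailer with machine data
 *
 * Each counter value is a u64; cpum_cf_read_setsize() returns the number of
 * counters in a set, and only sets present on the machine contribute.
 */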

/* Get the CPU speed, try sampling facility first and CPU attributes second. */
static void cfdiag_get_cpu_speed(void)
{
	unsigned long mhz;

	if (cpum_sf_avail()) {			/* Sampling facility first */
		struct hws_qsi_info_block si;

		memset(&si, 0, sizeof(si));
		if (!qsi(&si)) {
			cfdiag_cpu_speed = si.cpu_speed;
			return;
		}
	}

	/* Fallback: Extract the CPU speed from the static part of the CPU
	 * attributes. Used in case the CPU Measurement Sampling Facility is
	 * turned off.
	 */
	mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
	if (mhz != -1UL)
		cfdiag_cpu_speed = mhz & 0xffffffff;
}

static int cfset_init(void)
{
	size_t need;
	int rc;

	cfdiag_get_cpu_speed();
	/* Make sure the counter set data fits into predefined buffer. */
	need = cfdiag_maxsize(&cpumf_ctr_info);
	if (need > sizeof(((struct cpu_cf_events *)0)->start)) {
		pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
		       need);
		return -ENOMEM;
	}

	rc = misc_register(&cfset_dev);
	if (rc) {
		pr_err("Registration of /dev/%s failed rc=%i\n",
		       cfset_dev.name, rc);
		goto out;
	}

	rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
	if (rc) {
		misc_deregister(&cfset_dev);
		pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
		       rc);
	}
out:
	return rc;
}

device_initcall(cpumf_pmu_init);