1a17ae4c3SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0 2212188a5SHendrik Brueckner /* 3212188a5SHendrik Brueckner * Performance event support for s390x - CPU-measurement Counter Facility 4212188a5SHendrik Brueckner * 51e99c242SThomas Richter * Copyright IBM Corp. 2012, 2023 646a984ffSThomas Richter * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> 7a029a4eaSThomas Richter * Thomas Richter <tmricht@linux.ibm.com> 8212188a5SHendrik Brueckner */ 9212188a5SHendrik Brueckner #define KMSG_COMPONENT "cpum_cf" 10212188a5SHendrik Brueckner #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 11212188a5SHendrik Brueckner 12212188a5SHendrik Brueckner #include <linux/kernel.h> 13212188a5SHendrik Brueckner #include <linux/kernel_stat.h> 14212188a5SHendrik Brueckner #include <linux/percpu.h> 15212188a5SHendrik Brueckner #include <linux/notifier.h> 16212188a5SHendrik Brueckner #include <linux/init.h> 17212188a5SHendrik Brueckner #include <linux/export.h> 18a029a4eaSThomas Richter #include <linux/miscdevice.h> 191e99c242SThomas Richter #include <linux/perf_event.h> 20a029a4eaSThomas Richter 211e99c242SThomas Richter #include <asm/cpu_mf.h> 22a029a4eaSThomas Richter #include <asm/hwctrset.h> 23a029a4eaSThomas Richter #include <asm/debug.h> 24a029a4eaSThomas Richter 251e99c242SThomas Richter enum cpumf_ctr_set { 261e99c242SThomas Richter CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */ 271e99c242SThomas Richter CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */ 281e99c242SThomas Richter CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */ 291e99c242SThomas Richter CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */ 301e99c242SThomas Richter CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */ 311e99c242SThomas Richter 321e99c242SThomas Richter /* Maximum number of counter sets */ 331e99c242SThomas Richter CPUMF_CTR_SET_MAX, 341e99c242SThomas Richter }; 351e99c242SThomas Richter 361e99c242SThomas Richter #define CPUMF_LCCTL_ENABLE_SHIFT 16 371e99c242SThomas 
Richter #define CPUMF_LCCTL_ACTCTL_SHIFT 0 381e99c242SThomas Richter 391e99c242SThomas Richter static inline void ctr_set_enable(u64 *state, u64 ctrsets) 401e99c242SThomas Richter { 411e99c242SThomas Richter *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT; 421e99c242SThomas Richter } 431e99c242SThomas Richter 441e99c242SThomas Richter static inline void ctr_set_disable(u64 *state, u64 ctrsets) 451e99c242SThomas Richter { 461e99c242SThomas Richter *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT); 471e99c242SThomas Richter } 481e99c242SThomas Richter 491e99c242SThomas Richter static inline void ctr_set_start(u64 *state, u64 ctrsets) 501e99c242SThomas Richter { 511e99c242SThomas Richter *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT; 521e99c242SThomas Richter } 531e99c242SThomas Richter 541e99c242SThomas Richter static inline void ctr_set_stop(u64 *state, u64 ctrsets) 551e99c242SThomas Richter { 561e99c242SThomas Richter *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT); 571e99c242SThomas Richter } 581e99c242SThomas Richter 591e99c242SThomas Richter static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest) 601e99c242SThomas Richter { 611e99c242SThomas Richter switch (set) { 621e99c242SThomas Richter case CPUMF_CTR_SET_BASIC: 631e99c242SThomas Richter return stcctm(BASIC, range, dest); 641e99c242SThomas Richter case CPUMF_CTR_SET_USER: 651e99c242SThomas Richter return stcctm(PROBLEM_STATE, range, dest); 661e99c242SThomas Richter case CPUMF_CTR_SET_CRYPTO: 671e99c242SThomas Richter return stcctm(CRYPTO_ACTIVITY, range, dest); 681e99c242SThomas Richter case CPUMF_CTR_SET_EXT: 691e99c242SThomas Richter return stcctm(EXTENDED, range, dest); 701e99c242SThomas Richter case CPUMF_CTR_SET_MT_DIAG: 711e99c242SThomas Richter return stcctm(MT_DIAG_CLEARING, range, dest); 721e99c242SThomas Richter case CPUMF_CTR_SET_MAX: 731e99c242SThomas Richter return 3; 741e99c242SThomas Richter } 751e99c242SThomas Richter return 3; 761e99c242SThomas Richter } 771e99c242SThomas 
Richter 781e99c242SThomas Richter struct cpu_cf_events { 791e99c242SThomas Richter atomic_t ctr_set[CPUMF_CTR_SET_MAX]; 801e99c242SThomas Richter u64 state; /* For perf_event_open SVC */ 811e99c242SThomas Richter u64 dev_state; /* For /dev/hwctr */ 821e99c242SThomas Richter unsigned int flags; 831e99c242SThomas Richter size_t used; /* Bytes used in data */ 841e99c242SThomas Richter size_t usedss; /* Bytes used in start/stop */ 851e99c242SThomas Richter unsigned char start[PAGE_SIZE]; /* Counter set at event add */ 861e99c242SThomas Richter unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */ 871e99c242SThomas Richter unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */ 881e99c242SThomas Richter unsigned int sets; /* # Counter set saved in memory */ 891e99c242SThomas Richter }; 901e99c242SThomas Richter 911e99c242SThomas Richter /* Per-CPU event structure for the counter facility */ 921e99c242SThomas Richter static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events); 931e99c242SThomas Richter 94a029a4eaSThomas Richter static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ 95a029a4eaSThomas Richter static debug_info_t *cf_dbg; 96a029a4eaSThomas Richter 97*46c4d945SThomas Richter /* 98*46c4d945SThomas Richter * The CPU Measurement query counter information instruction contains 99*46c4d945SThomas Richter * information which varies per machine generation, but is constant and 100*46c4d945SThomas Richter * does not change when running on a particular machine, such as counter 101*46c4d945SThomas Richter * first and second version number. This is needed to determine the size 102*46c4d945SThomas Richter * of counter sets. Extract this information at device driver initialization. 
103*46c4d945SThomas Richter */ 104*46c4d945SThomas Richter static struct cpumf_ctr_info cpumf_ctr_info; 105*46c4d945SThomas Richter 106a029a4eaSThomas Richter #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ 107a029a4eaSThomas Richter /* interval in seconds */ 108a029a4eaSThomas Richter 109a029a4eaSThomas Richter /* Counter sets are stored as data stream in a page sized memory buffer and 110a029a4eaSThomas Richter * exported to user space via raw data attached to the event sample data. 111a029a4eaSThomas Richter * Each counter set starts with an eight byte header consisting of: 112a029a4eaSThomas Richter * - a two byte eye catcher (0xfeef) 113a029a4eaSThomas Richter * - a one byte counter set number 114a029a4eaSThomas Richter * - a two byte counter set size (indicates the number of counters in this set) 115a029a4eaSThomas Richter * - a three byte reserved value (must be zero) to make the header the same 116a029a4eaSThomas Richter * size as a counter value. 117a029a4eaSThomas Richter * All counter values are eight byte in size. 118a029a4eaSThomas Richter * 119a029a4eaSThomas Richter * All counter sets are followed by a 64 byte trailer. 120a029a4eaSThomas Richter * The trailer consists of a: 121a029a4eaSThomas Richter * - flag field indicating valid fields when corresponding bit set 122a029a4eaSThomas Richter * - the counter facility first and second version number 123a029a4eaSThomas Richter * - the CPU speed if nonzero 124a029a4eaSThomas Richter * - the time stamp the counter sets have been collected 125a029a4eaSThomas Richter * - the time of day (TOD) base value 126a029a4eaSThomas Richter * - the machine type. 127a029a4eaSThomas Richter * 128a029a4eaSThomas Richter * The counter sets are saved when the process is prepared to be executed on a 129a029a4eaSThomas Richter * CPU and saved again when the process is going to be removed from a CPU. 
130a029a4eaSThomas Richter * The difference of both counter sets are calculated and stored in the event 131a029a4eaSThomas Richter * sample data area. 132a029a4eaSThomas Richter */ 133a029a4eaSThomas Richter struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ 134a029a4eaSThomas Richter unsigned int def:16; /* 0-15 Data Entry Format */ 135a029a4eaSThomas Richter unsigned int set:16; /* 16-31 Counter set identifier */ 136a029a4eaSThomas Richter unsigned int ctr:16; /* 32-47 Number of stored counters */ 137a029a4eaSThomas Richter unsigned int res1:16; /* 48-63 Reserved */ 138a029a4eaSThomas Richter }; 139a029a4eaSThomas Richter 140a029a4eaSThomas Richter struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */ 141a029a4eaSThomas Richter /* 0 - 7 */ 142a029a4eaSThomas Richter union { 143a029a4eaSThomas Richter struct { 144a029a4eaSThomas Richter unsigned int clock_base:1; /* TOD clock base set */ 145a029a4eaSThomas Richter unsigned int speed:1; /* CPU speed set */ 146a029a4eaSThomas Richter /* Measurement alerts */ 147a029a4eaSThomas Richter unsigned int mtda:1; /* Loss of MT ctr. data alert */ 148a029a4eaSThomas Richter unsigned int caca:1; /* Counter auth. 
change alert */ 149a029a4eaSThomas Richter unsigned int lcda:1; /* Loss of counter data alert */ 150a029a4eaSThomas Richter }; 151a029a4eaSThomas Richter unsigned long flags; /* 0-63 All indicators */ 152a029a4eaSThomas Richter }; 153a029a4eaSThomas Richter /* 8 - 15 */ 154a029a4eaSThomas Richter unsigned int cfvn:16; /* 64-79 Ctr First Version */ 155a029a4eaSThomas Richter unsigned int csvn:16; /* 80-95 Ctr Second Version */ 156a029a4eaSThomas Richter unsigned int cpu_speed:32; /* 96-127 CPU speed */ 157a029a4eaSThomas Richter /* 16 - 23 */ 158a029a4eaSThomas Richter unsigned long timestamp; /* 128-191 Timestamp (TOD) */ 159a029a4eaSThomas Richter /* 24 - 55 */ 160a029a4eaSThomas Richter union { 161a029a4eaSThomas Richter struct { 162a029a4eaSThomas Richter unsigned long progusage1; 163a029a4eaSThomas Richter unsigned long progusage2; 164a029a4eaSThomas Richter unsigned long progusage3; 165a029a4eaSThomas Richter unsigned long tod_base; 166a029a4eaSThomas Richter }; 167a029a4eaSThomas Richter unsigned long progusage[4]; 168a029a4eaSThomas Richter }; 169a029a4eaSThomas Richter /* 56 - 63 */ 170a029a4eaSThomas Richter unsigned int mach_type:16; /* Machine type */ 171a029a4eaSThomas Richter unsigned int res1:16; /* Reserved */ 172a029a4eaSThomas Richter unsigned int res2:32; /* Reserved */ 173a029a4eaSThomas Richter }; 174a029a4eaSThomas Richter 175a029a4eaSThomas Richter /* Create the trailer data at the end of a page. 
*/ 176a029a4eaSThomas Richter static void cfdiag_trailer(struct cf_trailer_entry *te) 177a029a4eaSThomas Richter { 178a029a4eaSThomas Richter struct cpuid cpuid; 179a029a4eaSThomas Richter 180*46c4d945SThomas Richter te->cfvn = cpumf_ctr_info.cfvn; /* Counter version numbers */ 181*46c4d945SThomas Richter te->csvn = cpumf_ctr_info.csvn; 182a029a4eaSThomas Richter 183a029a4eaSThomas Richter get_cpu_id(&cpuid); /* Machine type */ 184a029a4eaSThomas Richter te->mach_type = cpuid.machine; 185a029a4eaSThomas Richter te->cpu_speed = cfdiag_cpu_speed; 186a029a4eaSThomas Richter if (te->cpu_speed) 187a029a4eaSThomas Richter te->speed = 1; 188a029a4eaSThomas Richter te->clock_base = 1; /* Save clock base */ 189a029a4eaSThomas Richter te->tod_base = tod_clock_base.tod; 190a029a4eaSThomas Richter te->timestamp = get_tod_clock_fast(); 191a029a4eaSThomas Richter } 192a029a4eaSThomas Richter 193345d2a4dSThomas Richter /* 194*46c4d945SThomas Richter * The number of counters per counter set varies between machine generations, 195*46c4d945SThomas Richter * but is constant when running on a particular machine generation. 196*46c4d945SThomas Richter * Determine each counter set size at device driver initialization and 197*46c4d945SThomas Richter * retrieve it later. 
198345d2a4dSThomas Richter */ 199*46c4d945SThomas Richter static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX]; 200*46c4d945SThomas Richter static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) 201345d2a4dSThomas Richter { 202345d2a4dSThomas Richter size_t ctrset_size = 0; 203345d2a4dSThomas Richter 204345d2a4dSThomas Richter switch (ctrset) { 205345d2a4dSThomas Richter case CPUMF_CTR_SET_BASIC: 206*46c4d945SThomas Richter if (cpumf_ctr_info.cfvn >= 1) 207345d2a4dSThomas Richter ctrset_size = 6; 208345d2a4dSThomas Richter break; 209345d2a4dSThomas Richter case CPUMF_CTR_SET_USER: 210*46c4d945SThomas Richter if (cpumf_ctr_info.cfvn == 1) 211345d2a4dSThomas Richter ctrset_size = 6; 212*46c4d945SThomas Richter else if (cpumf_ctr_info.cfvn >= 3) 213345d2a4dSThomas Richter ctrset_size = 2; 214345d2a4dSThomas Richter break; 215345d2a4dSThomas Richter case CPUMF_CTR_SET_CRYPTO: 216*46c4d945SThomas Richter if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5) 217345d2a4dSThomas Richter ctrset_size = 16; 218*46c4d945SThomas Richter else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) 219345d2a4dSThomas Richter ctrset_size = 20; 220345d2a4dSThomas Richter break; 221345d2a4dSThomas Richter case CPUMF_CTR_SET_EXT: 222*46c4d945SThomas Richter if (cpumf_ctr_info.csvn == 1) 223345d2a4dSThomas Richter ctrset_size = 32; 224*46c4d945SThomas Richter else if (cpumf_ctr_info.csvn == 2) 225345d2a4dSThomas Richter ctrset_size = 48; 226*46c4d945SThomas Richter else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) 227345d2a4dSThomas Richter ctrset_size = 128; 228*46c4d945SThomas Richter else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) 229345d2a4dSThomas Richter ctrset_size = 160; 230345d2a4dSThomas Richter break; 231345d2a4dSThomas Richter case CPUMF_CTR_SET_MT_DIAG: 232*46c4d945SThomas Richter if (cpumf_ctr_info.csvn > 3) 233345d2a4dSThomas Richter ctrset_size = 48; 234345d2a4dSThomas Richter break; 235345d2a4dSThomas Richter case 
CPUMF_CTR_SET_MAX: 236345d2a4dSThomas Richter break; 237345d2a4dSThomas Richter } 238*46c4d945SThomas Richter cpumf_ctr_setsizes[ctrset] = ctrset_size; 239*46c4d945SThomas Richter } 240345d2a4dSThomas Richter 241*46c4d945SThomas Richter /* 242*46c4d945SThomas Richter * Return the maximum possible counter set size (in number of 8 byte counters) 243*46c4d945SThomas Richter * depending on type and model number. 244*46c4d945SThomas Richter */ 245*46c4d945SThomas Richter static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset) 246*46c4d945SThomas Richter { 247*46c4d945SThomas Richter return cpumf_ctr_setsizes[ctrset]; 248345d2a4dSThomas Richter } 249345d2a4dSThomas Richter 250a029a4eaSThomas Richter /* Read a counter set. The counter set number determines the counter set and 251a029a4eaSThomas Richter * the CPUM-CF first and second version number determine the number of 252a029a4eaSThomas Richter * available counters in each counter set. 253a029a4eaSThomas Richter * Each counter set starts with header containing the counter set number and 254a029a4eaSThomas Richter * the number of eight byte counters. 255a029a4eaSThomas Richter * 256a029a4eaSThomas Richter * The functions returns the number of bytes occupied by this counter set 257a029a4eaSThomas Richter * including the header. 258a029a4eaSThomas Richter * If there is no counter in the counter set, this counter set is useless and 259a029a4eaSThomas Richter * zero is returned on this case. 260a029a4eaSThomas Richter * 261a029a4eaSThomas Richter * Note that the counter sets may not be enabled or active and the stcctm 262a029a4eaSThomas Richter * instruction might return error 3. Depending on error_ok value this is ok, 263a029a4eaSThomas Richter * for example when called from cpumf_pmu_start() call back function. 
264a029a4eaSThomas Richter */ 265a029a4eaSThomas Richter static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, 266a029a4eaSThomas Richter size_t room, bool error_ok) 267a029a4eaSThomas Richter { 268a029a4eaSThomas Richter size_t ctrset_size, need = 0; 269a029a4eaSThomas Richter int rc = 3; /* Assume write failure */ 270a029a4eaSThomas Richter 271a029a4eaSThomas Richter ctrdata->def = CF_DIAG_CTRSET_DEF; 272a029a4eaSThomas Richter ctrdata->set = ctrset; 273a029a4eaSThomas Richter ctrdata->res1 = 0; 274*46c4d945SThomas Richter ctrset_size = cpum_cf_read_setsize(ctrset); 275a029a4eaSThomas Richter 276a029a4eaSThomas Richter if (ctrset_size) { /* Save data */ 277a029a4eaSThomas Richter need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); 278a029a4eaSThomas Richter if (need <= room) { 279a029a4eaSThomas Richter rc = ctr_stcctm(ctrset, ctrset_size, 280a029a4eaSThomas Richter (u64 *)(ctrdata + 1)); 281a029a4eaSThomas Richter } 282a029a4eaSThomas Richter if (rc != 3 || error_ok) 283a029a4eaSThomas Richter ctrdata->ctr = ctrset_size; 284a029a4eaSThomas Richter else 285a029a4eaSThomas Richter need = 0; 286a029a4eaSThomas Richter } 287a029a4eaSThomas Richter 288a029a4eaSThomas Richter return need; 289a029a4eaSThomas Richter } 290a029a4eaSThomas Richter 2915dddfaacSHeiko Carstens static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { 2925dddfaacSHeiko Carstens [CPUMF_CTR_SET_BASIC] = 0x02, 2935dddfaacSHeiko Carstens [CPUMF_CTR_SET_USER] = 0x04, 2945dddfaacSHeiko Carstens [CPUMF_CTR_SET_CRYPTO] = 0x08, 2955dddfaacSHeiko Carstens [CPUMF_CTR_SET_EXT] = 0x01, 2965dddfaacSHeiko Carstens [CPUMF_CTR_SET_MT_DIAG] = 0x20, 2975dddfaacSHeiko Carstens }; 2985dddfaacSHeiko Carstens 299a029a4eaSThomas Richter /* Read out all counter sets and save them in the provided data buffer. 300a029a4eaSThomas Richter * The last 64 byte host an artificial trailer entry. 
301a029a4eaSThomas Richter */ 302a029a4eaSThomas Richter static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth, 303a029a4eaSThomas Richter bool error_ok) 304a029a4eaSThomas Richter { 305a029a4eaSThomas Richter struct cf_trailer_entry *trailer; 306a029a4eaSThomas Richter size_t offset = 0, done; 307a029a4eaSThomas Richter int i; 308a029a4eaSThomas Richter 309a029a4eaSThomas Richter memset(data, 0, sz); 310a029a4eaSThomas Richter sz -= sizeof(*trailer); /* Always room for trailer */ 311a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 312a029a4eaSThomas Richter struct cf_ctrset_entry *ctrdata = data + offset; 313a029a4eaSThomas Richter 314a029a4eaSThomas Richter if (!(auth & cpumf_ctr_ctl[i])) 315a029a4eaSThomas Richter continue; /* Counter set not authorized */ 316a029a4eaSThomas Richter 317a029a4eaSThomas Richter done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok); 318a029a4eaSThomas Richter offset += done; 319a029a4eaSThomas Richter } 320a029a4eaSThomas Richter trailer = data + offset; 321a029a4eaSThomas Richter cfdiag_trailer(trailer); 322a029a4eaSThomas Richter return offset + sizeof(*trailer); 323a029a4eaSThomas Richter } 324a029a4eaSThomas Richter 325a029a4eaSThomas Richter /* Calculate the difference for each counter in a counter set. */ 326a029a4eaSThomas Richter static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters) 327a029a4eaSThomas Richter { 328a029a4eaSThomas Richter for (; --counters >= 0; ++pstart, ++pstop) 329a029a4eaSThomas Richter if (*pstop >= *pstart) 330a029a4eaSThomas Richter *pstop -= *pstart; 331a029a4eaSThomas Richter else 332a029a4eaSThomas Richter *pstop = *pstart - *pstop + 1; 333a029a4eaSThomas Richter } 334a029a4eaSThomas Richter 335a029a4eaSThomas Richter /* Scan the counter sets and calculate the difference of each counter 336a029a4eaSThomas Richter * in each set. 
The result is the increment of each counter during the 337a029a4eaSThomas Richter * period the counter set has been activated. 338a029a4eaSThomas Richter * 339a029a4eaSThomas Richter * Return true on success. 340a029a4eaSThomas Richter */ 341a029a4eaSThomas Richter static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) 342a029a4eaSThomas Richter { 343a029a4eaSThomas Richter struct cf_trailer_entry *trailer_start, *trailer_stop; 344a029a4eaSThomas Richter struct cf_ctrset_entry *ctrstart, *ctrstop; 345a029a4eaSThomas Richter size_t offset = 0; 346a029a4eaSThomas Richter 347a029a4eaSThomas Richter auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; 348a029a4eaSThomas Richter do { 349a029a4eaSThomas Richter ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); 350a029a4eaSThomas Richter ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); 351a029a4eaSThomas Richter 352a029a4eaSThomas Richter if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { 353a029a4eaSThomas Richter pr_err_once("cpum_cf_diag counter set compare error " 354a029a4eaSThomas Richter "in set %i\n", ctrstart->set); 355a029a4eaSThomas Richter return 0; 356a029a4eaSThomas Richter } 357a029a4eaSThomas Richter auth &= ~cpumf_ctr_ctl[ctrstart->set]; 358a029a4eaSThomas Richter if (ctrstart->def == CF_DIAG_CTRSET_DEF) { 359a029a4eaSThomas Richter cfdiag_diffctrset((u64 *)(ctrstart + 1), 360a029a4eaSThomas Richter (u64 *)(ctrstop + 1), ctrstart->ctr); 361a029a4eaSThomas Richter offset += ctrstart->ctr * sizeof(u64) + 362a029a4eaSThomas Richter sizeof(*ctrstart); 363a029a4eaSThomas Richter } 364a029a4eaSThomas Richter } while (ctrstart->def && auth); 365a029a4eaSThomas Richter 366a029a4eaSThomas Richter /* Save time_stamp from start of event in stop's trailer */ 367a029a4eaSThomas Richter trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); 368a029a4eaSThomas Richter trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset); 369a029a4eaSThomas Richter 
trailer_stop->progusage[0] = trailer_start->timestamp; 370a029a4eaSThomas Richter 371a029a4eaSThomas Richter return 1; 372a029a4eaSThomas Richter } 373212188a5SHendrik Brueckner 374ee699f32SHendrik Brueckner static enum cpumf_ctr_set get_counter_set(u64 event) 375212188a5SHendrik Brueckner { 376ee699f32SHendrik Brueckner int set = CPUMF_CTR_SET_MAX; 377212188a5SHendrik Brueckner 378212188a5SHendrik Brueckner if (event < 32) 379212188a5SHendrik Brueckner set = CPUMF_CTR_SET_BASIC; 380212188a5SHendrik Brueckner else if (event < 64) 381212188a5SHendrik Brueckner set = CPUMF_CTR_SET_USER; 382212188a5SHendrik Brueckner else if (event < 128) 383212188a5SHendrik Brueckner set = CPUMF_CTR_SET_CRYPTO; 38446a984ffSThomas Richter else if (event < 288) 385212188a5SHendrik Brueckner set = CPUMF_CTR_SET_EXT; 386ee699f32SHendrik Brueckner else if (event >= 448 && event < 496) 387ee699f32SHendrik Brueckner set = CPUMF_CTR_SET_MT_DIAG; 388212188a5SHendrik Brueckner 389212188a5SHendrik Brueckner return set; 390212188a5SHendrik Brueckner } 391212188a5SHendrik Brueckner 392a029a4eaSThomas Richter static int validate_ctr_version(const struct hw_perf_event *hwc, 393a029a4eaSThomas Richter enum cpumf_ctr_set set) 394212188a5SHendrik Brueckner { 395ee699f32SHendrik Brueckner u16 mtdiag_ctl; 396*46c4d945SThomas Richter int err = 0; 397212188a5SHendrik Brueckner 398212188a5SHendrik Brueckner /* check required version for counter sets */ 399a029a4eaSThomas Richter switch (set) { 400212188a5SHendrik Brueckner case CPUMF_CTR_SET_BASIC: 401212188a5SHendrik Brueckner case CPUMF_CTR_SET_USER: 402*46c4d945SThomas Richter if (cpumf_ctr_info.cfvn < 1) 403212188a5SHendrik Brueckner err = -EOPNOTSUPP; 404212188a5SHendrik Brueckner break; 405212188a5SHendrik Brueckner case CPUMF_CTR_SET_CRYPTO: 406*46c4d945SThomas Richter if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 && 40746a984ffSThomas Richter hwc->config > 79) || 408*46c4d945SThomas Richter (cpumf_ctr_info.csvn >= 6 && hwc->config > 
83)) 40946a984ffSThomas Richter err = -EOPNOTSUPP; 41046a984ffSThomas Richter break; 411212188a5SHendrik Brueckner case CPUMF_CTR_SET_EXT: 412*46c4d945SThomas Richter if (cpumf_ctr_info.csvn < 1) 413212188a5SHendrik Brueckner err = -EOPNOTSUPP; 414*46c4d945SThomas Richter if ((cpumf_ctr_info.csvn == 1 && hwc->config > 159) || 415*46c4d945SThomas Richter (cpumf_ctr_info.csvn == 2 && hwc->config > 175) || 416*46c4d945SThomas Richter (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 41746a984ffSThomas Richter && hwc->config > 255) || 418*46c4d945SThomas Richter (cpumf_ctr_info.csvn >= 6 && hwc->config > 287)) 419f47586b2SHendrik Brueckner err = -EOPNOTSUPP; 420212188a5SHendrik Brueckner break; 421ee699f32SHendrik Brueckner case CPUMF_CTR_SET_MT_DIAG: 422*46c4d945SThomas Richter if (cpumf_ctr_info.csvn <= 3) 423ee699f32SHendrik Brueckner err = -EOPNOTSUPP; 424ee699f32SHendrik Brueckner /* 425ee699f32SHendrik Brueckner * MT-diagnostic counters are read-only. The counter set 426ee699f32SHendrik Brueckner * is automatically enabled and activated on all CPUs with 427ee699f32SHendrik Brueckner * multithreading (SMT). Deactivation of multithreading 428ee699f32SHendrik Brueckner * also disables the counter set. State changes are ignored 429ee699f32SHendrik Brueckner * by lcctl(). Because Linux controls SMT enablement through 430ee699f32SHendrik Brueckner * a kernel parameter only, the counter set is either disabled 431ee699f32SHendrik Brueckner * or enabled and active. 432ee699f32SHendrik Brueckner * 433ee699f32SHendrik Brueckner * Thus, the counters can only be used if SMT is on and the 434ee699f32SHendrik Brueckner * counter set is enabled and active. 
435ee699f32SHendrik Brueckner */ 43630e145f8SHendrik Brueckner mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]; 437*46c4d945SThomas Richter if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) && 438*46c4d945SThomas Richter (cpumf_ctr_info.enable_ctl & mtdiag_ctl) && 439*46c4d945SThomas Richter (cpumf_ctr_info.act_ctl & mtdiag_ctl))) 440ee699f32SHendrik Brueckner err = -EOPNOTSUPP; 441ee699f32SHendrik Brueckner break; 442a029a4eaSThomas Richter case CPUMF_CTR_SET_MAX: 443a029a4eaSThomas Richter err = -EOPNOTSUPP; 444212188a5SHendrik Brueckner } 445212188a5SHendrik Brueckner 446212188a5SHendrik Brueckner return err; 447212188a5SHendrik Brueckner } 448212188a5SHendrik Brueckner 449212188a5SHendrik Brueckner static int validate_ctr_auth(const struct hw_perf_event *hwc) 450212188a5SHendrik Brueckner { 451*46c4d945SThomas Richter int err = -ENOENT; 452212188a5SHendrik Brueckner 45358f8e9daSHendrik Brueckner /* Check authorization for cpu counter sets. 45458f8e9daSHendrik Brueckner * If the particular CPU counter set is not authorized, 45558f8e9daSHendrik Brueckner * return with -ENOENT in order to fall back to other 45658f8e9daSHendrik Brueckner * PMUs that might suffice the event request. 45758f8e9daSHendrik Brueckner */ 458*46c4d945SThomas Richter if ((hwc->config_base & cpumf_ctr_info.auth_ctl)) 459*46c4d945SThomas Richter err = 0; 460212188a5SHendrik Brueckner 461212188a5SHendrik Brueckner return err; 462212188a5SHendrik Brueckner } 463212188a5SHendrik Brueckner 464212188a5SHendrik Brueckner /* 465212188a5SHendrik Brueckner * Change the CPUMF state to active. 466212188a5SHendrik Brueckner * Enable and activate the CPU-counter sets according 467212188a5SHendrik Brueckner * to the per-cpu control state. 
468212188a5SHendrik Brueckner */ 469212188a5SHendrik Brueckner static void cpumf_pmu_enable(struct pmu *pmu) 470212188a5SHendrik Brueckner { 471f1c0b831SHendrik Brueckner struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 472212188a5SHendrik Brueckner int err; 473212188a5SHendrik Brueckner 474212188a5SHendrik Brueckner if (cpuhw->flags & PMU_F_ENABLED) 475212188a5SHendrik Brueckner return; 476212188a5SHendrik Brueckner 477a029a4eaSThomas Richter err = lcctl(cpuhw->state | cpuhw->dev_state); 478c01f2a5fSThomas Richter if (err) 479c01f2a5fSThomas Richter pr_err("Enabling the performance measuring unit failed with rc=%x\n", err); 480c01f2a5fSThomas Richter else 481212188a5SHendrik Brueckner cpuhw->flags |= PMU_F_ENABLED; 482212188a5SHendrik Brueckner } 483212188a5SHendrik Brueckner 484212188a5SHendrik Brueckner /* 485212188a5SHendrik Brueckner * Change the CPUMF state to inactive. 486212188a5SHendrik Brueckner * Disable and enable (inactive) the CPU-counter sets according 487212188a5SHendrik Brueckner * to the per-cpu control state. 
488212188a5SHendrik Brueckner */ 489212188a5SHendrik Brueckner static void cpumf_pmu_disable(struct pmu *pmu) 490212188a5SHendrik Brueckner { 491f1c0b831SHendrik Brueckner struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 492212188a5SHendrik Brueckner int err; 493212188a5SHendrik Brueckner u64 inactive; 494212188a5SHendrik Brueckner 495212188a5SHendrik Brueckner if (!(cpuhw->flags & PMU_F_ENABLED)) 496212188a5SHendrik Brueckner return; 497212188a5SHendrik Brueckner 498212188a5SHendrik Brueckner inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); 499a029a4eaSThomas Richter inactive |= cpuhw->dev_state; 500212188a5SHendrik Brueckner err = lcctl(inactive); 501c01f2a5fSThomas Richter if (err) 502c01f2a5fSThomas Richter pr_err("Disabling the performance measuring unit failed with rc=%x\n", err); 503c01f2a5fSThomas Richter else 504212188a5SHendrik Brueckner cpuhw->flags &= ~PMU_F_ENABLED; 505212188a5SHendrik Brueckner } 506212188a5SHendrik Brueckner 5070d5f0dc8SThomas Richter #define PMC_INIT 0UL 5080d5f0dc8SThomas Richter #define PMC_RELEASE 1UL 5091e99c242SThomas Richter 5101e99c242SThomas Richter static void cpum_cf_setup_cpu(void *flags) 5111e99c242SThomas Richter { 5121e99c242SThomas Richter struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 5131e99c242SThomas Richter 5140d5f0dc8SThomas Richter switch ((unsigned long)flags) { 5151e99c242SThomas Richter case PMC_INIT: 5161e99c242SThomas Richter cpuhw->flags |= PMU_F_RESERVED; 5171e99c242SThomas Richter break; 5181e99c242SThomas Richter 5191e99c242SThomas Richter case PMC_RELEASE: 5201e99c242SThomas Richter cpuhw->flags &= ~PMU_F_RESERVED; 5211e99c242SThomas Richter break; 5221e99c242SThomas Richter } 5231e99c242SThomas Richter 5241e99c242SThomas Richter /* Disable CPU counter sets */ 5251e99c242SThomas Richter lcctl(0); 5261e99c242SThomas Richter debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n", 5271e99c242SThomas Richter __func__, *(int *)flags, cpuhw->flags, 
5281e99c242SThomas Richter cpuhw->state); 5291e99c242SThomas Richter } 5301e99c242SThomas Richter 5311e99c242SThomas Richter /* Initialize the CPU-measurement counter facility */ 5321e99c242SThomas Richter static int __kernel_cpumcf_begin(void) 5331e99c242SThomas Richter { 5340d5f0dc8SThomas Richter on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1); 5351e99c242SThomas Richter irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); 5361e99c242SThomas Richter 5371e99c242SThomas Richter return 0; 5381e99c242SThomas Richter } 5391e99c242SThomas Richter 5401e99c242SThomas Richter /* Release the CPU-measurement counter facility */ 5411e99c242SThomas Richter static void __kernel_cpumcf_end(void) 5421e99c242SThomas Richter { 5430d5f0dc8SThomas Richter on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1); 5441e99c242SThomas Richter irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); 5451e99c242SThomas Richter } 546212188a5SHendrik Brueckner 547212188a5SHendrik Brueckner /* Number of perf events counting hardware events */ 548212188a5SHendrik Brueckner static atomic_t num_events = ATOMIC_INIT(0); 549212188a5SHendrik Brueckner /* Used to avoid races in calling reserve/release_cpumf_hardware */ 550212188a5SHendrik Brueckner static DEFINE_MUTEX(pmc_reserve_mutex); 551212188a5SHendrik Brueckner 552212188a5SHendrik Brueckner /* Release the PMU if event is the last perf event */ 553212188a5SHendrik Brueckner static void hw_perf_event_destroy(struct perf_event *event) 554212188a5SHendrik Brueckner { 555212188a5SHendrik Brueckner mutex_lock(&pmc_reserve_mutex); 556212188a5SHendrik Brueckner if (atomic_dec_return(&num_events) == 0) 5573d33345aSHendrik Brueckner __kernel_cpumcf_end(); 558212188a5SHendrik Brueckner mutex_unlock(&pmc_reserve_mutex); 559212188a5SHendrik Brueckner } 560212188a5SHendrik Brueckner 561212188a5SHendrik Brueckner /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ 562212188a5SHendrik Brueckner static const int 
cpumf_generic_events_basic[] = { 563212188a5SHendrik Brueckner [PERF_COUNT_HW_CPU_CYCLES] = 0, 564212188a5SHendrik Brueckner [PERF_COUNT_HW_INSTRUCTIONS] = 1, 565212188a5SHendrik Brueckner [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 566212188a5SHendrik Brueckner [PERF_COUNT_HW_CACHE_MISSES] = -1, 567212188a5SHendrik Brueckner [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 568212188a5SHendrik Brueckner [PERF_COUNT_HW_BRANCH_MISSES] = -1, 569212188a5SHendrik Brueckner [PERF_COUNT_HW_BUS_CYCLES] = -1, 570212188a5SHendrik Brueckner }; 571212188a5SHendrik Brueckner /* CPUMF <-> perf event mappings for userspace (problem-state set) */ 572212188a5SHendrik Brueckner static const int cpumf_generic_events_user[] = { 573212188a5SHendrik Brueckner [PERF_COUNT_HW_CPU_CYCLES] = 32, 574212188a5SHendrik Brueckner [PERF_COUNT_HW_INSTRUCTIONS] = 33, 575212188a5SHendrik Brueckner [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 576212188a5SHendrik Brueckner [PERF_COUNT_HW_CACHE_MISSES] = -1, 577212188a5SHendrik Brueckner [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 578212188a5SHendrik Brueckner [PERF_COUNT_HW_BRANCH_MISSES] = -1, 579212188a5SHendrik Brueckner [PERF_COUNT_HW_BUS_CYCLES] = -1, 580212188a5SHendrik Brueckner }; 581212188a5SHendrik Brueckner 582a029a4eaSThomas Richter static void cpumf_hw_inuse(void) 583a029a4eaSThomas Richter { 584a029a4eaSThomas Richter mutex_lock(&pmc_reserve_mutex); 585a029a4eaSThomas Richter if (atomic_inc_return(&num_events) == 1) 586a029a4eaSThomas Richter __kernel_cpumcf_begin(); 587a029a4eaSThomas Richter mutex_unlock(&pmc_reserve_mutex); 588a029a4eaSThomas Richter } 589a029a4eaSThomas Richter 59091d5364dSThomas Richter static int is_userspace_event(u64 ev) 59191d5364dSThomas Richter { 59291d5364dSThomas Richter return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || 59391d5364dSThomas Richter cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev; 59491d5364dSThomas Richter } 59591d5364dSThomas Richter 5966a82e23fSThomas Richter static int 
__hw_perf_event_init(struct perf_event *event, unsigned int type) 597212188a5SHendrik Brueckner { 598212188a5SHendrik Brueckner struct perf_event_attr *attr = &event->attr; 599212188a5SHendrik Brueckner struct hw_perf_event *hwc = &event->hw; 600ee699f32SHendrik Brueckner enum cpumf_ctr_set set; 60147b74785SThomas Richter int err = 0; 602212188a5SHendrik Brueckner u64 ev; 603212188a5SHendrik Brueckner 6046a82e23fSThomas Richter switch (type) { 605212188a5SHendrik Brueckner case PERF_TYPE_RAW: 606212188a5SHendrik Brueckner /* Raw events are used to access counters directly, 607212188a5SHendrik Brueckner * hence do not permit excludes */ 608212188a5SHendrik Brueckner if (attr->exclude_kernel || attr->exclude_user || 609212188a5SHendrik Brueckner attr->exclude_hv) 610212188a5SHendrik Brueckner return -EOPNOTSUPP; 611212188a5SHendrik Brueckner ev = attr->config; 612212188a5SHendrik Brueckner break; 613212188a5SHendrik Brueckner 614212188a5SHendrik Brueckner case PERF_TYPE_HARDWARE: 615613a41b0SThomas Richter if (is_sampling_event(event)) /* No sampling support */ 616613a41b0SThomas Richter return -ENOENT; 617212188a5SHendrik Brueckner ev = attr->config; 618212188a5SHendrik Brueckner if (!attr->exclude_user && attr->exclude_kernel) { 61991d5364dSThomas Richter /* 62091d5364dSThomas Richter * Count user space (problem-state) only 62191d5364dSThomas Richter * Handle events 32 and 33 as 0:u and 1:u 62291d5364dSThomas Richter */ 62391d5364dSThomas Richter if (!is_userspace_event(ev)) { 624212188a5SHendrik Brueckner if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) 625212188a5SHendrik Brueckner return -EOPNOTSUPP; 626212188a5SHendrik Brueckner ev = cpumf_generic_events_user[ev]; 62791d5364dSThomas Richter } 628212188a5SHendrik Brueckner } else if (!attr->exclude_kernel && attr->exclude_user) { 62991d5364dSThomas Richter /* No support for kernel space counters only */ 630212188a5SHendrik Brueckner return -EOPNOTSUPP; 63191d5364dSThomas Richter } else { 63291d5364dSThomas 
Richter /* Count user and kernel space, incl. events 32 + 33 */ 63391d5364dSThomas Richter if (!is_userspace_event(ev)) { 634212188a5SHendrik Brueckner if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) 635212188a5SHendrik Brueckner return -EOPNOTSUPP; 636212188a5SHendrik Brueckner ev = cpumf_generic_events_basic[ev]; 637212188a5SHendrik Brueckner } 63891d5364dSThomas Richter } 639212188a5SHendrik Brueckner break; 640212188a5SHendrik Brueckner 641212188a5SHendrik Brueckner default: 642212188a5SHendrik Brueckner return -ENOENT; 643212188a5SHendrik Brueckner } 644212188a5SHendrik Brueckner 645212188a5SHendrik Brueckner if (ev == -1) 646212188a5SHendrik Brueckner return -ENOENT; 647212188a5SHendrik Brueckner 64820ba46daSHendrik Brueckner if (ev > PERF_CPUM_CF_MAX_CTR) 6490bb2ae1bSThomas Richter return -ENOENT; 650212188a5SHendrik Brueckner 651ee699f32SHendrik Brueckner /* Obtain the counter set to which the specified counter belongs */ 652ee699f32SHendrik Brueckner set = get_counter_set(ev); 653ee699f32SHendrik Brueckner switch (set) { 654ee699f32SHendrik Brueckner case CPUMF_CTR_SET_BASIC: 655ee699f32SHendrik Brueckner case CPUMF_CTR_SET_USER: 656ee699f32SHendrik Brueckner case CPUMF_CTR_SET_CRYPTO: 657ee699f32SHendrik Brueckner case CPUMF_CTR_SET_EXT: 658ee699f32SHendrik Brueckner case CPUMF_CTR_SET_MT_DIAG: 659ee699f32SHendrik Brueckner /* 660ee699f32SHendrik Brueckner * Use the hardware perf event structure to store the 661ee699f32SHendrik Brueckner * counter number in the 'config' member and the counter 662a029a4eaSThomas Richter * set number in the 'config_base' as bit mask. 663a029a4eaSThomas Richter * It is later used to enable/disable the counter(s). 
664212188a5SHendrik Brueckner */ 665212188a5SHendrik Brueckner hwc->config = ev; 666a029a4eaSThomas Richter hwc->config_base = cpumf_ctr_ctl[set]; 667ee699f32SHendrik Brueckner break; 668ee699f32SHendrik Brueckner case CPUMF_CTR_SET_MAX: 669ee699f32SHendrik Brueckner /* The counter could not be associated to a counter set */ 670ee699f32SHendrik Brueckner return -EINVAL; 6711c0a9c79SJiapeng Chong } 672212188a5SHendrik Brueckner 673212188a5SHendrik Brueckner /* Initialize for using the CPU-measurement counter facility */ 674a029a4eaSThomas Richter cpumf_hw_inuse(); 675212188a5SHendrik Brueckner event->destroy = hw_perf_event_destroy; 676212188a5SHendrik Brueckner 677212188a5SHendrik Brueckner /* Finally, validate version and authorization of the counter set */ 678212188a5SHendrik Brueckner err = validate_ctr_auth(hwc); 679212188a5SHendrik Brueckner if (!err) 680a029a4eaSThomas Richter err = validate_ctr_version(hwc, set); 681212188a5SHendrik Brueckner 682212188a5SHendrik Brueckner return err; 683212188a5SHendrik Brueckner } 684212188a5SHendrik Brueckner 685be857b7fSThomas Richter /* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different 686be857b7fSThomas Richter * attribute::type values: 687be857b7fSThomas Richter * - PERF_TYPE_HARDWARE: 688be857b7fSThomas Richter * - pmu->type: 689be857b7fSThomas Richter * Handle both type of invocations identical. They address the same hardware. 690be857b7fSThomas Richter * The result is different when event modifiers exclude_kernel and/or 691be857b7fSThomas Richter * exclude_user are also set. 
692be857b7fSThomas Richter */ 693be857b7fSThomas Richter static int cpumf_pmu_event_type(struct perf_event *event) 694be857b7fSThomas Richter { 695be857b7fSThomas Richter u64 ev = event->attr.config; 696be857b7fSThomas Richter 697be857b7fSThomas Richter if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || 698be857b7fSThomas Richter cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || 699be857b7fSThomas Richter cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || 700be857b7fSThomas Richter cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) 701be857b7fSThomas Richter return PERF_TYPE_HARDWARE; 702be857b7fSThomas Richter return PERF_TYPE_RAW; 703be857b7fSThomas Richter } 704be857b7fSThomas Richter 705212188a5SHendrik Brueckner static int cpumf_pmu_event_init(struct perf_event *event) 706212188a5SHendrik Brueckner { 7076a82e23fSThomas Richter unsigned int type = event->attr.type; 708212188a5SHendrik Brueckner int err; 709212188a5SHendrik Brueckner 7106a82e23fSThomas Richter if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) 7116a82e23fSThomas Richter err = __hw_perf_event_init(event, type); 7126a82e23fSThomas Richter else if (event->pmu->type == type) 7136a82e23fSThomas Richter /* Registered as unknown PMU */ 714be857b7fSThomas Richter err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); 7156a82e23fSThomas Richter else 716212188a5SHendrik Brueckner return -ENOENT; 717212188a5SHendrik Brueckner 718212188a5SHendrik Brueckner if (unlikely(err) && event->destroy) 719212188a5SHendrik Brueckner event->destroy(event); 720212188a5SHendrik Brueckner 721212188a5SHendrik Brueckner return err; 722212188a5SHendrik Brueckner } 723212188a5SHendrik Brueckner 724212188a5SHendrik Brueckner static int hw_perf_event_reset(struct perf_event *event) 725212188a5SHendrik Brueckner { 726212188a5SHendrik Brueckner u64 prev, new; 727212188a5SHendrik Brueckner int err; 728212188a5SHendrik Brueckner 729212188a5SHendrik Brueckner do { 
730212188a5SHendrik Brueckner prev = local64_read(&event->hw.prev_count); 731212188a5SHendrik Brueckner err = ecctr(event->hw.config, &new); 732212188a5SHendrik Brueckner if (err) { 733212188a5SHendrik Brueckner if (err != 3) 734212188a5SHendrik Brueckner break; 735212188a5SHendrik Brueckner /* The counter is not (yet) available. This 736212188a5SHendrik Brueckner * might happen if the counter set to which 737212188a5SHendrik Brueckner * this counter belongs is in the disabled 738212188a5SHendrik Brueckner * state. 739212188a5SHendrik Brueckner */ 740212188a5SHendrik Brueckner new = 0; 741212188a5SHendrik Brueckner } 742212188a5SHendrik Brueckner } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); 743212188a5SHendrik Brueckner 744212188a5SHendrik Brueckner return err; 745212188a5SHendrik Brueckner } 746212188a5SHendrik Brueckner 747485527baSHendrik Brueckner static void hw_perf_event_update(struct perf_event *event) 748212188a5SHendrik Brueckner { 749212188a5SHendrik Brueckner u64 prev, new, delta; 750212188a5SHendrik Brueckner int err; 751212188a5SHendrik Brueckner 752212188a5SHendrik Brueckner do { 753212188a5SHendrik Brueckner prev = local64_read(&event->hw.prev_count); 754212188a5SHendrik Brueckner err = ecctr(event->hw.config, &new); 755212188a5SHendrik Brueckner if (err) 756485527baSHendrik Brueckner return; 757212188a5SHendrik Brueckner } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); 758212188a5SHendrik Brueckner 759212188a5SHendrik Brueckner delta = (prev <= new) ? 
new - prev 760212188a5SHendrik Brueckner : (-1ULL - prev) + new + 1; /* overflow */ 761212188a5SHendrik Brueckner local64_add(delta, &event->count); 762212188a5SHendrik Brueckner } 763212188a5SHendrik Brueckner 764212188a5SHendrik Brueckner static void cpumf_pmu_read(struct perf_event *event) 765212188a5SHendrik Brueckner { 766212188a5SHendrik Brueckner if (event->hw.state & PERF_HES_STOPPED) 767212188a5SHendrik Brueckner return; 768212188a5SHendrik Brueckner 769212188a5SHendrik Brueckner hw_perf_event_update(event); 770212188a5SHendrik Brueckner } 771212188a5SHendrik Brueckner 772212188a5SHendrik Brueckner static void cpumf_pmu_start(struct perf_event *event, int flags) 773212188a5SHendrik Brueckner { 774f1c0b831SHendrik Brueckner struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 775212188a5SHendrik Brueckner struct hw_perf_event *hwc = &event->hw; 776a029a4eaSThomas Richter int i; 777212188a5SHendrik Brueckner 77815e5b53fSThomas Richter if (!(hwc->state & PERF_HES_STOPPED)) 779212188a5SHendrik Brueckner return; 780212188a5SHendrik Brueckner 781212188a5SHendrik Brueckner hwc->state = 0; 782212188a5SHendrik Brueckner 783212188a5SHendrik Brueckner /* (Re-)enable and activate the counter set */ 784212188a5SHendrik Brueckner ctr_set_enable(&cpuhw->state, hwc->config_base); 785212188a5SHendrik Brueckner ctr_set_start(&cpuhw->state, hwc->config_base); 786212188a5SHendrik Brueckner 787212188a5SHendrik Brueckner /* The counter set to which this counter belongs can be already active. 788212188a5SHendrik Brueckner * Because all counters in a set are active, the event->hw.prev_count 789212188a5SHendrik Brueckner * needs to be synchronized. At this point, the counter set can be in 790212188a5SHendrik Brueckner * the inactive or disabled state. 
791212188a5SHendrik Brueckner */ 792a029a4eaSThomas Richter if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 793a029a4eaSThomas Richter cpuhw->usedss = cfdiag_getctr(cpuhw->start, 794a029a4eaSThomas Richter sizeof(cpuhw->start), 795a029a4eaSThomas Richter hwc->config_base, true); 796a029a4eaSThomas Richter } else { 797212188a5SHendrik Brueckner hw_perf_event_reset(event); 798a029a4eaSThomas Richter } 799212188a5SHendrik Brueckner 800a029a4eaSThomas Richter /* Increment refcount for counter sets */ 801a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 802a029a4eaSThomas Richter if ((hwc->config_base & cpumf_ctr_ctl[i])) 803a029a4eaSThomas Richter atomic_inc(&cpuhw->ctr_set[i]); 804a029a4eaSThomas Richter } 805a029a4eaSThomas Richter 806a029a4eaSThomas Richter /* Create perf event sample with the counter sets as raw data. The sample 807a029a4eaSThomas Richter * is then pushed to the event subsystem and the function checks for 808a029a4eaSThomas Richter * possible event overflows. If an event overflow occurs, the PMU is 809a029a4eaSThomas Richter * stopped. 810a029a4eaSThomas Richter * 811a029a4eaSThomas Richter * Return non-zero if an event overflow occurred. 
812a029a4eaSThomas Richter */ 813a029a4eaSThomas Richter static int cfdiag_push_sample(struct perf_event *event, 814a029a4eaSThomas Richter struct cpu_cf_events *cpuhw) 815a029a4eaSThomas Richter { 816a029a4eaSThomas Richter struct perf_sample_data data; 817a029a4eaSThomas Richter struct perf_raw_record raw; 818a029a4eaSThomas Richter struct pt_regs regs; 819a029a4eaSThomas Richter int overflow; 820a029a4eaSThomas Richter 821a029a4eaSThomas Richter /* Setup perf sample */ 822a029a4eaSThomas Richter perf_sample_data_init(&data, 0, event->hw.last_period); 823a029a4eaSThomas Richter memset(®s, 0, sizeof(regs)); 824a029a4eaSThomas Richter memset(&raw, 0, sizeof(raw)); 825a029a4eaSThomas Richter 826a029a4eaSThomas Richter if (event->attr.sample_type & PERF_SAMPLE_CPU) 827a029a4eaSThomas Richter data.cpu_entry.cpu = event->cpu; 828a029a4eaSThomas Richter if (event->attr.sample_type & PERF_SAMPLE_RAW) { 829a029a4eaSThomas Richter raw.frag.size = cpuhw->usedss; 830a029a4eaSThomas Richter raw.frag.data = cpuhw->stop; 8310a9081cfSNamhyung Kim perf_sample_save_raw_data(&data, &raw); 832a029a4eaSThomas Richter } 833a029a4eaSThomas Richter 834a029a4eaSThomas Richter overflow = perf_event_overflow(event, &data, ®s); 835a029a4eaSThomas Richter debug_sprintf_event(cf_dbg, 3, 836a029a4eaSThomas Richter "%s event %#llx sample_type %#llx raw %d ov %d\n", 837a029a4eaSThomas Richter __func__, event->hw.config, 838a029a4eaSThomas Richter event->attr.sample_type, raw.size, overflow); 839a029a4eaSThomas Richter if (overflow) 840a029a4eaSThomas Richter event->pmu->stop(event, 0); 841a029a4eaSThomas Richter 842a029a4eaSThomas Richter perf_event_update_userpage(event); 843a029a4eaSThomas Richter return overflow; 844212188a5SHendrik Brueckner } 845212188a5SHendrik Brueckner 846212188a5SHendrik Brueckner static void cpumf_pmu_stop(struct perf_event *event, int flags) 847212188a5SHendrik Brueckner { 848f1c0b831SHendrik Brueckner struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 
849212188a5SHendrik Brueckner struct hw_perf_event *hwc = &event->hw; 850a029a4eaSThomas Richter int i; 851212188a5SHendrik Brueckner 852212188a5SHendrik Brueckner if (!(hwc->state & PERF_HES_STOPPED)) { 853212188a5SHendrik Brueckner /* Decrement reference count for this counter set and if this 854212188a5SHendrik Brueckner * is the last used counter in the set, clear activation 855212188a5SHendrik Brueckner * control and set the counter set state to inactive. 856212188a5SHendrik Brueckner */ 857a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 858a029a4eaSThomas Richter if (!(hwc->config_base & cpumf_ctr_ctl[i])) 859a029a4eaSThomas Richter continue; 860a029a4eaSThomas Richter if (!atomic_dec_return(&cpuhw->ctr_set[i])) 861a029a4eaSThomas Richter ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]); 862a029a4eaSThomas Richter } 8630cceeab5SThomas Richter hwc->state |= PERF_HES_STOPPED; 864212188a5SHendrik Brueckner } 865212188a5SHendrik Brueckner 866212188a5SHendrik Brueckner if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 867a029a4eaSThomas Richter if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 868a029a4eaSThomas Richter local64_inc(&event->count); 869a029a4eaSThomas Richter cpuhw->usedss = cfdiag_getctr(cpuhw->stop, 870a029a4eaSThomas Richter sizeof(cpuhw->stop), 871a029a4eaSThomas Richter event->hw.config_base, 872a029a4eaSThomas Richter false); 873a029a4eaSThomas Richter if (cfdiag_diffctr(cpuhw, event->hw.config_base)) 874a029a4eaSThomas Richter cfdiag_push_sample(event, cpuhw); 8759d48c7afSThomas Richter } else if (cpuhw->flags & PMU_F_RESERVED) { 8769d48c7afSThomas Richter /* Only update when PMU not hotplugged off */ 877212188a5SHendrik Brueckner hw_perf_event_update(event); 8789d48c7afSThomas Richter } 8790cceeab5SThomas Richter hwc->state |= PERF_HES_UPTODATE; 880212188a5SHendrik Brueckner } 881212188a5SHendrik Brueckner } 882212188a5SHendrik Brueckner 883212188a5SHendrik Brueckner static int 
cpumf_pmu_add(struct perf_event *event, int flags) 884212188a5SHendrik Brueckner { 885f1c0b831SHendrik Brueckner struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 886212188a5SHendrik Brueckner 887212188a5SHendrik Brueckner ctr_set_enable(&cpuhw->state, event->hw.config_base); 888212188a5SHendrik Brueckner event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 889212188a5SHendrik Brueckner 890212188a5SHendrik Brueckner if (flags & PERF_EF_START) 891212188a5SHendrik Brueckner cpumf_pmu_start(event, PERF_EF_RELOAD); 892212188a5SHendrik Brueckner 893212188a5SHendrik Brueckner return 0; 894212188a5SHendrik Brueckner } 895212188a5SHendrik Brueckner 896212188a5SHendrik Brueckner static void cpumf_pmu_del(struct perf_event *event, int flags) 897212188a5SHendrik Brueckner { 898f1c0b831SHendrik Brueckner struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 899a029a4eaSThomas Richter int i; 900212188a5SHendrik Brueckner 901212188a5SHendrik Brueckner cpumf_pmu_stop(event, PERF_EF_UPDATE); 902212188a5SHendrik Brueckner 903212188a5SHendrik Brueckner /* Check if any counter in the counter set is still used. If not used, 904212188a5SHendrik Brueckner * change the counter set to the disabled state. This also clears the 905212188a5SHendrik Brueckner * content of all counters in the set. 906212188a5SHendrik Brueckner * 907212188a5SHendrik Brueckner * When a new perf event has been added but not yet started, this can 908212188a5SHendrik Brueckner * clear enable control and resets all counters in a set. Therefore, 909212188a5SHendrik Brueckner * cpumf_pmu_start() always has to reenable a counter set. 
910212188a5SHendrik Brueckner */ 911a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 912a029a4eaSThomas Richter if (!atomic_read(&cpuhw->ctr_set[i])) 913a029a4eaSThomas Richter ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]); 914212188a5SHendrik Brueckner } 915212188a5SHendrik Brueckner 916212188a5SHendrik Brueckner /* Performance monitoring unit for s390x */ 917212188a5SHendrik Brueckner static struct pmu cpumf_pmu = { 9189254e70cSHendrik Brueckner .task_ctx_nr = perf_sw_context, 9199254e70cSHendrik Brueckner .capabilities = PERF_PMU_CAP_NO_INTERRUPT, 920212188a5SHendrik Brueckner .pmu_enable = cpumf_pmu_enable, 921212188a5SHendrik Brueckner .pmu_disable = cpumf_pmu_disable, 922212188a5SHendrik Brueckner .event_init = cpumf_pmu_event_init, 923212188a5SHendrik Brueckner .add = cpumf_pmu_add, 924212188a5SHendrik Brueckner .del = cpumf_pmu_del, 925212188a5SHendrik Brueckner .start = cpumf_pmu_start, 926212188a5SHendrik Brueckner .stop = cpumf_pmu_stop, 927212188a5SHendrik Brueckner .read = cpumf_pmu_read, 928212188a5SHendrik Brueckner }; 929212188a5SHendrik Brueckner 9300d5f0dc8SThomas Richter static int cpum_cf_setup(unsigned int cpu, unsigned long flags) 9311e99c242SThomas Richter { 9321e99c242SThomas Richter local_irq_disable(); 9330d5f0dc8SThomas Richter cpum_cf_setup_cpu((void *)flags); 9341e99c242SThomas Richter local_irq_enable(); 9351e99c242SThomas Richter return 0; 9361e99c242SThomas Richter } 9371e99c242SThomas Richter 9381e99c242SThomas Richter static int cfset_online_cpu(unsigned int cpu); 9391e99c242SThomas Richter static int cpum_cf_online_cpu(unsigned int cpu) 9401e99c242SThomas Richter { 9411e99c242SThomas Richter debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__, 9421e99c242SThomas Richter cpu, in_interrupt()); 9431e99c242SThomas Richter cpum_cf_setup(cpu, PMC_INIT); 9441e99c242SThomas Richter return cfset_online_cpu(cpu); 9451e99c242SThomas Richter } 9461e99c242SThomas Richter 9471e99c242SThomas 
Richter static int cfset_offline_cpu(unsigned int cpu); 9481e99c242SThomas Richter static int cpum_cf_offline_cpu(unsigned int cpu) 9491e99c242SThomas Richter { 9501e99c242SThomas Richter debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu); 9511e99c242SThomas Richter cfset_offline_cpu(cpu); 9521e99c242SThomas Richter return cpum_cf_setup(cpu, PMC_RELEASE); 9531e99c242SThomas Richter } 9541e99c242SThomas Richter 9557a8f09acSThomas Richter /* Return true if store counter set multiple instruction is available */ 9567a8f09acSThomas Richter static inline int stccm_avail(void) 9577a8f09acSThomas Richter { 9587a8f09acSThomas Richter return test_facility(142); 9597a8f09acSThomas Richter } 9607a8f09acSThomas Richter 9611e99c242SThomas Richter /* CPU-measurement alerts for the counter facility */ 9621e99c242SThomas Richter static void cpumf_measurement_alert(struct ext_code ext_code, 9631e99c242SThomas Richter unsigned int alert, unsigned long unused) 9641e99c242SThomas Richter { 9651e99c242SThomas Richter struct cpu_cf_events *cpuhw; 9661e99c242SThomas Richter 9671e99c242SThomas Richter if (!(alert & CPU_MF_INT_CF_MASK)) 9681e99c242SThomas Richter return; 9691e99c242SThomas Richter 9701e99c242SThomas Richter inc_irq_stat(IRQEXT_CMC); 9711e99c242SThomas Richter cpuhw = this_cpu_ptr(&cpu_cf_events); 9721e99c242SThomas Richter 9731e99c242SThomas Richter /* 9741e99c242SThomas Richter * Measurement alerts are shared and might happen when the PMU 9751e99c242SThomas Richter * is not reserved. Ignore these alerts in this case. 
9761e99c242SThomas Richter */ 9771e99c242SThomas Richter if (!(cpuhw->flags & PMU_F_RESERVED)) 9781e99c242SThomas Richter return; 9791e99c242SThomas Richter 9801e99c242SThomas Richter /* counter authorization change alert */ 9811e99c242SThomas Richter if (alert & CPU_MF_INT_CF_CACA) 982*46c4d945SThomas Richter qctri(&cpumf_ctr_info); 9831e99c242SThomas Richter 9841e99c242SThomas Richter /* loss of counter data alert */ 9851e99c242SThomas Richter if (alert & CPU_MF_INT_CF_LCDA) 9861e99c242SThomas Richter pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); 9871e99c242SThomas Richter 9881e99c242SThomas Richter /* loss of MT counter data alert */ 9891e99c242SThomas Richter if (alert & CPU_MF_INT_CF_MTDA) 9901e99c242SThomas Richter pr_warn("CPU[%i] MT counter data was lost\n", 9911e99c242SThomas Richter smp_processor_id()); 9921e99c242SThomas Richter } 9931e99c242SThomas Richter 994a029a4eaSThomas Richter static int cfset_init(void); 995212188a5SHendrik Brueckner static int __init cpumf_pmu_init(void) 996212188a5SHendrik Brueckner { 997212188a5SHendrik Brueckner int rc; 998212188a5SHendrik Brueckner 999*46c4d945SThomas Richter /* Extract counter measurement facility information */ 1000*46c4d945SThomas Richter if (!cpum_cf_avail() || qctri(&cpumf_ctr_info)) 1001212188a5SHendrik Brueckner return -ENODEV; 1002212188a5SHendrik Brueckner 1003*46c4d945SThomas Richter /* Determine and store counter set sizes for later reference */ 1004*46c4d945SThomas Richter for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 1005*46c4d945SThomas Richter cpum_cf_make_setsize(rc); 1006*46c4d945SThomas Richter 10071e99c242SThomas Richter /* 10081e99c242SThomas Richter * Clear bit 15 of cr0 to unauthorize problem-state to 10091e99c242SThomas Richter * extract measurement counters 10101e99c242SThomas Richter */ 10111e99c242SThomas Richter ctl_clear_bit(0, 48); 10121e99c242SThomas Richter 10131e99c242SThomas Richter /* register handler for measurement-alert interruptions */ 
10141e99c242SThomas Richter rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, 10151e99c242SThomas Richter cpumf_measurement_alert); 10161e99c242SThomas Richter if (rc) { 10171e99c242SThomas Richter pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc); 10181e99c242SThomas Richter return rc; 10191e99c242SThomas Richter } 10201e99c242SThomas Richter 1021a029a4eaSThomas Richter /* Setup s390dbf facility */ 1022a029a4eaSThomas Richter cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); 1023a029a4eaSThomas Richter if (!cf_dbg) { 1024a029a4eaSThomas Richter pr_err("Registration of s390dbf(cpum_cf) failed\n"); 10251e99c242SThomas Richter rc = -ENOMEM; 10261e99c242SThomas Richter goto out1; 10277d244643Skernel test robot } 1028a029a4eaSThomas Richter debug_register_view(cf_dbg, &debug_sprintf_view); 1029a029a4eaSThomas Richter 1030c7168325SHendrik Brueckner cpumf_pmu.attr_groups = cpumf_cf_event_group(); 10316a82e23fSThomas Richter rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); 1032a029a4eaSThomas Richter if (rc) { 1033212188a5SHendrik Brueckner pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); 10341e99c242SThomas Richter goto out2; 1035a029a4eaSThomas Richter } else if (stccm_avail()) { /* Setup counter set device */ 1036a029a4eaSThomas Richter cfset_init(); 1037a029a4eaSThomas Richter } 10381e99c242SThomas Richter 10391e99c242SThomas Richter rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, 10401e99c242SThomas Richter "perf/s390/cf:online", 10411e99c242SThomas Richter cpum_cf_online_cpu, cpum_cf_offline_cpu); 10421e99c242SThomas Richter return rc; 10431e99c242SThomas Richter 10441e99c242SThomas Richter out2: 10451e99c242SThomas Richter debug_unregister_view(cf_dbg, &debug_sprintf_view); 10461e99c242SThomas Richter debug_unregister(cf_dbg); 10471e99c242SThomas Richter out1: 10481e99c242SThomas Richter unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); 1049212188a5SHendrik Brueckner return rc; 
1050212188a5SHendrik Brueckner } 1051a029a4eaSThomas Richter 1052a029a4eaSThomas Richter /* Support for the CPU Measurement Facility counter set extraction using 1053a029a4eaSThomas Richter * device /dev/hwctr. This allows user space programs to extract complete 1054a029a4eaSThomas Richter * counter set via normal file operations. 1055a029a4eaSThomas Richter */ 1056a029a4eaSThomas Richter 105745338031SThomas Richter static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */ 1058a029a4eaSThomas Richter static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */ 1059a029a4eaSThomas Richter struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ 1060a029a4eaSThomas Richter unsigned int sets; /* Counter set bit mask */ 1061a029a4eaSThomas Richter atomic_t cpus_ack; /* # CPUs successfully executed func */ 1062a029a4eaSThomas Richter }; 1063a029a4eaSThomas Richter 106445338031SThomas Richter static struct cfset_session { /* CPUs and counter set bit mask */ 106545338031SThomas Richter struct list_head head; /* Head of list of active processes */ 106645338031SThomas Richter } cfset_session = { 106745338031SThomas Richter .head = LIST_HEAD_INIT(cfset_session.head) 106845338031SThomas Richter }; 106945338031SThomas Richter 107045338031SThomas Richter struct cfset_request { /* CPUs and counter set bit mask */ 1071a029a4eaSThomas Richter unsigned long ctrset; /* Bit mask of counter set to read */ 1072a029a4eaSThomas Richter cpumask_t mask; /* CPU mask to read from */ 107345338031SThomas Richter struct list_head node; /* Chain to cfset_session.head */ 107445338031SThomas Richter }; 1075a029a4eaSThomas Richter 107645338031SThomas Richter static void cfset_session_init(void) 1077a029a4eaSThomas Richter { 107845338031SThomas Richter INIT_LIST_HEAD(&cfset_session.head); 107945338031SThomas Richter } 108045338031SThomas Richter 108145338031SThomas Richter /* Remove current request from global bookkeeping. 
Maintain a counter set bit 108245338031SThomas Richter * mask on a per CPU basis. 108345338031SThomas Richter * Done in process context under mutex protection. 108445338031SThomas Richter */ 108545338031SThomas Richter static void cfset_session_del(struct cfset_request *p) 108645338031SThomas Richter { 108745338031SThomas Richter list_del(&p->node); 108845338031SThomas Richter } 108945338031SThomas Richter 109045338031SThomas Richter /* Add current request to global bookkeeping. Maintain a counter set bit mask 109145338031SThomas Richter * on a per CPU basis. 109245338031SThomas Richter * Done in process context under mutex protection. 109345338031SThomas Richter */ 109445338031SThomas Richter static void cfset_session_add(struct cfset_request *p) 109545338031SThomas Richter { 109645338031SThomas Richter list_add(&p->node, &cfset_session.head); 1097a029a4eaSThomas Richter } 1098a029a4eaSThomas Richter 1099a029a4eaSThomas Richter /* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access 1100a029a4eaSThomas Richter * path is currently used. 1101a029a4eaSThomas Richter * The cpu_cf_events::dev_state is used to denote counter sets in use by this 1102a029a4eaSThomas Richter * interface. It is always or'ed in. If this interface is not active, its 1103a029a4eaSThomas Richter * value is zero and no additional counter sets will be included. 1104a029a4eaSThomas Richter * 1105a029a4eaSThomas Richter * The cpu_cf_events::state is used by the perf_event_open SVC and remains 1106a029a4eaSThomas Richter * unchanged. 1107a029a4eaSThomas Richter * 1108a029a4eaSThomas Richter * perf_pmu_enable() and perf_pmu_enable() and its call backs 1109a029a4eaSThomas Richter * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the 1110a029a4eaSThomas Richter * performance measurement subsystem to enable per process 1111a029a4eaSThomas Richter * CPU Measurement counter facility. 
1112a029a4eaSThomas Richter * The XXX_enable() and XXX_disable functions are used to turn off 1113a029a4eaSThomas Richter * x86 performance monitoring interrupt (PMI) during scheduling. 1114a029a4eaSThomas Richter * s390 uses these calls to temporarily stop and resume the active CPU 1115a029a4eaSThomas Richter * counters sets during scheduling. 1116a029a4eaSThomas Richter * 1117a029a4eaSThomas Richter * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr 1118a029a4eaSThomas Richter * device access. The perf_event_open() SVC interface makes a lot of effort 1119a029a4eaSThomas Richter * to only run the counters while the calling process is actively scheduled 1120a029a4eaSThomas Richter * to run. 1121a029a4eaSThomas Richter * When /dev/hwctr interface is also used at the same time, the counter sets 1122a029a4eaSThomas Richter * will keep running, even when the process is scheduled off a CPU. 1123a029a4eaSThomas Richter * However this is not a problem and does not lead to wrong counter values 1124a029a4eaSThomas Richter * for the perf_event_open() SVC. The current counter value will be recorded 1125a029a4eaSThomas Richter * during schedule-in. At schedule-out time the current counter value is 1126a029a4eaSThomas Richter * extracted again and the delta is calculated and added to the event. 
1127a029a4eaSThomas Richter */ 1128a029a4eaSThomas Richter /* Stop all counter sets via ioctl interface */ 1129a029a4eaSThomas Richter static void cfset_ioctl_off(void *parm) 1130a029a4eaSThomas Richter { 1131a029a4eaSThomas Richter struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 1132a029a4eaSThomas Richter struct cfset_call_on_cpu_parm *p = parm; 1133a029a4eaSThomas Richter int rc; 1134a029a4eaSThomas Richter 113545338031SThomas Richter /* Check if any counter set used by /dev/hwc */ 1136a029a4eaSThomas Richter for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 113745338031SThomas Richter if ((p->sets & cpumf_ctr_ctl[rc])) { 113845338031SThomas Richter if (!atomic_dec_return(&cpuhw->ctr_set[rc])) { 113945338031SThomas Richter ctr_set_disable(&cpuhw->dev_state, 114045338031SThomas Richter cpumf_ctr_ctl[rc]); 114145338031SThomas Richter ctr_set_stop(&cpuhw->dev_state, 114245338031SThomas Richter cpumf_ctr_ctl[rc]); 114345338031SThomas Richter } 114445338031SThomas Richter } 114545338031SThomas Richter /* Keep perf_event_open counter sets */ 114645338031SThomas Richter rc = lcctl(cpuhw->dev_state | cpuhw->state); 1147a029a4eaSThomas Richter if (rc) 1148a029a4eaSThomas Richter pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n", 1149a029a4eaSThomas Richter cpuhw->state, S390_HWCTR_DEVICE, rc); 115045338031SThomas Richter if (!cpuhw->dev_state) 1151a029a4eaSThomas Richter cpuhw->flags &= ~PMU_F_IN_USE; 1152a029a4eaSThomas Richter debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", 1153a029a4eaSThomas Richter __func__, rc, cpuhw->state, cpuhw->dev_state); 1154a029a4eaSThomas Richter } 1155a029a4eaSThomas Richter 1156a029a4eaSThomas Richter /* Start counter sets on particular CPU */ 1157a029a4eaSThomas Richter static void cfset_ioctl_on(void *parm) 1158a029a4eaSThomas Richter { 1159a029a4eaSThomas Richter struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 1160a029a4eaSThomas Richter struct cfset_call_on_cpu_parm 
*p = parm; 1161a029a4eaSThomas Richter int rc; 1162a029a4eaSThomas Richter 1163a029a4eaSThomas Richter cpuhw->flags |= PMU_F_IN_USE; 1164a029a4eaSThomas Richter ctr_set_enable(&cpuhw->dev_state, p->sets); 1165a029a4eaSThomas Richter ctr_set_start(&cpuhw->dev_state, p->sets); 1166a029a4eaSThomas Richter for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 1167a029a4eaSThomas Richter if ((p->sets & cpumf_ctr_ctl[rc])) 1168a029a4eaSThomas Richter atomic_inc(&cpuhw->ctr_set[rc]); 1169a029a4eaSThomas Richter rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */ 1170a029a4eaSThomas Richter if (!rc) 1171a029a4eaSThomas Richter atomic_inc(&p->cpus_ack); 1172a029a4eaSThomas Richter else 1173a029a4eaSThomas Richter pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n", 1174a029a4eaSThomas Richter cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); 1175a029a4eaSThomas Richter debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", 1176a029a4eaSThomas Richter __func__, rc, cpuhw->state, cpuhw->dev_state); 1177a029a4eaSThomas Richter } 1178a029a4eaSThomas Richter 1179a029a4eaSThomas Richter static void cfset_release_cpu(void *p) 1180a029a4eaSThomas Richter { 1181a029a4eaSThomas Richter struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 1182a029a4eaSThomas Richter int rc; 1183a029a4eaSThomas Richter 1184a029a4eaSThomas Richter debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", 1185a029a4eaSThomas Richter __func__, cpuhw->state, cpuhw->dev_state); 118645338031SThomas Richter cpuhw->dev_state = 0; 1187a029a4eaSThomas Richter rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ 1188a029a4eaSThomas Richter if (rc) 1189a029a4eaSThomas Richter pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n", 1190a029a4eaSThomas Richter cpuhw->state, S390_HWCTR_DEVICE, rc); 119145338031SThomas Richter } 119245338031SThomas Richter 119345338031SThomas Richter /* This modifies the process CPU mask to 
adopt it to the currently online 119445338031SThomas Richter * CPUs. Offline CPUs can not be addresses. This call terminates the access 119545338031SThomas Richter * and is usually followed by close() or a new iotcl(..., START, ...) which 119645338031SThomas Richter * creates a new request structure. 119745338031SThomas Richter */ 119845338031SThomas Richter static void cfset_all_stop(struct cfset_request *req) 119945338031SThomas Richter { 120045338031SThomas Richter struct cfset_call_on_cpu_parm p = { 120145338031SThomas Richter .sets = req->ctrset, 120245338031SThomas Richter }; 120345338031SThomas Richter 120445338031SThomas Richter cpumask_and(&req->mask, &req->mask, cpu_online_mask); 120545338031SThomas Richter on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1); 1206a029a4eaSThomas Richter } 1207a029a4eaSThomas Richter 1208a029a4eaSThomas Richter /* Release function is also called when application gets terminated without 1209a029a4eaSThomas Richter * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. 
1210a029a4eaSThomas Richter */ 1211a029a4eaSThomas Richter static int cfset_release(struct inode *inode, struct file *file) 1212a029a4eaSThomas Richter { 121345338031SThomas Richter mutex_lock(&cfset_ctrset_mutex); 121445338031SThomas Richter /* Open followed by close/exit has no private_data */ 121545338031SThomas Richter if (file->private_data) { 121645338031SThomas Richter cfset_all_stop(file->private_data); 121745338031SThomas Richter cfset_session_del(file->private_data); 121845338031SThomas Richter kfree(file->private_data); 121945338031SThomas Richter file->private_data = NULL; 122045338031SThomas Richter } 122145338031SThomas Richter if (!atomic_dec_return(&cfset_opencnt)) 1222a029a4eaSThomas Richter on_each_cpu(cfset_release_cpu, NULL, 1); 122345338031SThomas Richter mutex_unlock(&cfset_ctrset_mutex); 122445338031SThomas Richter 1225a029a4eaSThomas Richter hw_perf_event_destroy(NULL); 1226a029a4eaSThomas Richter return 0; 1227a029a4eaSThomas Richter } 1228a029a4eaSThomas Richter 1229a029a4eaSThomas Richter static int cfset_open(struct inode *inode, struct file *file) 1230a029a4eaSThomas Richter { 1231a029a4eaSThomas Richter if (!capable(CAP_SYS_ADMIN)) 1232a029a4eaSThomas Richter return -EPERM; 123345338031SThomas Richter mutex_lock(&cfset_ctrset_mutex); 123445338031SThomas Richter if (atomic_inc_return(&cfset_opencnt) == 1) 123545338031SThomas Richter cfset_session_init(); 123645338031SThomas Richter mutex_unlock(&cfset_ctrset_mutex); 1237a029a4eaSThomas Richter 1238a029a4eaSThomas Richter cpumf_hw_inuse(); 1239a029a4eaSThomas Richter file->private_data = NULL; 1240a029a4eaSThomas Richter /* nonseekable_open() never fails */ 1241a029a4eaSThomas Richter return nonseekable_open(inode, file); 1242a029a4eaSThomas Richter } 1243a029a4eaSThomas Richter 124445338031SThomas Richter static int cfset_all_start(struct cfset_request *req) 1245a029a4eaSThomas Richter { 1246a029a4eaSThomas Richter struct cfset_call_on_cpu_parm p = { 124745338031SThomas Richter .sets = 
req->ctrset, 1248a029a4eaSThomas Richter .cpus_ack = ATOMIC_INIT(0), 1249a029a4eaSThomas Richter }; 1250a029a4eaSThomas Richter cpumask_var_t mask; 1251a029a4eaSThomas Richter int rc = 0; 1252a029a4eaSThomas Richter 1253a029a4eaSThomas Richter if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 1254a029a4eaSThomas Richter return -ENOMEM; 125545338031SThomas Richter cpumask_and(mask, &req->mask, cpu_online_mask); 1256a029a4eaSThomas Richter on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1); 1257a029a4eaSThomas Richter if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { 1258a029a4eaSThomas Richter on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); 1259a029a4eaSThomas Richter rc = -EIO; 1260a029a4eaSThomas Richter debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__); 1261a029a4eaSThomas Richter } 1262a029a4eaSThomas Richter free_cpumask_var(mask); 1263a029a4eaSThomas Richter return rc; 1264a029a4eaSThomas Richter } 1265a029a4eaSThomas Richter 1266a029a4eaSThomas Richter /* Return the maximum required space for all possible CPUs in case one 1267a029a4eaSThomas Richter * CPU will be onlined during the START, READ, STOP cycles. 1268a029a4eaSThomas Richter * To find out the size of the counter sets, any one CPU will do. They 1269a029a4eaSThomas Richter * all have the same counter sets. 
1270a029a4eaSThomas Richter */ 1271a029a4eaSThomas Richter static size_t cfset_needspace(unsigned int sets) 1272a029a4eaSThomas Richter { 1273a029a4eaSThomas Richter size_t bytes = 0; 1274a029a4eaSThomas Richter int i; 1275a029a4eaSThomas Richter 1276a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1277a029a4eaSThomas Richter if (!(sets & cpumf_ctr_ctl[i])) 1278a029a4eaSThomas Richter continue; 1279*46c4d945SThomas Richter bytes += cpum_cf_read_setsize(i) * sizeof(u64) + 1280a029a4eaSThomas Richter sizeof(((struct s390_ctrset_setdata *)0)->set) + 1281a029a4eaSThomas Richter sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); 1282a029a4eaSThomas Richter } 1283a029a4eaSThomas Richter bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * 1284a029a4eaSThomas Richter (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + 1285a029a4eaSThomas Richter sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); 1286a029a4eaSThomas Richter return bytes; 1287a029a4eaSThomas Richter } 1288a029a4eaSThomas Richter 1289a029a4eaSThomas Richter static int cfset_all_copy(unsigned long arg, cpumask_t *mask) 1290a029a4eaSThomas Richter { 1291a029a4eaSThomas Richter struct s390_ctrset_read __user *ctrset_read; 12923cdf0269SThomas Richter unsigned int cpu, cpus, rc = 0; 1293a029a4eaSThomas Richter void __user *uptr; 1294a029a4eaSThomas Richter 1295a029a4eaSThomas Richter ctrset_read = (struct s390_ctrset_read __user *)arg; 1296a029a4eaSThomas Richter uptr = ctrset_read->data; 1297a029a4eaSThomas Richter for_each_cpu(cpu, mask) { 1298a029a4eaSThomas Richter struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu); 1299a029a4eaSThomas Richter struct s390_ctrset_cpudata __user *ctrset_cpudata; 1300a029a4eaSThomas Richter 1301a029a4eaSThomas Richter ctrset_cpudata = uptr; 1302a029a4eaSThomas Richter rc = put_user(cpu, &ctrset_cpudata->cpu_nr); 1303a029a4eaSThomas Richter rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets); 
1304a029a4eaSThomas Richter rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data, 1305a029a4eaSThomas Richter cpuhw->used); 13063cdf0269SThomas Richter if (rc) { 13073cdf0269SThomas Richter rc = -EFAULT; 13083cdf0269SThomas Richter goto out; 13093cdf0269SThomas Richter } 1310a029a4eaSThomas Richter uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used; 1311a029a4eaSThomas Richter cond_resched(); 1312a029a4eaSThomas Richter } 1313a029a4eaSThomas Richter cpus = cpumask_weight(mask); 1314a029a4eaSThomas Richter if (put_user(cpus, &ctrset_read->no_cpus)) 13153cdf0269SThomas Richter rc = -EFAULT; 13163cdf0269SThomas Richter out: 13173cdf0269SThomas Richter debug_sprintf_event(cf_dbg, 4, "%s rc %d copied %ld\n", __func__, rc, 1318a029a4eaSThomas Richter uptr - (void __user *)ctrset_read->data); 13193cdf0269SThomas Richter return rc; 1320a029a4eaSThomas Richter } 1321a029a4eaSThomas Richter 1322a029a4eaSThomas Richter static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, 1323a029a4eaSThomas Richter int ctrset_size, size_t room) 1324a029a4eaSThomas Richter { 1325a029a4eaSThomas Richter size_t need = 0; 1326a029a4eaSThomas Richter int rc = -1; 1327a029a4eaSThomas Richter 1328a029a4eaSThomas Richter need = sizeof(*p) + sizeof(u64) * ctrset_size; 1329a029a4eaSThomas Richter if (need <= room) { 1330a029a4eaSThomas Richter p->set = cpumf_ctr_ctl[ctrset]; 1331a029a4eaSThomas Richter p->no_cnts = ctrset_size; 1332a029a4eaSThomas Richter rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); 1333a029a4eaSThomas Richter if (rc == 3) /* Nothing stored */ 1334a029a4eaSThomas Richter need = 0; 1335a029a4eaSThomas Richter } 1336a029a4eaSThomas Richter return need; 1337a029a4eaSThomas Richter } 1338a029a4eaSThomas Richter 1339a029a4eaSThomas Richter /* Read all counter sets. 
*/ 1340a029a4eaSThomas Richter static void cfset_cpu_read(void *parm) 1341a029a4eaSThomas Richter { 1342a029a4eaSThomas Richter struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 1343a029a4eaSThomas Richter struct cfset_call_on_cpu_parm *p = parm; 1344a029a4eaSThomas Richter int set, set_size; 1345a029a4eaSThomas Richter size_t space; 1346a029a4eaSThomas Richter 1347a029a4eaSThomas Richter /* No data saved yet */ 1348a029a4eaSThomas Richter cpuhw->used = 0; 1349a029a4eaSThomas Richter cpuhw->sets = 0; 1350a029a4eaSThomas Richter memset(cpuhw->data, 0, sizeof(cpuhw->data)); 1351a029a4eaSThomas Richter 1352a029a4eaSThomas Richter /* Scan the counter sets */ 1353a029a4eaSThomas Richter for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { 1354a029a4eaSThomas Richter struct s390_ctrset_setdata *sp = (void *)cpuhw->data + 1355a029a4eaSThomas Richter cpuhw->used; 1356a029a4eaSThomas Richter 1357a029a4eaSThomas Richter if (!(p->sets & cpumf_ctr_ctl[set])) 1358a029a4eaSThomas Richter continue; /* Counter set not in list */ 1359*46c4d945SThomas Richter set_size = cpum_cf_read_setsize(set); 1360a029a4eaSThomas Richter space = sizeof(cpuhw->data) - cpuhw->used; 1361a029a4eaSThomas Richter space = cfset_cpuset_read(sp, set, set_size, space); 1362a029a4eaSThomas Richter if (space) { 1363a029a4eaSThomas Richter cpuhw->used += space; 1364a029a4eaSThomas Richter cpuhw->sets += 1; 1365a029a4eaSThomas Richter } 1366a029a4eaSThomas Richter } 1367a029a4eaSThomas Richter debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, 1368a029a4eaSThomas Richter cpuhw->sets, cpuhw->used); 1369a029a4eaSThomas Richter } 1370a029a4eaSThomas Richter 137145338031SThomas Richter static int cfset_all_read(unsigned long arg, struct cfset_request *req) 1372a029a4eaSThomas Richter { 1373a029a4eaSThomas Richter struct cfset_call_on_cpu_parm p; 1374a029a4eaSThomas Richter cpumask_var_t mask; 1375a029a4eaSThomas Richter int rc; 1376a029a4eaSThomas Richter 1377a029a4eaSThomas 
Richter if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 1378a029a4eaSThomas Richter return -ENOMEM; 1379a029a4eaSThomas Richter 138045338031SThomas Richter p.sets = req->ctrset; 138145338031SThomas Richter cpumask_and(mask, &req->mask, cpu_online_mask); 1382a029a4eaSThomas Richter on_each_cpu_mask(mask, cfset_cpu_read, &p, 1); 1383a029a4eaSThomas Richter rc = cfset_all_copy(arg, mask); 1384a029a4eaSThomas Richter free_cpumask_var(mask); 1385a029a4eaSThomas Richter return rc; 1386a029a4eaSThomas Richter } 1387a029a4eaSThomas Richter 138845338031SThomas Richter static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req) 1389a029a4eaSThomas Richter { 139045338031SThomas Richter int ret = -ENODATA; 1391a029a4eaSThomas Richter 139226300860SThomas Richter if (req && req->ctrset) 139345338031SThomas Richter ret = cfset_all_read(arg, req); 1394a029a4eaSThomas Richter return ret; 1395a029a4eaSThomas Richter } 1396a029a4eaSThomas Richter 139745338031SThomas Richter static long cfset_ioctl_stop(struct file *file) 1398a029a4eaSThomas Richter { 139945338031SThomas Richter struct cfset_request *req = file->private_data; 140045338031SThomas Richter int ret = -ENXIO; 1401a029a4eaSThomas Richter 140245338031SThomas Richter if (req) { 140345338031SThomas Richter cfset_all_stop(req); 140445338031SThomas Richter cfset_session_del(req); 140545338031SThomas Richter kfree(req); 140645338031SThomas Richter file->private_data = NULL; 140745338031SThomas Richter ret = 0; 1408a029a4eaSThomas Richter } 1409a029a4eaSThomas Richter return ret; 1410a029a4eaSThomas Richter } 1411a029a4eaSThomas Richter 141245338031SThomas Richter static long cfset_ioctl_start(unsigned long arg, struct file *file) 1413a029a4eaSThomas Richter { 1414a029a4eaSThomas Richter struct s390_ctrset_start __user *ustart; 1415a029a4eaSThomas Richter struct s390_ctrset_start start; 141645338031SThomas Richter struct cfset_request *preq; 1417a029a4eaSThomas Richter void __user *umask; 1418a029a4eaSThomas Richter 
unsigned int len; 1419a029a4eaSThomas Richter int ret = 0; 1420a029a4eaSThomas Richter size_t need; 1421a029a4eaSThomas Richter 142245338031SThomas Richter if (file->private_data) 1423a029a4eaSThomas Richter return -EBUSY; 1424a029a4eaSThomas Richter ustart = (struct s390_ctrset_start __user *)arg; 1425a029a4eaSThomas Richter if (copy_from_user(&start, ustart, sizeof(start))) 1426a029a4eaSThomas Richter return -EFAULT; 1427a029a4eaSThomas Richter if (start.version != S390_HWCTR_START_VERSION) 1428a029a4eaSThomas Richter return -EINVAL; 1429a029a4eaSThomas Richter if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | 1430a029a4eaSThomas Richter cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | 1431a029a4eaSThomas Richter cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | 1432a029a4eaSThomas Richter cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | 1433a029a4eaSThomas Richter cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) 1434a029a4eaSThomas Richter return -EINVAL; /* Invalid counter set */ 1435a029a4eaSThomas Richter if (!start.counter_sets) 1436a029a4eaSThomas Richter return -EINVAL; /* No counter set at all? 
*/ 143745338031SThomas Richter 143845338031SThomas Richter preq = kzalloc(sizeof(*preq), GFP_KERNEL); 143945338031SThomas Richter if (!preq) 144045338031SThomas Richter return -ENOMEM; 144145338031SThomas Richter cpumask_clear(&preq->mask); 1442a029a4eaSThomas Richter len = min_t(u64, start.cpumask_len, cpumask_size()); 1443a029a4eaSThomas Richter umask = (void __user *)start.cpumask; 144445338031SThomas Richter if (copy_from_user(&preq->mask, umask, len)) { 144545338031SThomas Richter kfree(preq); 1446a029a4eaSThomas Richter return -EFAULT; 144745338031SThomas Richter } 144845338031SThomas Richter if (cpumask_empty(&preq->mask)) { 144945338031SThomas Richter kfree(preq); 1450a029a4eaSThomas Richter return -EINVAL; 145145338031SThomas Richter } 1452a029a4eaSThomas Richter need = cfset_needspace(start.counter_sets); 145345338031SThomas Richter if (put_user(need, &ustart->data_bytes)) { 145445338031SThomas Richter kfree(preq); 145545338031SThomas Richter return -EFAULT; 145645338031SThomas Richter } 145745338031SThomas Richter preq->ctrset = start.counter_sets; 145845338031SThomas Richter ret = cfset_all_start(preq); 145945338031SThomas Richter if (!ret) { 146045338031SThomas Richter cfset_session_add(preq); 146145338031SThomas Richter file->private_data = preq; 146245338031SThomas Richter debug_sprintf_event(cf_dbg, 4, "%s set %#lx need %ld ret %d\n", 146345338031SThomas Richter __func__, preq->ctrset, need, ret); 146445338031SThomas Richter } else { 146545338031SThomas Richter kfree(preq); 146645338031SThomas Richter } 1467a029a4eaSThomas Richter return ret; 1468a029a4eaSThomas Richter } 1469a029a4eaSThomas Richter 1470a029a4eaSThomas Richter /* Entry point to the /dev/hwctr device interface. 1471a029a4eaSThomas Richter * The ioctl system call supports three subcommands: 1472a029a4eaSThomas Richter * S390_HWCTR_START: Start the specified counter sets on a CPU list. The 1473a029a4eaSThomas Richter * counter set keeps running until explicitly stopped. 
Returns the number 1474a029a4eaSThomas Richter * of bytes needed to store the counter values. If another S390_HWCTR_START 1475a029a4eaSThomas Richter * ioctl subcommand is called without a previous S390_HWCTR_STOP stop 147645338031SThomas Richter * command on the same file descriptor, -EBUSY is returned. 1477a029a4eaSThomas Richter * S390_HWCTR_READ: Read the counter set values from specified CPU list given 1478a029a4eaSThomas Richter * with the S390_HWCTR_START command. 1479a029a4eaSThomas Richter * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the 1480a029a4eaSThomas Richter * previous S390_HWCTR_START subcommand. 1481a029a4eaSThomas Richter */ 1482a029a4eaSThomas Richter static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 1483a029a4eaSThomas Richter { 1484a029a4eaSThomas Richter int ret; 1485a029a4eaSThomas Richter 1486a73de293SSebastian Andrzej Siewior cpus_read_lock(); 1487a029a4eaSThomas Richter mutex_lock(&cfset_ctrset_mutex); 1488a029a4eaSThomas Richter switch (cmd) { 1489a029a4eaSThomas Richter case S390_HWCTR_START: 149045338031SThomas Richter ret = cfset_ioctl_start(arg, file); 1491a029a4eaSThomas Richter break; 1492a029a4eaSThomas Richter case S390_HWCTR_STOP: 149345338031SThomas Richter ret = cfset_ioctl_stop(file); 1494a029a4eaSThomas Richter break; 1495a029a4eaSThomas Richter case S390_HWCTR_READ: 149645338031SThomas Richter ret = cfset_ioctl_read(arg, file->private_data); 1497a029a4eaSThomas Richter break; 1498a029a4eaSThomas Richter default: 1499a029a4eaSThomas Richter ret = -ENOTTY; 1500a029a4eaSThomas Richter break; 1501a029a4eaSThomas Richter } 1502a029a4eaSThomas Richter mutex_unlock(&cfset_ctrset_mutex); 1503a73de293SSebastian Andrzej Siewior cpus_read_unlock(); 1504a029a4eaSThomas Richter return ret; 1505a029a4eaSThomas Richter } 1506a029a4eaSThomas Richter 1507a029a4eaSThomas Richter static const struct file_operations cfset_fops = { 1508a029a4eaSThomas Richter .owner = THIS_MODULE, 
1509a029a4eaSThomas Richter .open = cfset_open, 1510a029a4eaSThomas Richter .release = cfset_release, 1511a029a4eaSThomas Richter .unlocked_ioctl = cfset_ioctl, 1512a029a4eaSThomas Richter .compat_ioctl = cfset_ioctl, 1513a029a4eaSThomas Richter .llseek = no_llseek 1514a029a4eaSThomas Richter }; 1515a029a4eaSThomas Richter 1516a029a4eaSThomas Richter static struct miscdevice cfset_dev = { 1517a029a4eaSThomas Richter .name = S390_HWCTR_DEVICE, 1518a029a4eaSThomas Richter .minor = MISC_DYNAMIC_MINOR, 1519a029a4eaSThomas Richter .fops = &cfset_fops, 1520a029a4eaSThomas Richter }; 1521a029a4eaSThomas Richter 152245338031SThomas Richter /* Hotplug add of a CPU. Scan through all active processes and add 152345338031SThomas Richter * that CPU to the list of CPUs supplied with ioctl(..., START, ...). 152445338031SThomas Richter */ 15251e99c242SThomas Richter static int cfset_online_cpu(unsigned int cpu) 1526a029a4eaSThomas Richter { 1527a029a4eaSThomas Richter struct cfset_call_on_cpu_parm p; 152845338031SThomas Richter struct cfset_request *rp; 1529a029a4eaSThomas Richter 1530a029a4eaSThomas Richter mutex_lock(&cfset_ctrset_mutex); 153145338031SThomas Richter if (!list_empty(&cfset_session.head)) { 153245338031SThomas Richter list_for_each_entry(rp, &cfset_session.head, node) { 153345338031SThomas Richter p.sets = rp->ctrset; 1534a029a4eaSThomas Richter cfset_ioctl_on(&p); 153545338031SThomas Richter cpumask_set_cpu(cpu, &rp->mask); 153645338031SThomas Richter } 1537a029a4eaSThomas Richter } 1538a029a4eaSThomas Richter mutex_unlock(&cfset_ctrset_mutex); 1539a029a4eaSThomas Richter return 0; 1540a029a4eaSThomas Richter } 1541a029a4eaSThomas Richter 154245338031SThomas Richter /* Hotplug remove of a CPU. Scan through all active processes and clear 154345338031SThomas Richter * that CPU from the list of CPUs supplied with ioctl(..., START, ...). 
154445338031SThomas Richter */ 15451e99c242SThomas Richter static int cfset_offline_cpu(unsigned int cpu) 1546a029a4eaSThomas Richter { 1547a029a4eaSThomas Richter struct cfset_call_on_cpu_parm p; 154845338031SThomas Richter struct cfset_request *rp; 1549a029a4eaSThomas Richter 1550a029a4eaSThomas Richter mutex_lock(&cfset_ctrset_mutex); 155145338031SThomas Richter if (!list_empty(&cfset_session.head)) { 155245338031SThomas Richter list_for_each_entry(rp, &cfset_session.head, node) { 155345338031SThomas Richter p.sets = rp->ctrset; 1554a029a4eaSThomas Richter cfset_ioctl_off(&p); 155545338031SThomas Richter cpumask_clear_cpu(cpu, &rp->mask); 155645338031SThomas Richter } 1557a029a4eaSThomas Richter } 1558a029a4eaSThomas Richter mutex_unlock(&cfset_ctrset_mutex); 1559a029a4eaSThomas Richter return 0; 1560a029a4eaSThomas Richter } 1561a029a4eaSThomas Richter 1562a029a4eaSThomas Richter static void cfdiag_read(struct perf_event *event) 1563a029a4eaSThomas Richter { 1564a029a4eaSThomas Richter debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__, 1565a029a4eaSThomas Richter event->attr.config, local64_read(&event->count)); 1566a029a4eaSThomas Richter } 1567a029a4eaSThomas Richter 1568a029a4eaSThomas Richter static int get_authctrsets(void) 1569a029a4eaSThomas Richter { 1570a029a4eaSThomas Richter unsigned long auth = 0; 1571a029a4eaSThomas Richter enum cpumf_ctr_set i; 1572a029a4eaSThomas Richter 1573a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1574*46c4d945SThomas Richter if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i]) 1575a029a4eaSThomas Richter auth |= cpumf_ctr_ctl[i]; 1576a029a4eaSThomas Richter } 1577a029a4eaSThomas Richter return auth; 1578a029a4eaSThomas Richter } 1579a029a4eaSThomas Richter 1580a029a4eaSThomas Richter /* Setup the event. Test for authorized counter sets and only include counter 1581a029a4eaSThomas Richter * sets which are authorized at the time of the setup. 
Including unauthorized 1582a029a4eaSThomas Richter * counter sets result in specification exception (and panic). 1583a029a4eaSThomas Richter */ 1584a029a4eaSThomas Richter static int cfdiag_event_init2(struct perf_event *event) 1585a029a4eaSThomas Richter { 1586a029a4eaSThomas Richter struct perf_event_attr *attr = &event->attr; 1587a029a4eaSThomas Richter int err = 0; 1588a029a4eaSThomas Richter 1589a029a4eaSThomas Richter /* Set sample_period to indicate sampling */ 1590a029a4eaSThomas Richter event->hw.config = attr->config; 1591a029a4eaSThomas Richter event->hw.sample_period = attr->sample_period; 1592a029a4eaSThomas Richter local64_set(&event->hw.period_left, event->hw.sample_period); 1593a029a4eaSThomas Richter local64_set(&event->count, 0); 1594a029a4eaSThomas Richter event->hw.last_period = event->hw.sample_period; 1595a029a4eaSThomas Richter 1596a029a4eaSThomas Richter /* Add all authorized counter sets to config_base. The 1597a029a4eaSThomas Richter * the hardware init function is either called per-cpu or just once 1598a029a4eaSThomas Richter * for all CPUS (event->cpu == -1). This depends on the whether 1599a029a4eaSThomas Richter * counting is started for all CPUs or on a per workload base where 1600a029a4eaSThomas Richter * the perf event moves from one CPU to another CPU. 1601a029a4eaSThomas Richter * Checking the authorization on any CPU is fine as the hardware 1602a029a4eaSThomas Richter * applies the same authorization settings to all CPUs. 
1603a029a4eaSThomas Richter */ 1604a029a4eaSThomas Richter event->hw.config_base = get_authctrsets(); 1605a029a4eaSThomas Richter 1606a029a4eaSThomas Richter /* No authorized counter sets, nothing to count/sample */ 1607a029a4eaSThomas Richter if (!event->hw.config_base) 1608a029a4eaSThomas Richter err = -EINVAL; 1609a029a4eaSThomas Richter 1610a029a4eaSThomas Richter debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n", 1611a029a4eaSThomas Richter __func__, err, event->hw.config_base); 1612a029a4eaSThomas Richter return err; 1613a029a4eaSThomas Richter } 1614a029a4eaSThomas Richter 1615a029a4eaSThomas Richter static int cfdiag_event_init(struct perf_event *event) 1616a029a4eaSThomas Richter { 1617a029a4eaSThomas Richter struct perf_event_attr *attr = &event->attr; 1618a029a4eaSThomas Richter int err = -ENOENT; 1619a029a4eaSThomas Richter 1620a029a4eaSThomas Richter if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || 1621a029a4eaSThomas Richter event->attr.type != event->pmu->type) 1622a029a4eaSThomas Richter goto out; 1623a029a4eaSThomas Richter 1624a029a4eaSThomas Richter /* Raw events are used to access counters directly, 1625a029a4eaSThomas Richter * hence do not permit excludes. 1626a029a4eaSThomas Richter * This event is useless without PERF_SAMPLE_RAW to return counter set 1627a029a4eaSThomas Richter * values as raw data. 
1628a029a4eaSThomas Richter */ 1629a029a4eaSThomas Richter if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || 1630a029a4eaSThomas Richter !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { 1631a029a4eaSThomas Richter err = -EOPNOTSUPP; 1632a029a4eaSThomas Richter goto out; 1633a029a4eaSThomas Richter } 1634a029a4eaSThomas Richter 1635a029a4eaSThomas Richter /* Initialize for using the CPU-measurement counter facility */ 1636a029a4eaSThomas Richter cpumf_hw_inuse(); 1637a029a4eaSThomas Richter event->destroy = hw_perf_event_destroy; 1638a029a4eaSThomas Richter 1639a029a4eaSThomas Richter err = cfdiag_event_init2(event); 1640a029a4eaSThomas Richter if (unlikely(err)) 1641a029a4eaSThomas Richter event->destroy(event); 1642a029a4eaSThomas Richter out: 1643a029a4eaSThomas Richter return err; 1644a029a4eaSThomas Richter } 1645a029a4eaSThomas Richter 1646a029a4eaSThomas Richter /* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used 1647a029a4eaSThomas Richter * to collect the complete counter sets for a scheduled process. Target 1648a029a4eaSThomas Richter * are complete counter sets attached as raw data to the artificial event. 1649a029a4eaSThomas Richter * This results in complete counter sets available when a process is 1650a029a4eaSThomas Richter * scheduled. Contains the delta of every counter while the process was 1651a029a4eaSThomas Richter * running. 
*/
CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);

/* Attributes of the "events" group: the CF_DIAG event only */
static struct attribute *cfdiag_events_attr[] = {
	CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG),
	NULL,
};

/* The event number occupies all 64 bits of perf_event_attr::config */
PMU_FORMAT_ATTR(event, "config:0-63");

/* Attributes of the "format" group */
static struct attribute *cfdiag_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group cfdiag_events_group = {
	.name = "events",
	.attrs = cfdiag_events_attr,
};
static struct attribute_group cfdiag_format_group = {
	.name = "format",
	.attrs = cfdiag_format_attr,
};
/* NULL terminated list of attribute groups used by the cf_diag PMU */
static const struct attribute_group *cfdiag_attr_groups[] = {
	&cfdiag_events_group,
	&cfdiag_format_group,
	NULL,
};

/* Performance monitoring unit for event CF_DIAG. Since this event
 * is also started and stopped via the perf_event_open() system call, use
 * the same event enable/disable call back functions. They do not
 * have a pointer to the perf_event structure as first parameter.
 *
 * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
* Reuse them and distinguish the event (always first parameter) via
 * 'config' member.
 */
static struct pmu cf_diag = {
	.task_ctx_nr  = perf_sw_context,
	.event_init   = cfdiag_event_init,
	/* Callbacks shared with the counter facility PMU */
	.pmu_enable   = cpumf_pmu_enable,
	.pmu_disable  = cpumf_pmu_disable,
	.add	      = cpumf_pmu_add,
	.del	      = cpumf_pmu_del,
	.start	      = cpumf_pmu_start,
	.stop	      = cpumf_pmu_stop,
	.read	      = cfdiag_read,

	.attr_groups  = cfdiag_attr_groups
};

/* Calculate memory needed to store all counter sets together with header and
 * trailer data. This is independent of the counter set authorization which
 * can vary depending on the configuration.
1707a029a4eaSThomas Richter */ 1708a029a4eaSThomas Richter static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) 1709a029a4eaSThomas Richter { 1710a029a4eaSThomas Richter size_t max_size = sizeof(struct cf_trailer_entry); 1711a029a4eaSThomas Richter enum cpumf_ctr_set i; 1712a029a4eaSThomas Richter 1713a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1714*46c4d945SThomas Richter size_t size = cpum_cf_read_setsize(i); 1715a029a4eaSThomas Richter 1716a029a4eaSThomas Richter if (size) 1717a029a4eaSThomas Richter max_size += size * sizeof(u64) + 1718a029a4eaSThomas Richter sizeof(struct cf_ctrset_entry); 1719a029a4eaSThomas Richter } 1720a029a4eaSThomas Richter return max_size; 1721a029a4eaSThomas Richter } 1722a029a4eaSThomas Richter 1723a029a4eaSThomas Richter /* Get the CPU speed, try sampling facility first and CPU attributes second. */ 1724a029a4eaSThomas Richter static void cfdiag_get_cpu_speed(void) 1725a029a4eaSThomas Richter { 17264efd417fSVasily Gorbik unsigned long mhz; 17274efd417fSVasily Gorbik 1728a029a4eaSThomas Richter if (cpum_sf_avail()) { /* Sampling facility first */ 1729a029a4eaSThomas Richter struct hws_qsi_info_block si; 1730a029a4eaSThomas Richter 1731a029a4eaSThomas Richter memset(&si, 0, sizeof(si)); 1732a029a4eaSThomas Richter if (!qsi(&si)) { 1733a029a4eaSThomas Richter cfdiag_cpu_speed = si.cpu_speed; 1734a029a4eaSThomas Richter return; 1735a029a4eaSThomas Richter } 1736a029a4eaSThomas Richter } 1737a029a4eaSThomas Richter 1738a029a4eaSThomas Richter /* Fallback: CPU speed extract static part. Used in case 1739a029a4eaSThomas Richter * CPU Measurement Sampling Facility is turned off. 
1740a029a4eaSThomas Richter */ 17414efd417fSVasily Gorbik mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); 1742a029a4eaSThomas Richter if (mhz != -1UL) 1743a029a4eaSThomas Richter cfdiag_cpu_speed = mhz & 0xffffffff; 1744a029a4eaSThomas Richter } 1745a029a4eaSThomas Richter 1746a029a4eaSThomas Richter static int cfset_init(void) 1747a029a4eaSThomas Richter { 1748a029a4eaSThomas Richter size_t need; 1749a029a4eaSThomas Richter int rc; 1750a029a4eaSThomas Richter 1751a029a4eaSThomas Richter cfdiag_get_cpu_speed(); 1752a029a4eaSThomas Richter /* Make sure the counter set data fits into predefined buffer. */ 1753*46c4d945SThomas Richter need = cfdiag_maxsize(&cpumf_ctr_info); 1754a029a4eaSThomas Richter if (need > sizeof(((struct cpu_cf_events *)0)->start)) { 1755a029a4eaSThomas Richter pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", 1756a029a4eaSThomas Richter need); 1757a029a4eaSThomas Richter return -ENOMEM; 1758a029a4eaSThomas Richter } 1759a029a4eaSThomas Richter 1760a029a4eaSThomas Richter rc = misc_register(&cfset_dev); 1761a029a4eaSThomas Richter if (rc) { 1762a029a4eaSThomas Richter pr_err("Registration of /dev/%s failed rc=%i\n", 1763a029a4eaSThomas Richter cfset_dev.name, rc); 1764a029a4eaSThomas Richter goto out; 1765a029a4eaSThomas Richter } 1766a029a4eaSThomas Richter 1767a029a4eaSThomas Richter rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); 1768a029a4eaSThomas Richter if (rc) { 1769a029a4eaSThomas Richter misc_deregister(&cfset_dev); 1770a029a4eaSThomas Richter pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", 1771a029a4eaSThomas Richter rc); 1772a029a4eaSThomas Richter } 1773a029a4eaSThomas Richter out: 1774a029a4eaSThomas Richter return rc; 1775a029a4eaSThomas Richter } 1776a029a4eaSThomas Richter 1777a029a4eaSThomas Richter device_initcall(cpumf_pmu_init); 1778