// SPDX-License-Identifier: GPL-2.0
/*
 * Performance event support for s390x - CPU-measurement Counter Facility
 *
 * Copyright IBM Corp. 2012, 2023
 * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
 *	      Thomas Richter <tmricht@linux.ibm.com>
 */
#define KMSG_COMPONENT	"cpum_cf"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/miscdevice.h>
#include <linux/perf_event.h>

#include <asm/cpu_mf.h>
#include <asm/hwctrset.h>
#include <asm/debug.h>

/* Counter sets provided by the CPU-measurement counter facility */
enum cpumf_ctr_set {
	CPUMF_CTR_SET_BASIC	= 0,	/* Basic Counter Set */
	CPUMF_CTR_SET_USER	= 1,	/* Problem-State Counter Set */
	CPUMF_CTR_SET_CRYPTO	= 2,	/* Crypto-Activity Counter Set */
	CPUMF_CTR_SET_EXT	= 3,	/* Extended Counter Set */
	CPUMF_CTR_SET_MT_DIAG	= 4,	/* MT-diagnostic Counter Set */

	/* Maximum number of counter sets */
	CPUMF_CTR_SET_MAX,
};

/*
 * Bit position of the counter-set enable bits within the 64-bit
 * control state word passed to lcctl(); activation control bits
 * occupy the low half (see CPUMF_LCCTL_ACTCTL_SHIFT below).
 */
#define CPUMF_LCCTL_ENABLE_SHIFT	16
/* Bit position of the counter-set activation control bits in the state word */
#define CPUMF_LCCTL_ACTCTL_SHIFT	0

/* Set the enable bits for the counter sets in @ctrsets in the shadow state */
static inline void ctr_set_enable(u64 *state, u64 ctrsets)
{
	*state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
}

/* Clear the enable bits for the counter sets in @ctrsets */
static inline void ctr_set_disable(u64 *state, u64 ctrsets)
{
	*state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
}

/* Set the activation control bits for the counter sets in @ctrsets */
static inline void ctr_set_start(u64 *state, u64 ctrsets)
{
	*state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
}

/* Clear the activation control bits for the counter sets in @ctrsets */
static inline void ctr_set_stop(u64 *state, u64 ctrsets)
{
	*state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
}

/*
 * Store the counters of counter set @set into @dest using the stcctm
 * instruction. @range is the number of counters to store. Returns the
 * result of stcctm(); 3 is returned for an invalid set (matches stcctm's
 * failure condition code -- see the error handling in cfdiag_getctrset()).
 */
static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
{
	switch (set) {
	case CPUMF_CTR_SET_BASIC:
		return stcctm(BASIC, range, dest);
	case CPUMF_CTR_SET_USER:
		return stcctm(PROBLEM_STATE, range, dest);
	case CPUMF_CTR_SET_CRYPTO:
		return stcctm(CRYPTO_ACTIVITY, range, dest);
	case CPUMF_CTR_SET_EXT:
		return stcctm(EXTENDED, range, dest);
	case CPUMF_CTR_SET_MT_DIAG:
		return stcctm(MT_DIAG_CLEARING, range, dest);
	case CPUMF_CTR_SET_MAX:
		return 3;
	}
	return 3;
}
/* Per-CPU bookkeeping for counter-facility events (perf and /dev/hwctr) */
struct cpu_cf_events {
	refcount_t refcnt;		/* Reference count */
	atomic_t ctr_set[CPUMF_CTR_SET_MAX];
	u64 state;			/* For perf_event_open SVC */
	u64 dev_state;			/* For /dev/hwctr */
	unsigned int flags;
	size_t used;			/* Bytes used in data */
	size_t usedss;			/* Bytes used in start/stop */
	unsigned char start[PAGE_SIZE];	/* Counter set at event add */
	unsigned char stop[PAGE_SIZE];	/* Counter set at event delete */
	unsigned char data[PAGE_SIZE];	/* Counter set at /dev/hwctr */
	unsigned int sets;		/* # Counter set saved in memory */
};

static unsigned int cfdiag_cpu_speed;	/* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;		/* s390 debug feature handle */

/*
 * The CPU Measurement query counter information instruction contains
 * information which varies per machine generation, but is constant and
 * does not change when running on a particular machine, such as counter
 * first and second version number. This is needed to determine the size
 * of counter sets. Extract this information at device driver initialization.
 */
static struct cpumf_ctr_info	cpumf_ctr_info;

/* Indirection to the per-CPU event structure; NULL when no event installed */
struct cpu_cf_ptr {
	struct cpu_cf_events *cpucf;
};

static struct cpu_cf_root {		/* Anchor to per CPU data */
	refcount_t refcnt;		/* Overall active events */
	struct cpu_cf_ptr __percpu *cfptr;
} cpu_cf_root;

/*
 * Serialize event initialization and event removal. Both are called from
 * user space in task context with perf_event_open() and close()
 * system calls.
 *
 * This mutex serializes functions cpum_cf_alloc_cpu() called at event
 * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
 * called at event removal via call back function hw_perf_event_destroy()
 * when the event is deleted. They are serialized to enforce correct
 * bookkeeping of pointer and reference counts anchored by
 * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
 * per CPU pointers stored in cpu_cf_root::cfptr.
 */
static DEFINE_MUTEX(pmc_reserve_mutex);

/*
 * Get pointer to per-cpu structure.
 *
 * Function get_cpu_cfhw() is called from
 * - cfset_copy_all(): This function is protected by cpus_read_lock(), so
 *   CPU hot plug remove can not happen.
Event removal requires a close() 1349b9cf3c7SThomas Richter * first. 1359b9cf3c7SThomas Richter * 1369b9cf3c7SThomas Richter * Function this_cpu_cfhw() is called from perf common code functions: 1379b9cf3c7SThomas Richter * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}(): 1389b9cf3c7SThomas Richter * All functions execute with interrupts disabled on that particular CPU. 1399b9cf3c7SThomas Richter * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all(). 1409b9cf3c7SThomas Richter * 1419b9cf3c7SThomas Richter * Therefore it is safe to access the CPU specific pointer to the event. 1429b9cf3c7SThomas Richter */ 1439b9cf3c7SThomas Richter static struct cpu_cf_events *get_cpu_cfhw(int cpu) 1449b9cf3c7SThomas Richter { 1459b9cf3c7SThomas Richter struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr; 1469b9cf3c7SThomas Richter 1479b9cf3c7SThomas Richter if (p) { 1489b9cf3c7SThomas Richter struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu); 1499b9cf3c7SThomas Richter 1509b9cf3c7SThomas Richter return q->cpucf; 1519b9cf3c7SThomas Richter } 1529b9cf3c7SThomas Richter return NULL; 1539b9cf3c7SThomas Richter } 1549b9cf3c7SThomas Richter 1559b9cf3c7SThomas Richter static struct cpu_cf_events *this_cpu_cfhw(void) 1569b9cf3c7SThomas Richter { 1579b9cf3c7SThomas Richter return get_cpu_cfhw(smp_processor_id()); 1589b9cf3c7SThomas Richter } 1599b9cf3c7SThomas Richter 1609b9cf3c7SThomas Richter /* Disable counter sets on dedicated CPU */ 1619b9cf3c7SThomas Richter static void cpum_cf_reset_cpu(void *flags) 1629b9cf3c7SThomas Richter { 1639b9cf3c7SThomas Richter lcctl(0); 1649b9cf3c7SThomas Richter } 1659b9cf3c7SThomas Richter 1669b9cf3c7SThomas Richter /* Free per CPU data when the last event is removed. 
 */
static void cpum_cf_free_root(void)
{
	if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
		return;
	/* Last reference gone: tear down per-CPU data and interrupt handling */
	free_percpu(cpu_cf_root.cfptr);
	cpu_cf_root.cfptr = NULL;
	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
	on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
	debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n",
			    __func__, refcount_read(&cpu_cf_root.refcnt),
			    !cpu_cf_root.cfptr);
}

/*
 * On initialization of first event also allocate per CPU data dynamically.
 * Start with an array of pointers, the array size is the maximum number of
 * CPUs possible, which might be larger than the number of CPUs currently
 * online.
 *
 * Returns 0 on success, -ENOMEM when the per-CPU array can not be allocated.
 * Must be called with pmc_reserve_mutex held (see cpum_cf_alloc_cpu()).
 */
static int cpum_cf_alloc_root(void)
{
	int rc = 0;

	/* Fast path: another event already set everything up */
	if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
		return rc;

	/* The memory is already zeroed. */
	cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
	if (cpu_cf_root.cfptr) {
		refcount_set(&cpu_cf_root.refcnt, 1);
		on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
		irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
	} else {
		rc = -ENOMEM;
	}

	return rc;
}

/* Free CPU counter data structure for a PMU. Takes pmc_reserve_mutex. */
static void cpum_cf_free_cpu(int cpu)
{
	struct cpu_cf_events *cpuhw;
	struct cpu_cf_ptr *p;

	mutex_lock(&pmc_reserve_mutex);
	/*
	 * When invoked via CPU hotplug handler, there might be no events
	 * installed or that particular CPU might not have an
	 * event installed. This anchor pointer can be NULL!
	 */
	if (!cpu_cf_root.cfptr)
		goto out;
	p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
	cpuhw = p->cpucf;
	/*
	 * Might be zero when called from CPU hotplug handler and no event
	 * installed on that CPU, but on different CPUs.
	 */
	if (!cpuhw)
		goto out;

	if (refcount_dec_and_test(&cpuhw->refcnt)) {
		kfree(cpuhw);
		p->cpucf = NULL;
	}
	/* Drop the per-event reference on the root anchor as well */
	cpum_cf_free_root();
out:
	mutex_unlock(&pmc_reserve_mutex);
}

/* Allocate CPU counter data structure for a PMU. Takes pmc_reserve_mutex. */
static int cpum_cf_alloc_cpu(int cpu)
{
	struct cpu_cf_events *cpuhw;
	struct cpu_cf_ptr *p;
	int rc;

	mutex_lock(&pmc_reserve_mutex);
	rc = cpum_cf_alloc_root();
	if (rc)
		goto unlock;
	p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
	cpuhw = p->cpucf;

	if (!cpuhw) {
		/* First event on this CPU: create the per-CPU structure */
		cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
		if (cpuhw) {
			p->cpucf = cpuhw;
			refcount_set(&cpuhw->refcnt, 1);
		} else {
			rc = -ENOMEM;
		}
	} else {
		refcount_inc(&cpuhw->refcnt);
	}
	if (rc) {
		/*
		 * Error in allocation of event, decrement anchor. Since
		 * cpu_cf_event in not created, its destroy() function is not
		 * invoked. Adjust the reference counter for the anchor.
		 */
		cpum_cf_free_root();
	}
unlock:
	mutex_unlock(&pmc_reserve_mutex);
	return rc;
}

/*
 * Create/delete per CPU data structures for /dev/hwctr interface and events
 * created by perf_event_open().
 * If cpu is -1, track task on all available CPUs. This requires
 * allocation of hardware data structures for all CPUs. This setup handles
 * perf_event_open() with task context and /dev/hwctr interface.
 * If cpu is non-zero install event on this CPU only. This setup handles
 * perf_event_open() with CPU context.
 *
 * Returns 0 on success or a negative error code. On failure, allocations
 * done for earlier CPUs are rolled back via the tracking cpumask.
 */
static int cpum_cf_alloc(int cpu)
{
	cpumask_var_t mask;
	int rc;

	if (cpu == -1) {
		if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
			return -ENOMEM;
		for_each_online_cpu(cpu) {
			rc = cpum_cf_alloc_cpu(cpu);
			if (rc) {
				/* Undo the CPUs already allocated */
				for_each_cpu(cpu, mask)
					cpum_cf_free_cpu(cpu);
				break;
			}
			cpumask_set_cpu(cpu, mask);
		}
		free_cpumask_var(mask);
	} else {
		rc = cpum_cf_alloc_cpu(cpu);
	}
	return rc;
}

/* Counterpart of cpum_cf_alloc(); cpu == -1 releases all online CPUs */
static void cpum_cf_free(int cpu)
{
	if (cpu == -1) {
		for_each_online_cpu(cpu)
			cpum_cf_free_cpu(cpu);
	} else {
		cpum_cf_free_cpu(cpu);
	}
}

#define CF_DIAG_CTRSET_DEF		0xfeef	/* Counter set header mark */

/* Counter sets are stored as data stream in a page sized memory buffer and
 * exported to user space via raw data attached to the event sample data.
 * Each counter set starts with an eight byte header consisting of:
 * - a two byte eye catcher (0xfeef)
 * - a one byte counter set number
 * - a two byte counter set size (indicates the number of counters in this set)
 * - a three byte reserved value (must be zero) to make the header the same
 *   size as a counter value.
 * All counter values are eight byte in size.
 *
 * All counter sets are followed by a 64 byte trailer.
 * The trailer consists of a:
 * - flag field indicating valid fields when corresponding bit set
 * - the counter facility first and second version number
 * - the CPU speed if nonzero
 * - the time stamp the counter sets have been collected
 * - the time of day (TOD) base value
 * - the machine type.
 *
 * The counter sets are saved when the process is prepared to be executed on a
 * CPU and saved again when the process is going to be removed from a CPU.
 * The difference of both counter sets are calculated and stored in the event
 * sample data area.
 */
struct cf_ctrset_entry {	/* CPU-M CF counter set entry (8 byte) */
	unsigned int def:16;	/* 0-15  Data Entry Format (0xfeef) */
	unsigned int set:16;	/* 16-31 Counter set identifier */
	unsigned int ctr:16;	/* 32-47 Number of stored counters */
	unsigned int res1:16;	/* 48-63 Reserved */
};

struct cf_trailer_entry {	/* CPU-M CF_DIAG trailer (64 byte) */
	/* 0 - 7 */
	union {
		struct {
			unsigned int clock_base:1;	/* TOD clock base set */
			unsigned int speed:1;		/* CPU speed set */
			/* Measurement alerts */
			unsigned int mtda:1;	/* Loss of MT ctr. data alert */
			unsigned int caca:1;	/* Counter auth. change alert */
			unsigned int lcda:1;	/* Loss of counter data alert */
		};
		unsigned long flags;	/* 0-63 All indicators */
	};
	/* 8 - 15 */
	unsigned int cfvn:16;			/* 64-79   Ctr First Version */
	unsigned int csvn:16;			/* 80-95   Ctr Second Version */
	unsigned int cpu_speed:32;		/* 96-127  CPU speed */
	/* 16 - 23 */
	unsigned long timestamp;		/* 128-191 Timestamp (TOD) */
	/* 24 - 55 */
	union {
		struct {
			unsigned long progusage1;
			unsigned long progusage2;
			unsigned long progusage3;
			unsigned long tod_base;
		};
		unsigned long progusage[4];
	};
	/* 56 - 63 */
	unsigned int mach_type:16;	/* Machine type */
	unsigned int res1:16;		/* Reserved */
	unsigned int res2:32;		/* Reserved */
};

/* Create the trailer data at the end of a page.
*/ 389a029a4eaSThomas Richter static void cfdiag_trailer(struct cf_trailer_entry *te) 390a029a4eaSThomas Richter { 391a029a4eaSThomas Richter struct cpuid cpuid; 392a029a4eaSThomas Richter 39346c4d945SThomas Richter te->cfvn = cpumf_ctr_info.cfvn; /* Counter version numbers */ 39446c4d945SThomas Richter te->csvn = cpumf_ctr_info.csvn; 395a029a4eaSThomas Richter 396a029a4eaSThomas Richter get_cpu_id(&cpuid); /* Machine type */ 397a029a4eaSThomas Richter te->mach_type = cpuid.machine; 398a029a4eaSThomas Richter te->cpu_speed = cfdiag_cpu_speed; 399a029a4eaSThomas Richter if (te->cpu_speed) 400a029a4eaSThomas Richter te->speed = 1; 401a029a4eaSThomas Richter te->clock_base = 1; /* Save clock base */ 402a029a4eaSThomas Richter te->tod_base = tod_clock_base.tod; 403a029a4eaSThomas Richter te->timestamp = get_tod_clock_fast(); 404a029a4eaSThomas Richter } 405a029a4eaSThomas Richter 406345d2a4dSThomas Richter /* 40746c4d945SThomas Richter * The number of counters per counter set varies between machine generations, 40846c4d945SThomas Richter * but is constant when running on a particular machine generation. 40946c4d945SThomas Richter * Determine each counter set size at device driver initialization and 41046c4d945SThomas Richter * retrieve it later. 
 */
static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX];

/*
 * Compute the number of 8-byte counters in @ctrset from the counter
 * facility first/second version numbers and cache it in
 * cpumf_ctr_setsizes[]. A size of zero means the set is not available
 * on this machine.
 */
static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
{
	size_t ctrset_size = 0;

	switch (ctrset) {
	case CPUMF_CTR_SET_BASIC:
		if (cpumf_ctr_info.cfvn >= 1)
			ctrset_size = 6;
		break;
	case CPUMF_CTR_SET_USER:
		if (cpumf_ctr_info.cfvn == 1)
			ctrset_size = 6;
		else if (cpumf_ctr_info.cfvn >= 3)
			ctrset_size = 2;
		break;
	case CPUMF_CTR_SET_CRYPTO:
		if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5)
			ctrset_size = 16;
		else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
			ctrset_size = 20;
		break;
	case CPUMF_CTR_SET_EXT:
		if (cpumf_ctr_info.csvn == 1)
			ctrset_size = 32;
		else if (cpumf_ctr_info.csvn == 2)
			ctrset_size = 48;
		else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
			ctrset_size = 128;
		else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
			ctrset_size = 160;
		break;
	case CPUMF_CTR_SET_MT_DIAG:
		if (cpumf_ctr_info.csvn > 3)
			ctrset_size = 48;
		break;
	case CPUMF_CTR_SET_MAX:
		break;
	}
	cpumf_ctr_setsizes[ctrset] = ctrset_size;
}

/*
 * Return the maximum possible counter set size (in number of 8 byte counters)
 * depending on type and model number.
 */
static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset)
{
	return cpumf_ctr_setsizes[ctrset];
}

/* Read a counter set. The counter set number determines the counter set and
 * the CPUM-CF first and second version number determine the number of
 * available counters in each counter set.
 * Each counter set starts with header containing the counter set number and
 * the number of eight byte counters.
 *
 * The functions returns the number of bytes occupied by this counter set
 * including the header.
 * If there is no counter in the counter set, this counter set is useless and
 * zero is returned on this case.
 *
 * Note that the counter sets may not be enabled or active and the stcctm
 * instruction might return error 3. Depending on error_ok value this is ok,
 * for example when called from cpumf_pmu_start() call back function.
 */
static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
			       size_t room, bool error_ok)
{
	size_t ctrset_size, need = 0;
	int rc = 3;				/* Assume write failure */

	ctrdata->def = CF_DIAG_CTRSET_DEF;
	ctrdata->set = ctrset;
	ctrdata->res1 = 0;
	ctrset_size = cpum_cf_read_setsize(ctrset);

	if (ctrset_size) {			/* Save data */
		/* Counters follow directly after the header entry */
		need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
		if (need <= room) {
			rc = ctr_stcctm(ctrset, ctrset_size,
					(u64 *)(ctrdata + 1));
		}
		if (rc != 3 || error_ok)
			ctrdata->ctr = ctrset_size;
		else
			need = 0;	/* stcctm failed and caller cares */
	}

	return need;
}

/* Counter-set enable/activation control bit per counter set */
static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
	[CPUMF_CTR_SET_BASIC]	= 0x02,
	[CPUMF_CTR_SET_USER]	= 0x04,
	[CPUMF_CTR_SET_CRYPTO]	= 0x08,
	[CPUMF_CTR_SET_EXT]	= 0x01,
	[CPUMF_CTR_SET_MT_DIAG] = 0x20,
};

/* Read out all counter sets and save them in the provided data buffer.
 * The last 64 byte host an artificial trailer entry.
 */
static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth,
			    bool error_ok)
{
	struct cf_trailer_entry *trailer;
	size_t offset = 0, done;
	int i;

	memset(data, 0, sz);
	sz -= sizeof(*trailer);		/* Always room for trailer */
	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
		struct cf_ctrset_entry *ctrdata = data + offset;

		if (!(auth & cpumf_ctr_ctl[i]))
			continue;	/* Counter set not authorized */

		done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok);
		offset += done;
	}
	/* Append trailer after the last stored counter set */
	trailer = data + offset;
	cfdiag_trailer(trailer);
	return offset + sizeof(*trailer);
}

/*
 * Calculate the difference for each counter in a counter set.
 * Writes the result into @pstop in place. A stop value smaller than the
 * start value is treated as a counter wrap-around.
 */
static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters)
{
	for (; --counters >= 0; ++pstart, ++pstop)
		if (*pstop >= *pstart)
			*pstop -= *pstart;
		else
			*pstop = *pstart - *pstop + 1;
}

/* Scan the counter sets and calculate the difference of each counter
 * in each set.
 * The result is the increment of each counter during the
 * period the counter set has been activated.
 *
 * Return true on success.
 */
static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
{
	struct cf_trailer_entry *trailer_start, *trailer_stop;
	struct cf_ctrset_entry *ctrstart, *ctrstop;
	size_t offset = 0;

	/* Only counter-set control bits below the enable bits are relevant */
	auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
	do {
		ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
		ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);

		/* Both snapshots must contain the same counter-set headers */
		if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
			pr_err_once("cpum_cf_diag counter set compare error "
				    "in set %i\n", ctrstart->set);
			return 0;
		}
		auth &= ~cpumf_ctr_ctl[ctrstart->set];
		if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
			cfdiag_diffctrset((u64 *)(ctrstart + 1),
					  (u64 *)(ctrstop + 1), ctrstart->ctr);
			offset += ctrstart->ctr * sizeof(u64) +
				  sizeof(*ctrstart);
		}
	} while (ctrstart->def && auth);

	/* Save time_stamp from start of event in stop's trailer */
	trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
	trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset);
trailer_stop->progusage[0] = trailer_start->timestamp; 583a029a4eaSThomas Richter 584a029a4eaSThomas Richter return 1; 585a029a4eaSThomas Richter } 586212188a5SHendrik Brueckner 587ee699f32SHendrik Brueckner static enum cpumf_ctr_set get_counter_set(u64 event) 588212188a5SHendrik Brueckner { 589ee699f32SHendrik Brueckner int set = CPUMF_CTR_SET_MAX; 590212188a5SHendrik Brueckner 591212188a5SHendrik Brueckner if (event < 32) 592212188a5SHendrik Brueckner set = CPUMF_CTR_SET_BASIC; 593212188a5SHendrik Brueckner else if (event < 64) 594212188a5SHendrik Brueckner set = CPUMF_CTR_SET_USER; 595212188a5SHendrik Brueckner else if (event < 128) 596212188a5SHendrik Brueckner set = CPUMF_CTR_SET_CRYPTO; 59746a984ffSThomas Richter else if (event < 288) 598212188a5SHendrik Brueckner set = CPUMF_CTR_SET_EXT; 599ee699f32SHendrik Brueckner else if (event >= 448 && event < 496) 600ee699f32SHendrik Brueckner set = CPUMF_CTR_SET_MT_DIAG; 601212188a5SHendrik Brueckner 602212188a5SHendrik Brueckner return set; 603212188a5SHendrik Brueckner } 604212188a5SHendrik Brueckner 6059ae9b868SThomas Richter static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set) 606212188a5SHendrik Brueckner { 607ee699f32SHendrik Brueckner u16 mtdiag_ctl; 60846c4d945SThomas Richter int err = 0; 609212188a5SHendrik Brueckner 610212188a5SHendrik Brueckner /* check required version for counter sets */ 611a029a4eaSThomas Richter switch (set) { 612212188a5SHendrik Brueckner case CPUMF_CTR_SET_BASIC: 613212188a5SHendrik Brueckner case CPUMF_CTR_SET_USER: 61446c4d945SThomas Richter if (cpumf_ctr_info.cfvn < 1) 615212188a5SHendrik Brueckner err = -EOPNOTSUPP; 616212188a5SHendrik Brueckner break; 617212188a5SHendrik Brueckner case CPUMF_CTR_SET_CRYPTO: 61846c4d945SThomas Richter if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 && 6199ae9b868SThomas Richter config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83)) 62046a984ffSThomas Richter err = -EOPNOTSUPP; 62146a984ffSThomas Richter break; 
622212188a5SHendrik Brueckner case CPUMF_CTR_SET_EXT: 62346c4d945SThomas Richter if (cpumf_ctr_info.csvn < 1) 624212188a5SHendrik Brueckner err = -EOPNOTSUPP; 6259ae9b868SThomas Richter if ((cpumf_ctr_info.csvn == 1 && config > 159) || 6269ae9b868SThomas Richter (cpumf_ctr_info.csvn == 2 && config > 175) || 6279ae9b868SThomas Richter (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 && 6289ae9b868SThomas Richter config > 255) || 6299ae9b868SThomas Richter (cpumf_ctr_info.csvn >= 6 && config > 287)) 630f47586b2SHendrik Brueckner err = -EOPNOTSUPP; 631212188a5SHendrik Brueckner break; 632ee699f32SHendrik Brueckner case CPUMF_CTR_SET_MT_DIAG: 63346c4d945SThomas Richter if (cpumf_ctr_info.csvn <= 3) 634ee699f32SHendrik Brueckner err = -EOPNOTSUPP; 635ee699f32SHendrik Brueckner /* 636ee699f32SHendrik Brueckner * MT-diagnostic counters are read-only. The counter set 637ee699f32SHendrik Brueckner * is automatically enabled and activated on all CPUs with 638ee699f32SHendrik Brueckner * multithreading (SMT). Deactivation of multithreading 639ee699f32SHendrik Brueckner * also disables the counter set. State changes are ignored 640ee699f32SHendrik Brueckner * by lcctl(). Because Linux controls SMT enablement through 641ee699f32SHendrik Brueckner * a kernel parameter only, the counter set is either disabled 642ee699f32SHendrik Brueckner * or enabled and active. 643ee699f32SHendrik Brueckner * 644ee699f32SHendrik Brueckner * Thus, the counters can only be used if SMT is on and the 645ee699f32SHendrik Brueckner * counter set is enabled and active. 
646ee699f32SHendrik Brueckner */ 64730e145f8SHendrik Brueckner mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]; 64846c4d945SThomas Richter if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) && 64946c4d945SThomas Richter (cpumf_ctr_info.enable_ctl & mtdiag_ctl) && 65046c4d945SThomas Richter (cpumf_ctr_info.act_ctl & mtdiag_ctl))) 651ee699f32SHendrik Brueckner err = -EOPNOTSUPP; 652ee699f32SHendrik Brueckner break; 653a029a4eaSThomas Richter case CPUMF_CTR_SET_MAX: 654a029a4eaSThomas Richter err = -EOPNOTSUPP; 655212188a5SHendrik Brueckner } 656212188a5SHendrik Brueckner 657212188a5SHendrik Brueckner return err; 658212188a5SHendrik Brueckner } 659212188a5SHendrik Brueckner 660212188a5SHendrik Brueckner /* 661212188a5SHendrik Brueckner * Change the CPUMF state to active. 662212188a5SHendrik Brueckner * Enable and activate the CPU-counter sets according 663212188a5SHendrik Brueckner * to the per-cpu control state. 664212188a5SHendrik Brueckner */ 665212188a5SHendrik Brueckner static void cpumf_pmu_enable(struct pmu *pmu) 666212188a5SHendrik Brueckner { 6679b9cf3c7SThomas Richter struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 668212188a5SHendrik Brueckner int err; 669212188a5SHendrik Brueckner 6709b9cf3c7SThomas Richter if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED)) 671212188a5SHendrik Brueckner return; 672212188a5SHendrik Brueckner 673a029a4eaSThomas Richter err = lcctl(cpuhw->state | cpuhw->dev_state); 674c01f2a5fSThomas Richter if (err) 675c01f2a5fSThomas Richter pr_err("Enabling the performance measuring unit failed with rc=%x\n", err); 676c01f2a5fSThomas Richter else 677212188a5SHendrik Brueckner cpuhw->flags |= PMU_F_ENABLED; 678212188a5SHendrik Brueckner } 679212188a5SHendrik Brueckner 680212188a5SHendrik Brueckner /* 681212188a5SHendrik Brueckner * Change the CPUMF state to inactive. 682212188a5SHendrik Brueckner * Disable and enable (inactive) the CPU-counter sets according 683212188a5SHendrik Brueckner * to the per-cpu control state. 
684212188a5SHendrik Brueckner */ 685212188a5SHendrik Brueckner static void cpumf_pmu_disable(struct pmu *pmu) 686212188a5SHendrik Brueckner { 6879b9cf3c7SThomas Richter struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 688212188a5SHendrik Brueckner u64 inactive; 6899b9cf3c7SThomas Richter int err; 690212188a5SHendrik Brueckner 6919b9cf3c7SThomas Richter if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED)) 692212188a5SHendrik Brueckner return; 693212188a5SHendrik Brueckner 694212188a5SHendrik Brueckner inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); 695a029a4eaSThomas Richter inactive |= cpuhw->dev_state; 696212188a5SHendrik Brueckner err = lcctl(inactive); 697c01f2a5fSThomas Richter if (err) 698c01f2a5fSThomas Richter pr_err("Disabling the performance measuring unit failed with rc=%x\n", err); 699c01f2a5fSThomas Richter else 700212188a5SHendrik Brueckner cpuhw->flags &= ~PMU_F_ENABLED; 701212188a5SHendrik Brueckner } 702212188a5SHendrik Brueckner 703212188a5SHendrik Brueckner /* Release the PMU if event is the last perf event */ 704212188a5SHendrik Brueckner static void hw_perf_event_destroy(struct perf_event *event) 705212188a5SHendrik Brueckner { 7069b9cf3c7SThomas Richter cpum_cf_free(event->cpu); 707212188a5SHendrik Brueckner } 708212188a5SHendrik Brueckner 709212188a5SHendrik Brueckner /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ 710212188a5SHendrik Brueckner static const int cpumf_generic_events_basic[] = { 711212188a5SHendrik Brueckner [PERF_COUNT_HW_CPU_CYCLES] = 0, 712212188a5SHendrik Brueckner [PERF_COUNT_HW_INSTRUCTIONS] = 1, 713212188a5SHendrik Brueckner [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 714212188a5SHendrik Brueckner [PERF_COUNT_HW_CACHE_MISSES] = -1, 715212188a5SHendrik Brueckner [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 716212188a5SHendrik Brueckner [PERF_COUNT_HW_BRANCH_MISSES] = -1, 717212188a5SHendrik Brueckner [PERF_COUNT_HW_BUS_CYCLES] = -1, 718212188a5SHendrik Brueckner }; 719212188a5SHendrik Brueckner /* 
CPUMF <-> perf event mappings for userspace (problem-state set) */ 720212188a5SHendrik Brueckner static const int cpumf_generic_events_user[] = { 721212188a5SHendrik Brueckner [PERF_COUNT_HW_CPU_CYCLES] = 32, 722212188a5SHendrik Brueckner [PERF_COUNT_HW_INSTRUCTIONS] = 33, 723212188a5SHendrik Brueckner [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 724212188a5SHendrik Brueckner [PERF_COUNT_HW_CACHE_MISSES] = -1, 725212188a5SHendrik Brueckner [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 726212188a5SHendrik Brueckner [PERF_COUNT_HW_BRANCH_MISSES] = -1, 727212188a5SHendrik Brueckner [PERF_COUNT_HW_BUS_CYCLES] = -1, 728212188a5SHendrik Brueckner }; 729212188a5SHendrik Brueckner 73091d5364dSThomas Richter static int is_userspace_event(u64 ev) 73191d5364dSThomas Richter { 73291d5364dSThomas Richter return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || 73391d5364dSThomas Richter cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev; 73491d5364dSThomas Richter } 73591d5364dSThomas Richter 7366a82e23fSThomas Richter static int __hw_perf_event_init(struct perf_event *event, unsigned int type) 737212188a5SHendrik Brueckner { 738212188a5SHendrik Brueckner struct perf_event_attr *attr = &event->attr; 739212188a5SHendrik Brueckner struct hw_perf_event *hwc = &event->hw; 740ee699f32SHendrik Brueckner enum cpumf_ctr_set set; 741212188a5SHendrik Brueckner u64 ev; 742212188a5SHendrik Brueckner 7436a82e23fSThomas Richter switch (type) { 744212188a5SHendrik Brueckner case PERF_TYPE_RAW: 745212188a5SHendrik Brueckner /* Raw events are used to access counters directly, 746212188a5SHendrik Brueckner * hence do not permit excludes */ 747212188a5SHendrik Brueckner if (attr->exclude_kernel || attr->exclude_user || 748212188a5SHendrik Brueckner attr->exclude_hv) 749212188a5SHendrik Brueckner return -EOPNOTSUPP; 750212188a5SHendrik Brueckner ev = attr->config; 751212188a5SHendrik Brueckner break; 752212188a5SHendrik Brueckner 753212188a5SHendrik Brueckner case PERF_TYPE_HARDWARE: 
754613a41b0SThomas Richter if (is_sampling_event(event)) /* No sampling support */ 755613a41b0SThomas Richter return -ENOENT; 756212188a5SHendrik Brueckner ev = attr->config; 757212188a5SHendrik Brueckner if (!attr->exclude_user && attr->exclude_kernel) { 75891d5364dSThomas Richter /* 75991d5364dSThomas Richter * Count user space (problem-state) only 76091d5364dSThomas Richter * Handle events 32 and 33 as 0:u and 1:u 76191d5364dSThomas Richter */ 76291d5364dSThomas Richter if (!is_userspace_event(ev)) { 763212188a5SHendrik Brueckner if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) 764212188a5SHendrik Brueckner return -EOPNOTSUPP; 765212188a5SHendrik Brueckner ev = cpumf_generic_events_user[ev]; 76691d5364dSThomas Richter } 767212188a5SHendrik Brueckner } else if (!attr->exclude_kernel && attr->exclude_user) { 76891d5364dSThomas Richter /* No support for kernel space counters only */ 769212188a5SHendrik Brueckner return -EOPNOTSUPP; 77091d5364dSThomas Richter } else { 77191d5364dSThomas Richter /* Count user and kernel space, incl. 
events 32 + 33 */ 77291d5364dSThomas Richter if (!is_userspace_event(ev)) { 773212188a5SHendrik Brueckner if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) 774212188a5SHendrik Brueckner return -EOPNOTSUPP; 775212188a5SHendrik Brueckner ev = cpumf_generic_events_basic[ev]; 776212188a5SHendrik Brueckner } 77791d5364dSThomas Richter } 778212188a5SHendrik Brueckner break; 779212188a5SHendrik Brueckner 780212188a5SHendrik Brueckner default: 781212188a5SHendrik Brueckner return -ENOENT; 782212188a5SHendrik Brueckner } 783212188a5SHendrik Brueckner 784212188a5SHendrik Brueckner if (ev == -1) 785212188a5SHendrik Brueckner return -ENOENT; 786212188a5SHendrik Brueckner 78720ba46daSHendrik Brueckner if (ev > PERF_CPUM_CF_MAX_CTR) 7880bb2ae1bSThomas Richter return -ENOENT; 789212188a5SHendrik Brueckner 790ee699f32SHendrik Brueckner /* Obtain the counter set to which the specified counter belongs */ 791ee699f32SHendrik Brueckner set = get_counter_set(ev); 792ee699f32SHendrik Brueckner switch (set) { 793ee699f32SHendrik Brueckner case CPUMF_CTR_SET_BASIC: 794ee699f32SHendrik Brueckner case CPUMF_CTR_SET_USER: 795ee699f32SHendrik Brueckner case CPUMF_CTR_SET_CRYPTO: 796ee699f32SHendrik Brueckner case CPUMF_CTR_SET_EXT: 797ee699f32SHendrik Brueckner case CPUMF_CTR_SET_MT_DIAG: 798ee699f32SHendrik Brueckner /* 799ee699f32SHendrik Brueckner * Use the hardware perf event structure to store the 800ee699f32SHendrik Brueckner * counter number in the 'config' member and the counter 801a029a4eaSThomas Richter * set number in the 'config_base' as bit mask. 802a029a4eaSThomas Richter * It is later used to enable/disable the counter(s). 
803212188a5SHendrik Brueckner */ 804212188a5SHendrik Brueckner hwc->config = ev; 805a029a4eaSThomas Richter hwc->config_base = cpumf_ctr_ctl[set]; 806ee699f32SHendrik Brueckner break; 807ee699f32SHendrik Brueckner case CPUMF_CTR_SET_MAX: 808ee699f32SHendrik Brueckner /* The counter could not be associated to a counter set */ 809ee699f32SHendrik Brueckner return -EINVAL; 8101c0a9c79SJiapeng Chong } 811212188a5SHendrik Brueckner 812212188a5SHendrik Brueckner /* Initialize for using the CPU-measurement counter facility */ 8139b9cf3c7SThomas Richter if (cpum_cf_alloc(event->cpu)) 8149b9cf3c7SThomas Richter return -ENOMEM; 815212188a5SHendrik Brueckner event->destroy = hw_perf_event_destroy; 816212188a5SHendrik Brueckner 8171a33aee1SThomas Richter /* 8181a33aee1SThomas Richter * Finally, validate version and authorization of the counter set. 8191a33aee1SThomas Richter * If the particular CPU counter set is not authorized, 8201a33aee1SThomas Richter * return with -ENOENT in order to fall back to other 8211a33aee1SThomas Richter * PMUs that might suffice the event request. 8221a33aee1SThomas Richter */ 8231a33aee1SThomas Richter if (!(hwc->config_base & cpumf_ctr_info.auth_ctl)) 8241a33aee1SThomas Richter return -ENOENT; 8251a33aee1SThomas Richter return validate_ctr_version(hwc->config, set); 826212188a5SHendrik Brueckner } 827212188a5SHendrik Brueckner 828be857b7fSThomas Richter /* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different 829be857b7fSThomas Richter * attribute::type values: 830be857b7fSThomas Richter * - PERF_TYPE_HARDWARE: 831be857b7fSThomas Richter * - pmu->type: 832be857b7fSThomas Richter * Handle both type of invocations identical. They address the same hardware. 833be857b7fSThomas Richter * The result is different when event modifiers exclude_kernel and/or 834be857b7fSThomas Richter * exclude_user are also set. 
835be857b7fSThomas Richter */ 836be857b7fSThomas Richter static int cpumf_pmu_event_type(struct perf_event *event) 837be857b7fSThomas Richter { 838be857b7fSThomas Richter u64 ev = event->attr.config; 839be857b7fSThomas Richter 840be857b7fSThomas Richter if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || 841be857b7fSThomas Richter cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || 842be857b7fSThomas Richter cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || 843be857b7fSThomas Richter cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) 844be857b7fSThomas Richter return PERF_TYPE_HARDWARE; 845be857b7fSThomas Richter return PERF_TYPE_RAW; 846be857b7fSThomas Richter } 847be857b7fSThomas Richter 848212188a5SHendrik Brueckner static int cpumf_pmu_event_init(struct perf_event *event) 849212188a5SHendrik Brueckner { 8506a82e23fSThomas Richter unsigned int type = event->attr.type; 851212188a5SHendrik Brueckner int err; 852212188a5SHendrik Brueckner 8536a82e23fSThomas Richter if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) 8546a82e23fSThomas Richter err = __hw_perf_event_init(event, type); 8556a82e23fSThomas Richter else if (event->pmu->type == type) 8566a82e23fSThomas Richter /* Registered as unknown PMU */ 857be857b7fSThomas Richter err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); 8586a82e23fSThomas Richter else 859212188a5SHendrik Brueckner return -ENOENT; 860212188a5SHendrik Brueckner 861212188a5SHendrik Brueckner if (unlikely(err) && event->destroy) 862212188a5SHendrik Brueckner event->destroy(event); 863212188a5SHendrik Brueckner 864212188a5SHendrik Brueckner return err; 865212188a5SHendrik Brueckner } 866212188a5SHendrik Brueckner 867212188a5SHendrik Brueckner static int hw_perf_event_reset(struct perf_event *event) 868212188a5SHendrik Brueckner { 869212188a5SHendrik Brueckner u64 prev, new; 870212188a5SHendrik Brueckner int err; 871212188a5SHendrik Brueckner 872212188a5SHendrik Brueckner do { 
873212188a5SHendrik Brueckner prev = local64_read(&event->hw.prev_count); 874212188a5SHendrik Brueckner err = ecctr(event->hw.config, &new); 875212188a5SHendrik Brueckner if (err) { 876212188a5SHendrik Brueckner if (err != 3) 877212188a5SHendrik Brueckner break; 878212188a5SHendrik Brueckner /* The counter is not (yet) available. This 879212188a5SHendrik Brueckner * might happen if the counter set to which 880212188a5SHendrik Brueckner * this counter belongs is in the disabled 881212188a5SHendrik Brueckner * state. 882212188a5SHendrik Brueckner */ 883212188a5SHendrik Brueckner new = 0; 884212188a5SHendrik Brueckner } 885212188a5SHendrik Brueckner } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); 886212188a5SHendrik Brueckner 887212188a5SHendrik Brueckner return err; 888212188a5SHendrik Brueckner } 889212188a5SHendrik Brueckner 890485527baSHendrik Brueckner static void hw_perf_event_update(struct perf_event *event) 891212188a5SHendrik Brueckner { 892212188a5SHendrik Brueckner u64 prev, new, delta; 893212188a5SHendrik Brueckner int err; 894212188a5SHendrik Brueckner 895212188a5SHendrik Brueckner do { 896212188a5SHendrik Brueckner prev = local64_read(&event->hw.prev_count); 897212188a5SHendrik Brueckner err = ecctr(event->hw.config, &new); 898212188a5SHendrik Brueckner if (err) 899485527baSHendrik Brueckner return; 900212188a5SHendrik Brueckner } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); 901212188a5SHendrik Brueckner 902212188a5SHendrik Brueckner delta = (prev <= new) ? 
new - prev 903212188a5SHendrik Brueckner : (-1ULL - prev) + new + 1; /* overflow */ 904212188a5SHendrik Brueckner local64_add(delta, &event->count); 905212188a5SHendrik Brueckner } 906212188a5SHendrik Brueckner 907212188a5SHendrik Brueckner static void cpumf_pmu_read(struct perf_event *event) 908212188a5SHendrik Brueckner { 909212188a5SHendrik Brueckner if (event->hw.state & PERF_HES_STOPPED) 910212188a5SHendrik Brueckner return; 911212188a5SHendrik Brueckner 912212188a5SHendrik Brueckner hw_perf_event_update(event); 913212188a5SHendrik Brueckner } 914212188a5SHendrik Brueckner 915212188a5SHendrik Brueckner static void cpumf_pmu_start(struct perf_event *event, int flags) 916212188a5SHendrik Brueckner { 9179b9cf3c7SThomas Richter struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 918212188a5SHendrik Brueckner struct hw_perf_event *hwc = &event->hw; 919a029a4eaSThomas Richter int i; 920212188a5SHendrik Brueckner 92115e5b53fSThomas Richter if (!(hwc->state & PERF_HES_STOPPED)) 922212188a5SHendrik Brueckner return; 923212188a5SHendrik Brueckner 924212188a5SHendrik Brueckner hwc->state = 0; 925212188a5SHendrik Brueckner 926212188a5SHendrik Brueckner /* (Re-)enable and activate the counter set */ 927212188a5SHendrik Brueckner ctr_set_enable(&cpuhw->state, hwc->config_base); 928212188a5SHendrik Brueckner ctr_set_start(&cpuhw->state, hwc->config_base); 929212188a5SHendrik Brueckner 930212188a5SHendrik Brueckner /* The counter set to which this counter belongs can be already active. 931212188a5SHendrik Brueckner * Because all counters in a set are active, the event->hw.prev_count 932212188a5SHendrik Brueckner * needs to be synchronized. At this point, the counter set can be in 933212188a5SHendrik Brueckner * the inactive or disabled state. 
934212188a5SHendrik Brueckner */ 935a029a4eaSThomas Richter if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 936a029a4eaSThomas Richter cpuhw->usedss = cfdiag_getctr(cpuhw->start, 937a029a4eaSThomas Richter sizeof(cpuhw->start), 938a029a4eaSThomas Richter hwc->config_base, true); 939a029a4eaSThomas Richter } else { 940212188a5SHendrik Brueckner hw_perf_event_reset(event); 941a029a4eaSThomas Richter } 942212188a5SHendrik Brueckner 943a029a4eaSThomas Richter /* Increment refcount for counter sets */ 944a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 945a029a4eaSThomas Richter if ((hwc->config_base & cpumf_ctr_ctl[i])) 946a029a4eaSThomas Richter atomic_inc(&cpuhw->ctr_set[i]); 947a029a4eaSThomas Richter } 948a029a4eaSThomas Richter 949a029a4eaSThomas Richter /* Create perf event sample with the counter sets as raw data. The sample 950a029a4eaSThomas Richter * is then pushed to the event subsystem and the function checks for 951a029a4eaSThomas Richter * possible event overflows. If an event overflow occurs, the PMU is 952a029a4eaSThomas Richter * stopped. 953a029a4eaSThomas Richter * 954a029a4eaSThomas Richter * Return non-zero if an event overflow occurred. 
955a029a4eaSThomas Richter */ 956a029a4eaSThomas Richter static int cfdiag_push_sample(struct perf_event *event, 957a029a4eaSThomas Richter struct cpu_cf_events *cpuhw) 958a029a4eaSThomas Richter { 959a029a4eaSThomas Richter struct perf_sample_data data; 960a029a4eaSThomas Richter struct perf_raw_record raw; 961a029a4eaSThomas Richter struct pt_regs regs; 962a029a4eaSThomas Richter int overflow; 963a029a4eaSThomas Richter 964a029a4eaSThomas Richter /* Setup perf sample */ 965a029a4eaSThomas Richter perf_sample_data_init(&data, 0, event->hw.last_period); 966a029a4eaSThomas Richter memset(®s, 0, sizeof(regs)); 967a029a4eaSThomas Richter memset(&raw, 0, sizeof(raw)); 968a029a4eaSThomas Richter 969a029a4eaSThomas Richter if (event->attr.sample_type & PERF_SAMPLE_CPU) 970a029a4eaSThomas Richter data.cpu_entry.cpu = event->cpu; 971a029a4eaSThomas Richter if (event->attr.sample_type & PERF_SAMPLE_RAW) { 972a029a4eaSThomas Richter raw.frag.size = cpuhw->usedss; 973a029a4eaSThomas Richter raw.frag.data = cpuhw->stop; 9740a9081cfSNamhyung Kim perf_sample_save_raw_data(&data, &raw); 975a029a4eaSThomas Richter } 976a029a4eaSThomas Richter 977a029a4eaSThomas Richter overflow = perf_event_overflow(event, &data, ®s); 978a029a4eaSThomas Richter if (overflow) 979a029a4eaSThomas Richter event->pmu->stop(event, 0); 980a029a4eaSThomas Richter 981a029a4eaSThomas Richter perf_event_update_userpage(event); 982a029a4eaSThomas Richter return overflow; 983212188a5SHendrik Brueckner } 984212188a5SHendrik Brueckner 985212188a5SHendrik Brueckner static void cpumf_pmu_stop(struct perf_event *event, int flags) 986212188a5SHendrik Brueckner { 9879b9cf3c7SThomas Richter struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 988212188a5SHendrik Brueckner struct hw_perf_event *hwc = &event->hw; 989a029a4eaSThomas Richter int i; 990212188a5SHendrik Brueckner 991212188a5SHendrik Brueckner if (!(hwc->state & PERF_HES_STOPPED)) { 992212188a5SHendrik Brueckner /* Decrement reference count for this counter set 
and if this 993212188a5SHendrik Brueckner * is the last used counter in the set, clear activation 994212188a5SHendrik Brueckner * control and set the counter set state to inactive. 995212188a5SHendrik Brueckner */ 996a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 997a029a4eaSThomas Richter if (!(hwc->config_base & cpumf_ctr_ctl[i])) 998a029a4eaSThomas Richter continue; 999a029a4eaSThomas Richter if (!atomic_dec_return(&cpuhw->ctr_set[i])) 1000a029a4eaSThomas Richter ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]); 1001a029a4eaSThomas Richter } 10020cceeab5SThomas Richter hwc->state |= PERF_HES_STOPPED; 1003212188a5SHendrik Brueckner } 1004212188a5SHendrik Brueckner 1005212188a5SHendrik Brueckner if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 1006a029a4eaSThomas Richter if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 1007a029a4eaSThomas Richter local64_inc(&event->count); 1008a029a4eaSThomas Richter cpuhw->usedss = cfdiag_getctr(cpuhw->stop, 1009a029a4eaSThomas Richter sizeof(cpuhw->stop), 1010a029a4eaSThomas Richter event->hw.config_base, 1011a029a4eaSThomas Richter false); 1012a029a4eaSThomas Richter if (cfdiag_diffctr(cpuhw, event->hw.config_base)) 1013a029a4eaSThomas Richter cfdiag_push_sample(event, cpuhw); 10149b9cf3c7SThomas Richter } else { 1015212188a5SHendrik Brueckner hw_perf_event_update(event); 10169d48c7afSThomas Richter } 10170cceeab5SThomas Richter hwc->state |= PERF_HES_UPTODATE; 1018212188a5SHendrik Brueckner } 1019212188a5SHendrik Brueckner } 1020212188a5SHendrik Brueckner 1021212188a5SHendrik Brueckner static int cpumf_pmu_add(struct perf_event *event, int flags) 1022212188a5SHendrik Brueckner { 10239b9cf3c7SThomas Richter struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 1024212188a5SHendrik Brueckner 1025212188a5SHendrik Brueckner ctr_set_enable(&cpuhw->state, event->hw.config_base); 1026212188a5SHendrik Brueckner event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 1027212188a5SHendrik 
Brueckner 1028212188a5SHendrik Brueckner if (flags & PERF_EF_START) 1029212188a5SHendrik Brueckner cpumf_pmu_start(event, PERF_EF_RELOAD); 1030212188a5SHendrik Brueckner 1031212188a5SHendrik Brueckner return 0; 1032212188a5SHendrik Brueckner } 1033212188a5SHendrik Brueckner 1034212188a5SHendrik Brueckner static void cpumf_pmu_del(struct perf_event *event, int flags) 1035212188a5SHendrik Brueckner { 10369b9cf3c7SThomas Richter struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 1037a029a4eaSThomas Richter int i; 1038212188a5SHendrik Brueckner 1039212188a5SHendrik Brueckner cpumf_pmu_stop(event, PERF_EF_UPDATE); 1040212188a5SHendrik Brueckner 1041212188a5SHendrik Brueckner /* Check if any counter in the counter set is still used. If not used, 1042212188a5SHendrik Brueckner * change the counter set to the disabled state. This also clears the 1043212188a5SHendrik Brueckner * content of all counters in the set. 1044212188a5SHendrik Brueckner * 1045212188a5SHendrik Brueckner * When a new perf event has been added but not yet started, this can 1046212188a5SHendrik Brueckner * clear enable control and resets all counters in a set. Therefore, 1047212188a5SHendrik Brueckner * cpumf_pmu_start() always has to reenable a counter set. 
1048212188a5SHendrik Brueckner */ 1049a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 1050a029a4eaSThomas Richter if (!atomic_read(&cpuhw->ctr_set[i])) 1051a029a4eaSThomas Richter ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]); 1052212188a5SHendrik Brueckner } 1053212188a5SHendrik Brueckner 1054212188a5SHendrik Brueckner /* Performance monitoring unit for s390x */ 1055212188a5SHendrik Brueckner static struct pmu cpumf_pmu = { 10569254e70cSHendrik Brueckner .task_ctx_nr = perf_sw_context, 10579254e70cSHendrik Brueckner .capabilities = PERF_PMU_CAP_NO_INTERRUPT, 1058212188a5SHendrik Brueckner .pmu_enable = cpumf_pmu_enable, 1059212188a5SHendrik Brueckner .pmu_disable = cpumf_pmu_disable, 1060212188a5SHendrik Brueckner .event_init = cpumf_pmu_event_init, 1061212188a5SHendrik Brueckner .add = cpumf_pmu_add, 1062212188a5SHendrik Brueckner .del = cpumf_pmu_del, 1063212188a5SHendrik Brueckner .start = cpumf_pmu_start, 1064212188a5SHendrik Brueckner .stop = cpumf_pmu_stop, 1065212188a5SHendrik Brueckner .read = cpumf_pmu_read, 1066212188a5SHendrik Brueckner }; 1067212188a5SHendrik Brueckner 10689b9cf3c7SThomas Richter static struct cfset_session { /* CPUs and counter set bit mask */ 10699b9cf3c7SThomas Richter struct list_head head; /* Head of list of active processes */ 10709b9cf3c7SThomas Richter } cfset_session = { 10719b9cf3c7SThomas Richter .head = LIST_HEAD_INIT(cfset_session.head) 10729b9cf3c7SThomas Richter }; 10731e99c242SThomas Richter 10749b9cf3c7SThomas Richter static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */ 10759b9cf3c7SThomas Richter /* 10769b9cf3c7SThomas Richter * Synchronize access to device /dev/hwc. This mutex protects against 10779b9cf3c7SThomas Richter * concurrent access to functions cfset_open() and cfset_release(). 10789b9cf3c7SThomas Richter * Same for CPU hotplug add and remove events triggering 10799b9cf3c7SThomas Richter * cpum_cf_online_cpu() and cpum_cf_offline_cpu(). 
10809b9cf3c7SThomas Richter * It also serializes concurrent device ioctl access from multiple 10819b9cf3c7SThomas Richter * processes accessing /dev/hwc. 10829b9cf3c7SThomas Richter * 10839b9cf3c7SThomas Richter * The mutex protects concurrent access to the /dev/hwctr session management 10849b9cf3c7SThomas Richter * struct cfset_session and reference counting variable cfset_opencnt. 10859b9cf3c7SThomas Richter */ 10869b9cf3c7SThomas Richter static DEFINE_MUTEX(cfset_ctrset_mutex); 10879b9cf3c7SThomas Richter 10889b9cf3c7SThomas Richter /* 10899b9cf3c7SThomas Richter * CPU hotplug handles only /dev/hwctr device. 10909b9cf3c7SThomas Richter * For perf_event_open() the CPU hotplug handling is done on kernel common 10919b9cf3c7SThomas Richter * code: 10929b9cf3c7SThomas Richter * - CPU add: Nothing is done since a file descriptor can not be created 10939b9cf3c7SThomas Richter * and returned to the user. 10949b9cf3c7SThomas Richter * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and 10959b9cf3c7SThomas Richter * pmu_delete(). The event itself is removed when the file descriptor is 10969b9cf3c7SThomas Richter * closed. 10979b9cf3c7SThomas Richter */ 10981e99c242SThomas Richter static int cfset_online_cpu(unsigned int cpu); 10999b9cf3c7SThomas Richter 11001e99c242SThomas Richter static int cpum_cf_online_cpu(unsigned int cpu) 11011e99c242SThomas Richter { 11029b9cf3c7SThomas Richter int rc = 0; 11039b9cf3c7SThomas Richter 11049b9cf3c7SThomas Richter /* 11059b9cf3c7SThomas Richter * Ignore notification for perf_event_open(). 11069b9cf3c7SThomas Richter * Handle only /dev/hwctr device sessions. 
	 */
	mutex_lock(&cfset_ctrset_mutex);
	if (refcount_read(&cfset_opencnt)) {
		rc = cpum_cf_alloc_cpu(cpu);
		if (!rc)
			cfset_online_cpu(cpu);
	}
	mutex_unlock(&cfset_ctrset_mutex);
	return rc;
}

static int cfset_offline_cpu(unsigned int cpu);

/* CPU hotplug callback: CPU goes offline. Release the per CPU resources
 * only for active /dev/hwctr sessions; perf_event_open() created events
 * are cleaned up by perf common code when their file descriptor closes.
 */
static int cpum_cf_offline_cpu(unsigned int cpu)
{
	/*
	 * During task exit processing of grouped perf events triggered by CPU
	 * hotplug processing, pmu_disable() is called as part of perf context
	 * removal process. Therefore do not trigger event removal now for
	 * perf_event_open() created events. Perf common code triggers event
	 * destruction when the event file descriptor is closed.
	 *
	 * Handle only /dev/hwctr device sessions.
	 */
	mutex_lock(&cfset_ctrset_mutex);
	if (refcount_read(&cfset_opencnt)) {
		cfset_offline_cpu(cpu);
		cpum_cf_free_cpu(cpu);
	}
	mutex_unlock(&cfset_ctrset_mutex);
	return 0;
}

/* Return true if store counter set multiple instruction is available */
static inline int stccm_avail(void)
{
	return test_facility(142);
}

/* CPU-measurement alerts for the counter facility.
 * External interrupt handler; reports authorization changes and lost
 * counter data. Runs in interrupt context.
 */
static void cpumf_measurement_alert(struct ext_code ext_code,
				    unsigned int alert, unsigned long unused)
{
	struct cpu_cf_events *cpuhw;

	if (!(alert & CPU_MF_INT_CF_MASK))
		return;

	inc_irq_stat(IRQEXT_CMC);

	/*
	 * Measurement alerts are shared and might happen when the PMU
	 * is not reserved. Ignore these alerts in this case.
	 */
	cpuhw = this_cpu_cfhw();
	if (!cpuhw)
		return;

	/* counter authorization change alert: re-read counter information */
	if (alert & CPU_MF_INT_CF_CACA)
		qctri(&cpumf_ctr_info);

	/* loss of counter data alert */
	if (alert & CPU_MF_INT_CF_LCDA)
		pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());

	/* loss of MT counter data alert */
	if (alert & CPU_MF_INT_CF_MTDA)
		pr_warn("CPU[%i] MT counter data was lost\n",
			smp_processor_id());
}

static int cfset_init(void);

/* Driver initialization: detect the counter facility, register the
 * measurement-alert interrupt handler, the s390dbf debug facility, the
 * cpum_cf PMU and the CPU hotplug callbacks. Undo in reverse order on error.
 */
static int __init cpumf_pmu_init(void)
{
	int rc;

	/* Extract counter measurement facility information */
	if (!cpum_cf_avail() || qctri(&cpumf_ctr_info))
		return -ENODEV;

	/* Determine and store counter set sizes for later reference */
	for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
		cpum_cf_make_setsize(rc);

	/*
	 * Clear bit 15 of cr0 to unauthorize problem-state to
	 * extract measurement counters
	 */
	ctl_clear_bit(0, 48);

	/* register handler for measurement-alert interruptions */
	rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
				   cpumf_measurement_alert);
	if (rc) {
		pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc);
		return rc;
	}

	/* Setup s390dbf facility */
	cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
	if (!cf_dbg) {
		pr_err("Registration of s390dbf(cpum_cf) failed\n");
		rc = -ENOMEM;
		goto out1;
	}
	debug_register_view(cf_dbg, &debug_sprintf_view);

	cpumf_pmu.attr_groups = cpumf_cf_event_group();
	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
	if (rc) {
		pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
		goto out2;
	} else if (stccm_avail()) {	/* Setup counter set device */
		cfset_init();
	}

	rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
			       "perf/s390/cf:online",
			       cpum_cf_online_cpu, cpum_cf_offline_cpu);
	return rc;

out2:
	debug_unregister_view(cf_dbg, &debug_sprintf_view);
	debug_unregister(cf_dbg);
out1:
	unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert);
	return rc;
}

/* Support for the CPU Measurement Facility counter set extraction using
 * device /dev/hwctr. This allows user space programs to extract complete
 * counter set via normal file operations.
 */

struct cfset_call_on_cpu_parm {		/* Parm struct for smp_call_on_cpu */
	unsigned int sets;		/* Counter set bit mask */
	atomic_t cpus_ack;		/* # CPUs successfully executed func */
};

struct cfset_request {			/* CPUs and counter set bit mask */
	unsigned long ctrset;		/* Bit mask of counter set to read */
	cpumask_t mask;			/* CPU mask to read from */
	struct list_head node;		/* Chain to cfset_session.head */
};

/* Initialize the global session list. Called on first open of /dev/hwctr. */
static void cfset_session_init(void)
{
	INIT_LIST_HEAD(&cfset_session.head);
}

/* Remove current request from global bookkeeping. Maintain a counter set bit
 * mask on a per CPU basis.
 * Done in process context under mutex protection.
 */
static void cfset_session_del(struct cfset_request *p)
{
	list_del(&p->node);
}

/* Add current request to global bookkeeping. Maintain a counter set bit mask
 * on a per CPU basis.
 * Done in process context under mutex protection.
 */
static void cfset_session_add(struct cfset_request *p)
{
	list_add(&p->node, &cfset_session.head);
}

/* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access
 * path is currently used.
 * The cpu_cf_events::dev_state is used to denote counter sets in use by this
 * interface. It is always or'ed in. If this interface is not active, its
 * value is zero and no additional counter sets will be included.
 *
 * The cpu_cf_events::state is used by the perf_event_open SVC and remains
 * unchanged.
 *
 * perf_pmu_enable() and perf_pmu_disable() and its call backs
 * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the
 * performance measurement subsystem to enable per process
 * CPU Measurement counter facility.
 * The XXX_enable() and XXX_disable functions are used to turn off
 * x86 performance monitoring interrupt (PMI) during scheduling.
 * s390 uses these calls to temporarily stop and resume the active CPU
 * counters sets during scheduling.
 *
 * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr
 * device access.
 * The perf_event_open() SVC interface makes a lot of effort
 * to only run the counters while the calling process is actively scheduled
 * to run.
 * When /dev/hwctr interface is also used at the same time, the counter sets
 * will keep running, even when the process is scheduled off a CPU.
 * However this is not a problem and does not lead to wrong counter values
 * for the perf_event_open() SVC. The current counter value will be recorded
 * during schedule-in. At schedule-out time the current counter value is
 * extracted again and the delta is calculated and added to the event.
 */

/* Stop all counter sets via ioctl interface.
 * Runs on each CPU of the request mask; drops per counter set reference
 * counts and stops a set only when its count reaches zero. Counter sets
 * still in use by perf_event_open() keep running.
 */
static void cfset_ioctl_off(void *parm)
{
	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
	struct cfset_call_on_cpu_parm *p = parm;
	int rc;

	/* Check if any counter set used by /dev/hwctr */
	for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
		if ((p->sets & cpumf_ctr_ctl[rc])) {
			if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
				ctr_set_disable(&cpuhw->dev_state,
						cpumf_ctr_ctl[rc]);
				ctr_set_stop(&cpuhw->dev_state,
					     cpumf_ctr_ctl[rc]);
			}
		}
	/* Keep perf_event_open counter sets */
	rc = lcctl(cpuhw->dev_state | cpuhw->state);
	if (rc)
		pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n",
		       cpuhw->state, S390_HWCTR_DEVICE, rc);
	if (!cpuhw->dev_state)
		cpuhw->flags &= ~PMU_F_IN_USE;
}

/* Start counter sets on particular CPU.
 * Runs on each CPU of the request mask; enables and starts the requested
 * counter sets and acknowledges success via p->cpus_ack so the caller can
 * verify that all CPUs started successfully.
 */
static void cfset_ioctl_on(void *parm)
{
	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
	struct cfset_call_on_cpu_parm *p = parm;
	int rc;

	cpuhw->flags |= PMU_F_IN_USE;
	ctr_set_enable(&cpuhw->dev_state, p->sets);
	ctr_set_start(&cpuhw->dev_state, p->sets);
	for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
		if ((p->sets & cpumf_ctr_ctl[rc]))
			atomic_inc(&cpuhw->ctr_set[rc]);
	rc = lcctl(cpuhw->dev_state | cpuhw->state);	/* Start counter sets */
	if (!rc)
		atomic_inc(&p->cpus_ack);
	else
		pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n",
		       cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc);
}

/* Drop all /dev/hwctr state on this CPU; counter sets started by
 * perf_event_open() (cpuhw->state) keep running. Called on last close.
 */
static void cfset_release_cpu(void *p)
{
	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
	int rc;

	cpuhw->dev_state = 0;
	rc = lcctl(cpuhw->state);	/* Keep perf_event_open counter sets */
	if (rc)
		pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n",
		       cpuhw->state, S390_HWCTR_DEVICE, rc);
}

/* This modifies the process CPU mask to adopt it to the currently online
 * CPUs. Offline CPUs can not be addressed. This call terminates the access
 * and is usually followed by close() or a new ioctl(..., START, ...) which
 * creates a new request structure.
 */
static void cfset_all_stop(struct cfset_request *req)
{
	struct cfset_call_on_cpu_parm p = {
		.sets = req->ctrset,
	};

	cpumask_and(&req->mask, &req->mask, cpu_online_mask);
	on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1);
}

/* Release function is also called when application gets terminated without
 * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
 */
static int cfset_release(struct inode *inode, struct file *file)
{
	mutex_lock(&cfset_ctrset_mutex);
	/* Open followed by close/exit has no private_data */
	if (file->private_data) {
		cfset_all_stop(file->private_data);
		cfset_session_del(file->private_data);
		kfree(file->private_data);
		file->private_data = NULL;
	}
	if (refcount_dec_and_test(&cfset_opencnt)) {	/* Last close */
		on_each_cpu(cfset_release_cpu, NULL, 1);
		cpum_cf_free(-1);
	}
	mutex_unlock(&cfset_ctrset_mutex);
	return 0;
}

/*
 * Open via /dev/hwctr device. Allocate all per CPU resources on the first
 * open of the device. The last close releases all per CPU resources.
 * Parallel perf_event_open system calls also use per CPU resources.
 * These invocations are handled via reference counting on the per CPU data
 * structures.
 */
static int cfset_open(struct inode *inode, struct file *file)
{
	int rc = 0;

	if (!perfmon_capable())
		return -EPERM;
	file->private_data = NULL;

	mutex_lock(&cfset_ctrset_mutex);
	if (!refcount_inc_not_zero(&cfset_opencnt)) {	/* First open */
		rc = cpum_cf_alloc(-1);
		if (!rc) {
			cfset_session_init();
			refcount_set(&cfset_opencnt, 1);
		}
	}
	mutex_unlock(&cfset_ctrset_mutex);

	/* nonseekable_open() never fails */
	return rc ?: nonseekable_open(inode, file);
}

/* Start the requested counter sets on all online CPUs of the request mask.
 * Returns -EIO and stops the sets again if any CPU failed to start them.
 */
static int cfset_all_start(struct cfset_request *req)
{
	struct cfset_call_on_cpu_parm p = {
		.sets = req->ctrset,
		.cpus_ack = ATOMIC_INIT(0),
	};
	cpumask_var_t mask;
	int rc = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_and(mask, &req->mask, cpu_online_mask);
	on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1);
	if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
		/* Not all CPUs acknowledged: roll back the started sets */
		on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
		rc = -EIO;
	}
	free_cpumask_var(mask);
	return rc;
}

/* Return the maximum required space for all possible CPUs in case one
 * CPU will be onlined during the START, READ, STOP cycles.
 * To find out the size of the counter sets, any one CPU will do. They
 * all have the same counter sets.
 */
static size_t cfset_needspace(unsigned int sets)
{
	size_t bytes = 0;
	int i;

	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
		if (!(sets & cpumf_ctr_ctl[i]))
			continue;
		bytes += cpum_cf_read_setsize(i) * sizeof(u64) +
			 sizeof(((struct s390_ctrset_setdata *)0)->set) +
			 sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
	}
	bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
		(bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
		 sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
	return bytes;
}

/* Copy the per CPU counter set buffers of all CPUs in @mask to the user
 * buffer at @arg (a struct s390_ctrset_read). Returns 0 or -EFAULT.
 * NOTE(review): rc is declared unsigned but carries -EFAULT; the value is
 * returned through an int return type — confirm this matches callers.
 */
static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
{
	struct s390_ctrset_read __user *ctrset_read;
	unsigned int cpu, cpus, rc = 0;
	void __user *uptr;

	ctrset_read = (struct s390_ctrset_read __user *)arg;
	uptr = ctrset_read->data;
	for_each_cpu(cpu, mask) {
		struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
		struct s390_ctrset_cpudata __user *ctrset_cpudata;

		ctrset_cpudata = uptr;
		rc = put_user(cpu, &ctrset_cpudata->cpu_nr);
		rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets);
		rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data,
				   cpuhw->used);
		if (rc) {
			rc = -EFAULT;
			goto out;
		}
		uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used;
		cond_resched();
	}
	cpus = cpumask_weight(mask);
	if (put_user(cpus, &ctrset_read->no_cpus))
		rc = -EFAULT;
out:
	return rc;
}

/* Store one counter set into buffer @p if @room suffices. Returns the
 * number of bytes consumed, or 0 when the set did not fit or the store
 * counter set multiple instruction stored nothing (condition code 3).
 */
static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
				int ctrset_size, size_t room)
{
	size_t need = 0;
	int rc = -1;

	need = sizeof(*p) + sizeof(u64) * ctrset_size;
	if (need <= room) {
		p->set = cpumf_ctr_ctl[ctrset];
		p->no_cnts = ctrset_size;
		rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
		if (rc == 3)		/* Nothing stored */
			need = 0;
	}
	return need;
}

/* Read all counter sets. Runs on each CPU of the request mask and fills
 * the per CPU cpuhw->data buffer with the requested counter sets.
 */
static void cfset_cpu_read(void *parm)
{
	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
	struct cfset_call_on_cpu_parm *p = parm;
	int set, set_size;
	size_t space;

	/* No data saved yet */
	cpuhw->used = 0;
	cpuhw->sets = 0;
	memset(cpuhw->data, 0, sizeof(cpuhw->data));

	/* Scan the counter sets */
	for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
		struct s390_ctrset_setdata *sp = (void *)cpuhw->data +
						 cpuhw->used;

		if (!(p->sets & cpumf_ctr_ctl[set]))
			continue;	/* Counter set not in list */
		set_size = cpum_cf_read_setsize(set);
		space = sizeof(cpuhw->data) - cpuhw->used;
		space = cfset_cpuset_read(sp, set, set_size, space);
		if (space) {
			cpuhw->used += space;
			cpuhw->sets += 1;
		}
	}
}

/* Collect counter data on all online CPUs of the request mask and copy it
 * to the user buffer at @arg.
 */
static int cfset_all_read(unsigned long arg, struct cfset_request *req)
{
	struct cfset_call_on_cpu_parm p;
	cpumask_var_t mask;
	int rc;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	p.sets = req->ctrset;
	cpumask_and(mask, &req->mask, cpu_online_mask);
	on_each_cpu_mask(mask, cfset_cpu_read, &p, 1);
	rc = cfset_all_copy(arg, mask);
	free_cpumask_var(mask);
	return rc;
}

/* S390_HWCTR_READ subcommand: read counter sets of an active request.
 * Returns -ENODATA when no request with counter sets is active.
 */
static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req)
{
	int ret = -ENODATA;

	if (req && req->ctrset)
		ret = cfset_all_read(arg, req);
	return ret;
}

/* S390_HWCTR_STOP subcommand: stop the active request of this file
 * descriptor and free it. Returns -ENXIO when no request is active.
 */
static long cfset_ioctl_stop(struct file *file)
{
	struct cfset_request *req = file->private_data;
	int ret = -ENXIO;

	if (req) {
		cfset_all_stop(req);
		cfset_session_del(req);
		kfree(req);
		file->private_data = NULL;
		ret = 0;
	}
	return ret;
}

/* S390_HWCTR_START subcommand: validate the user request, allocate a
 * request structure, report the needed buffer size back to user space and
 * start the counter sets on the requested CPUs.
 */
static long cfset_ioctl_start(unsigned long arg, struct file *file)
{
	struct s390_ctrset_start __user *ustart;
	struct s390_ctrset_start start;
	struct cfset_request *preq;
	void __user *umask;
	unsigned int len;
	int ret = 0;
	size_t need;

	if (file->private_data)
		return -EBUSY;
	ustart = (struct s390_ctrset_start __user *)arg;
	if (copy_from_user(&start, ustart, sizeof(start)))
		return -EFAULT;
	if (start.version != S390_HWCTR_START_VERSION)
		return -EINVAL;
	if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
				   cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
				   cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
				   cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
				   cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
		return -EINVAL;		/* Invalid counter set */
	if (!start.counter_sets)
		return -EINVAL;		/* No counter set at all? */

	preq = kzalloc(sizeof(*preq), GFP_KERNEL);
	if (!preq)
		return -ENOMEM;
	cpumask_clear(&preq->mask);
	len = min_t(u64, start.cpumask_len, cpumask_size());
	umask = (void __user *)start.cpumask;
	if (copy_from_user(&preq->mask, umask, len)) {
		kfree(preq);
		return -EFAULT;
	}
	if (cpumask_empty(&preq->mask)) {
		kfree(preq);
		return -EINVAL;
	}
	need = cfset_needspace(start.counter_sets);
	if (put_user(need, &ustart->data_bytes)) {
		kfree(preq);
		return -EFAULT;
	}
	preq->ctrset = start.counter_sets;
	ret = cfset_all_start(preq);
	if (!ret) {
		cfset_session_add(preq);
		file->private_data = preq;
	} else {
		kfree(preq);
	}
	return ret;
}

/* Entry point to the /dev/hwctr device interface.
 * The ioctl system call supports three subcommands:
 * S390_HWCTR_START: Start the specified counter sets on a CPU list. The
 *   counter set keeps running until explicitly stopped. Returns the number
 *   of bytes needed to store the counter values.
 *   If another S390_HWCTR_START
 *   ioctl subcommand is called without a previous S390_HWCTR_STOP stop
 *   command on the same file descriptor, -EBUSY is returned.
 * S390_HWCTR_READ: Read the counter set values from specified CPU list given
 *   with the S390_HWCTR_START command.
 * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the
 *   previous S390_HWCTR_START subcommand.
 */
static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	int ret;

	/* Hold CPUs stable and serialize against other ioctl/hotplug users */
	cpus_read_lock();
	mutex_lock(&cfset_ctrset_mutex);
	switch (cmd) {
	case S390_HWCTR_START:
		ret = cfset_ioctl_start(arg, file);
		break;
	case S390_HWCTR_STOP:
		ret = cfset_ioctl_stop(file);
		break;
	case S390_HWCTR_READ:
		ret = cfset_ioctl_read(arg, file->private_data);
		break;
	default:
		ret = -ENOTTY;
		break;
	}
	mutex_unlock(&cfset_ctrset_mutex);
	cpus_read_unlock();
	return ret;
}

static const struct file_operations cfset_fops = {
	.owner = THIS_MODULE,
	.open = cfset_open,
	.release = cfset_release,
	.unlocked_ioctl = cfset_ioctl,
	.compat_ioctl = cfset_ioctl,
	.llseek = no_llseek
};

static struct miscdevice cfset_dev = {
	.name = S390_HWCTR_DEVICE,
	.minor = MISC_DYNAMIC_MINOR,
	.fops = &cfset_fops,
	.mode = 0666,
};

/* Hotplug add of a CPU. Scan through all active processes and add
 * that CPU to the list of CPUs supplied with ioctl(..., START, ...).
 */
static int cfset_online_cpu(unsigned int cpu)
{
	struct cfset_call_on_cpu_parm p;
	struct cfset_request *rp;

	if (!list_empty(&cfset_session.head)) {
		list_for_each_entry(rp, &cfset_session.head, node) {
			p.sets = rp->ctrset;
			cfset_ioctl_on(&p);
			cpumask_set_cpu(cpu, &rp->mask);
		}
	}
	return 0;
}

/* Hotplug remove of a CPU. Scan through all active processes and clear
 * that CPU from the list of CPUs supplied with ioctl(..., START, ...).
 * Adjust reference counts.
 */
static int cfset_offline_cpu(unsigned int cpu)
{
	struct cfset_call_on_cpu_parm p;
	struct cfset_request *rp;

	if (!list_empty(&cfset_session.head)) {
		list_for_each_entry(rp, &cfset_session.head, node) {
			p.sets = rp->ctrset;
			cfset_ioctl_off(&p);
			cpumask_clear_cpu(cpu, &rp->mask);
		}
	}
	return 0;
}

/* Event read callback for the CF_DIAG event: intentionally a no-op. */
static void cfdiag_read(struct perf_event *event)
{
}

/* Return the bit mask of counter sets currently authorized by hardware. */
static int get_authctrsets(void)
{
	unsigned long auth = 0;
	enum cpumf_ctr_set i;

	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
		if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i])
			auth |= cpumf_ctr_ctl[i];
	}
	return auth;
}

/* Setup the event. Test for authorized counter sets and only include counter
 * sets which are authorized at the time of the setup. Including unauthorized
 * counter sets result in specification exception (and panic).
1757a029a4eaSThomas Richter */ 1758a029a4eaSThomas Richter static int cfdiag_event_init2(struct perf_event *event) 1759a029a4eaSThomas Richter { 1760a029a4eaSThomas Richter struct perf_event_attr *attr = &event->attr; 1761a029a4eaSThomas Richter int err = 0; 1762a029a4eaSThomas Richter 1763a029a4eaSThomas Richter /* Set sample_period to indicate sampling */ 1764a029a4eaSThomas Richter event->hw.config = attr->config; 1765a029a4eaSThomas Richter event->hw.sample_period = attr->sample_period; 1766a029a4eaSThomas Richter local64_set(&event->hw.period_left, event->hw.sample_period); 1767a029a4eaSThomas Richter local64_set(&event->count, 0); 1768a029a4eaSThomas Richter event->hw.last_period = event->hw.sample_period; 1769a029a4eaSThomas Richter 1770a029a4eaSThomas Richter /* Add all authorized counter sets to config_base. The 1771a029a4eaSThomas Richter * the hardware init function is either called per-cpu or just once 1772a029a4eaSThomas Richter * for all CPUS (event->cpu == -1). This depends on the whether 1773a029a4eaSThomas Richter * counting is started for all CPUs or on a per workload base where 1774a029a4eaSThomas Richter * the perf event moves from one CPU to another CPU. 1775a029a4eaSThomas Richter * Checking the authorization on any CPU is fine as the hardware 1776a029a4eaSThomas Richter * applies the same authorization settings to all CPUs. 
1777a029a4eaSThomas Richter */ 1778a029a4eaSThomas Richter event->hw.config_base = get_authctrsets(); 1779a029a4eaSThomas Richter 1780a029a4eaSThomas Richter /* No authorized counter sets, nothing to count/sample */ 1781a029a4eaSThomas Richter if (!event->hw.config_base) 1782a029a4eaSThomas Richter err = -EINVAL; 1783a029a4eaSThomas Richter 1784a029a4eaSThomas Richter return err; 1785a029a4eaSThomas Richter } 1786a029a4eaSThomas Richter 1787a029a4eaSThomas Richter static int cfdiag_event_init(struct perf_event *event) 1788a029a4eaSThomas Richter { 1789a029a4eaSThomas Richter struct perf_event_attr *attr = &event->attr; 1790a029a4eaSThomas Richter int err = -ENOENT; 1791a029a4eaSThomas Richter 1792a029a4eaSThomas Richter if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || 1793a029a4eaSThomas Richter event->attr.type != event->pmu->type) 1794a029a4eaSThomas Richter goto out; 1795a029a4eaSThomas Richter 1796a029a4eaSThomas Richter /* Raw events are used to access counters directly, 1797a029a4eaSThomas Richter * hence do not permit excludes. 1798a029a4eaSThomas Richter * This event is useless without PERF_SAMPLE_RAW to return counter set 1799a029a4eaSThomas Richter * values as raw data. 
1800a029a4eaSThomas Richter */ 1801a029a4eaSThomas Richter if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || 1802a029a4eaSThomas Richter !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { 1803a029a4eaSThomas Richter err = -EOPNOTSUPP; 1804a029a4eaSThomas Richter goto out; 1805a029a4eaSThomas Richter } 1806a029a4eaSThomas Richter 1807a029a4eaSThomas Richter /* Initialize for using the CPU-measurement counter facility */ 18089b9cf3c7SThomas Richter if (cpum_cf_alloc(event->cpu)) 18099b9cf3c7SThomas Richter return -ENOMEM; 1810a029a4eaSThomas Richter event->destroy = hw_perf_event_destroy; 1811a029a4eaSThomas Richter 1812a029a4eaSThomas Richter err = cfdiag_event_init2(event); 1813a029a4eaSThomas Richter if (unlikely(err)) 1814a029a4eaSThomas Richter event->destroy(event); 1815a029a4eaSThomas Richter out: 1816a029a4eaSThomas Richter return err; 1817a029a4eaSThomas Richter } 1818a029a4eaSThomas Richter 1819a029a4eaSThomas Richter /* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used 1820a029a4eaSThomas Richter * to collect the complete counter sets for a scheduled process. Target 1821a029a4eaSThomas Richter * are complete counter sets attached as raw data to the artificial event. 1822a029a4eaSThomas Richter * This results in complete counter sets available when a process is 1823a029a4eaSThomas Richter * scheduled. Contains the delta of every counter while the process was 1824a029a4eaSThomas Richter * running. 
1825a029a4eaSThomas Richter */ 1826a029a4eaSThomas Richter CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); 1827a029a4eaSThomas Richter 1828a029a4eaSThomas Richter static struct attribute *cfdiag_events_attr[] = { 1829a029a4eaSThomas Richter CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), 1830a029a4eaSThomas Richter NULL, 1831a029a4eaSThomas Richter }; 1832a029a4eaSThomas Richter 1833a029a4eaSThomas Richter PMU_FORMAT_ATTR(event, "config:0-63"); 1834a029a4eaSThomas Richter 1835a029a4eaSThomas Richter static struct attribute *cfdiag_format_attr[] = { 1836a029a4eaSThomas Richter &format_attr_event.attr, 1837a029a4eaSThomas Richter NULL, 1838a029a4eaSThomas Richter }; 1839a029a4eaSThomas Richter 1840a029a4eaSThomas Richter static struct attribute_group cfdiag_events_group = { 1841a029a4eaSThomas Richter .name = "events", 1842a029a4eaSThomas Richter .attrs = cfdiag_events_attr, 1843a029a4eaSThomas Richter }; 1844a029a4eaSThomas Richter static struct attribute_group cfdiag_format_group = { 1845a029a4eaSThomas Richter .name = "format", 1846a029a4eaSThomas Richter .attrs = cfdiag_format_attr, 1847a029a4eaSThomas Richter }; 1848a029a4eaSThomas Richter static const struct attribute_group *cfdiag_attr_groups[] = { 1849a029a4eaSThomas Richter &cfdiag_events_group, 1850a029a4eaSThomas Richter &cfdiag_format_group, 1851a029a4eaSThomas Richter NULL, 1852a029a4eaSThomas Richter }; 1853a029a4eaSThomas Richter 1854a029a4eaSThomas Richter /* Performance monitoring unit for event CF_DIAG. Since this event 1855a029a4eaSThomas Richter * is also started and stopped via the perf_event_open() system call, use 1856a029a4eaSThomas Richter * the same event enable/disable call back functions. They do not 1857a029a4eaSThomas Richter * have a pointer to the perf_event strcture as first parameter. 1858a029a4eaSThomas Richter * 1859a029a4eaSThomas Richter * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. 
1860a029a4eaSThomas Richter * Reuse them and distinguish the event (always first parameter) via 1861a029a4eaSThomas Richter * 'config' member. 1862a029a4eaSThomas Richter */ 1863a029a4eaSThomas Richter static struct pmu cf_diag = { 1864a029a4eaSThomas Richter .task_ctx_nr = perf_sw_context, 1865a029a4eaSThomas Richter .event_init = cfdiag_event_init, 1866a029a4eaSThomas Richter .pmu_enable = cpumf_pmu_enable, 1867a029a4eaSThomas Richter .pmu_disable = cpumf_pmu_disable, 1868a029a4eaSThomas Richter .add = cpumf_pmu_add, 1869a029a4eaSThomas Richter .del = cpumf_pmu_del, 1870a029a4eaSThomas Richter .start = cpumf_pmu_start, 1871a029a4eaSThomas Richter .stop = cpumf_pmu_stop, 1872a029a4eaSThomas Richter .read = cfdiag_read, 1873a029a4eaSThomas Richter 1874a029a4eaSThomas Richter .attr_groups = cfdiag_attr_groups 1875a029a4eaSThomas Richter }; 1876a029a4eaSThomas Richter 1877a029a4eaSThomas Richter /* Calculate memory needed to store all counter sets together with header and 1878a029a4eaSThomas Richter * trailer data. This is independent of the counter set authorization which 1879a029a4eaSThomas Richter * can vary depending on the configuration. 
1880a029a4eaSThomas Richter */ 1881a029a4eaSThomas Richter static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) 1882a029a4eaSThomas Richter { 1883a029a4eaSThomas Richter size_t max_size = sizeof(struct cf_trailer_entry); 1884a029a4eaSThomas Richter enum cpumf_ctr_set i; 1885a029a4eaSThomas Richter 1886a029a4eaSThomas Richter for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 188746c4d945SThomas Richter size_t size = cpum_cf_read_setsize(i); 1888a029a4eaSThomas Richter 1889a029a4eaSThomas Richter if (size) 1890a029a4eaSThomas Richter max_size += size * sizeof(u64) + 1891a029a4eaSThomas Richter sizeof(struct cf_ctrset_entry); 1892a029a4eaSThomas Richter } 1893a029a4eaSThomas Richter return max_size; 1894a029a4eaSThomas Richter } 1895a029a4eaSThomas Richter 1896a029a4eaSThomas Richter /* Get the CPU speed, try sampling facility first and CPU attributes second. */ 1897a029a4eaSThomas Richter static void cfdiag_get_cpu_speed(void) 1898a029a4eaSThomas Richter { 18994efd417fSVasily Gorbik unsigned long mhz; 19004efd417fSVasily Gorbik 1901a029a4eaSThomas Richter if (cpum_sf_avail()) { /* Sampling facility first */ 1902a029a4eaSThomas Richter struct hws_qsi_info_block si; 1903a029a4eaSThomas Richter 1904a029a4eaSThomas Richter memset(&si, 0, sizeof(si)); 1905a029a4eaSThomas Richter if (!qsi(&si)) { 1906a029a4eaSThomas Richter cfdiag_cpu_speed = si.cpu_speed; 1907a029a4eaSThomas Richter return; 1908a029a4eaSThomas Richter } 1909a029a4eaSThomas Richter } 1910a029a4eaSThomas Richter 1911a029a4eaSThomas Richter /* Fallback: CPU speed extract static part. Used in case 1912a029a4eaSThomas Richter * CPU Measurement Sampling Facility is turned off. 
1913a029a4eaSThomas Richter */ 19144efd417fSVasily Gorbik mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); 1915a029a4eaSThomas Richter if (mhz != -1UL) 1916a029a4eaSThomas Richter cfdiag_cpu_speed = mhz & 0xffffffff; 1917a029a4eaSThomas Richter } 1918a029a4eaSThomas Richter 1919a029a4eaSThomas Richter static int cfset_init(void) 1920a029a4eaSThomas Richter { 1921a029a4eaSThomas Richter size_t need; 1922a029a4eaSThomas Richter int rc; 1923a029a4eaSThomas Richter 1924a029a4eaSThomas Richter cfdiag_get_cpu_speed(); 1925a029a4eaSThomas Richter /* Make sure the counter set data fits into predefined buffer. */ 192646c4d945SThomas Richter need = cfdiag_maxsize(&cpumf_ctr_info); 1927a029a4eaSThomas Richter if (need > sizeof(((struct cpu_cf_events *)0)->start)) { 1928a029a4eaSThomas Richter pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", 1929a029a4eaSThomas Richter need); 1930a029a4eaSThomas Richter return -ENOMEM; 1931a029a4eaSThomas Richter } 1932a029a4eaSThomas Richter 1933a029a4eaSThomas Richter rc = misc_register(&cfset_dev); 1934a029a4eaSThomas Richter if (rc) { 1935a029a4eaSThomas Richter pr_err("Registration of /dev/%s failed rc=%i\n", 1936a029a4eaSThomas Richter cfset_dev.name, rc); 1937a029a4eaSThomas Richter goto out; 1938a029a4eaSThomas Richter } 1939a029a4eaSThomas Richter 1940a029a4eaSThomas Richter rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); 1941a029a4eaSThomas Richter if (rc) { 1942a029a4eaSThomas Richter misc_deregister(&cfset_dev); 1943a029a4eaSThomas Richter pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", 1944a029a4eaSThomas Richter rc); 1945a029a4eaSThomas Richter } 1946a029a4eaSThomas Richter out: 1947a029a4eaSThomas Richter return rc; 1948a029a4eaSThomas Richter } 1949a029a4eaSThomas Richter 1950a029a4eaSThomas Richter device_initcall(cpumf_pmu_init); 1951