// SPDX-License-Identifier: GPL-2.0
/*
 * Performance event support for s390x - CPU-measurement Counter Facility
 *
 *  Copyright IBM Corp. 2012, 2021
 *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
 *	       Thomas Richter <tmricht@linux.ibm.com>
 */
#define KMSG_COMPONENT	"cpum_cf"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/miscdevice.h>

#include <asm/cpu_mcf.h>
#include <asm/hwctrset.h>
#include <asm/debug.h>

static unsigned int cfdiag_cpu_speed;	/* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;		/* s390dbf handle used for tracing */

#define	CF_DIAG_CTRSET_DEF		0xfeef	/* Counter set header mark */

/* Counter sets are stored as data stream in a page sized memory buffer and
 * exported to user space via raw data attached to the event sample data.
 * Each counter set starts with an eight byte header (struct cf_ctrset_entry)
 * consisting of:
 * - a two byte eye catcher (0xfeef)
 * - a two byte counter set number
 * - a two byte counter set size (indicates the number of counters in this set)
 * - a two byte reserved value (must be zero) to make the header the same
 *   size as a counter value.
 * All counter values are eight byte in size.
 *
 * All counter sets are followed by a 64 byte trailer.
 * The trailer consists of a:
 * - flag field indicating valid fields when corresponding bit set
 * - the counter facility first and second version number
 * - the CPU speed if nonzero
 * - the time stamp the counter sets have been collected
 * - the time of day (TOD) base value
 * - the machine type.
 *
 * The counter sets are saved when the process is prepared to be executed on a
 * CPU and saved again when the process is going to be removed from a CPU.
 * The difference of both counter sets is calculated and stored in the event
 * sample data area.
 */
struct cf_ctrset_entry {	/* CPU-M CF counter set entry (8 byte) */
	unsigned int def:16;	/* 0-15  Data Entry Format */
	unsigned int set:16;	/* 16-31 Counter set identifier */
	unsigned int ctr:16;	/* 32-47 Number of stored counters */
	unsigned int res1:16;	/* 48-63 Reserved */
};

struct cf_trailer_entry {	/* CPU-M CF_DIAG trailer (64 byte) */
	/* 0 - 7 */
	union {
		struct {
			unsigned int clock_base:1;	/* TOD clock base set */
			unsigned int speed:1;		/* CPU speed set */
			/* Measurement alerts */
			unsigned int mtda:1;	/* Loss of MT ctr. data alert */
			unsigned int caca:1;	/* Counter auth. change alert */
			unsigned int lcda:1;	/* Loss of counter data alert */
		};
		unsigned long flags;	/* 0-63    All indicators */
	};
	/* 8 - 15 */
	unsigned int cfvn:16;			/* 64-79   Ctr First Version */
	unsigned int csvn:16;			/* 80-95   Ctr Second Version */
	unsigned int cpu_speed:32;		/* 96-127  CPU speed */
	/* 16 - 23 */
	unsigned long timestamp;		/* 128-191 Timestamp (TOD) */
	/* 24 - 55 */
	union {
		struct {
			unsigned long progusage1;
			unsigned long progusage2;
			unsigned long progusage3;
			unsigned long tod_base;	/* Overlays progusage[3] */
		};
		unsigned long progusage[4];
	};
	/* 56 - 63 */
	unsigned int mach_type:16;		/* Machine type */
	unsigned int res1:16;			/* Reserved */
	unsigned int res2:32;			/* Reserved */
};

/* Create the trailer data at the end of a page.
*/
static void cfdiag_trailer(struct cf_trailer_entry *te)
{
	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
	struct cpuid id;

	/* Counter facility first and second version number */
	te->cfvn = cpuhw->info.cfvn;
	te->csvn = cpuhw->info.csvn;

	/* The machine type is taken from the CPU identification */
	get_cpu_id(&id);
	te->mach_type = id.machine;

	/* The speed indicator is raised only for a nonzero CPU speed */
	te->cpu_speed = cfdiag_cpu_speed;
	if (te->cpu_speed)
		te->speed = 1;

	/* The TOD clock base is always stored and flagged as valid */
	te->clock_base = 1;
	te->tod_base = tod_clock_base.tod;

	/* Time stamp of the trailer creation */
	te->timestamp = get_tod_clock_fast();
}

/* Read a counter set. The counter set number determines the counter set and
 * the CPUM-CF first and second version number determine the number of
 * available counters in each counter set.
 * Each counter set starts with header containing the counter set number and
 * the number of eight byte counters.
 *
 * The functions returns the number of bytes occupied by this counter set
 * including the header.
 * If there is no counter in the counter set, this counter set is useless and
 * zero is returned on this case.
 *
 * Note that the counter sets may not be enabled or active and the stcctm
 * instruction might return error 3. Depending on error_ok value this is ok,
 * for example when called from cpumf_pmu_start() call back function.
129 */ 130 static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, 131 size_t room, bool error_ok) 132 { 133 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 134 size_t ctrset_size, need = 0; 135 int rc = 3; /* Assume write failure */ 136 137 ctrdata->def = CF_DIAG_CTRSET_DEF; 138 ctrdata->set = ctrset; 139 ctrdata->res1 = 0; 140 ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info); 141 142 if (ctrset_size) { /* Save data */ 143 need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); 144 if (need <= room) { 145 rc = ctr_stcctm(ctrset, ctrset_size, 146 (u64 *)(ctrdata + 1)); 147 } 148 if (rc != 3 || error_ok) 149 ctrdata->ctr = ctrset_size; 150 else 151 need = 0; 152 } 153 154 debug_sprintf_event(cf_dbg, 3, 155 "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" 156 " need %zd rc %d\n", __func__, ctrset, ctrset_size, 157 cpuhw->info.cfvn, cpuhw->info.csvn, need, rc); 158 return need; 159 } 160 161 /* Read out all counter sets and save them in the provided data buffer. 162 * The last 64 byte host an artificial trailer entry. 163 */ 164 static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth, 165 bool error_ok) 166 { 167 struct cf_trailer_entry *trailer; 168 size_t offset = 0, done; 169 int i; 170 171 memset(data, 0, sz); 172 sz -= sizeof(*trailer); /* Always room for trailer */ 173 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 174 struct cf_ctrset_entry *ctrdata = data + offset; 175 176 if (!(auth & cpumf_ctr_ctl[i])) 177 continue; /* Counter set not authorized */ 178 179 done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok); 180 offset += done; 181 } 182 trailer = data + offset; 183 cfdiag_trailer(trailer); 184 return offset + sizeof(*trailer); 185 } 186 187 /* Calculate the difference for each counter in a counter set. 
*/ 188 static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters) 189 { 190 for (; --counters >= 0; ++pstart, ++pstop) 191 if (*pstop >= *pstart) 192 *pstop -= *pstart; 193 else 194 *pstop = *pstart - *pstop + 1; 195 } 196 197 /* Scan the counter sets and calculate the difference of each counter 198 * in each set. The result is the increment of each counter during the 199 * period the counter set has been activated. 200 * 201 * Return true on success. 202 */ 203 static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) 204 { 205 struct cf_trailer_entry *trailer_start, *trailer_stop; 206 struct cf_ctrset_entry *ctrstart, *ctrstop; 207 size_t offset = 0; 208 209 auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; 210 do { 211 ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); 212 ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); 213 214 if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { 215 pr_err_once("cpum_cf_diag counter set compare error " 216 "in set %i\n", ctrstart->set); 217 return 0; 218 } 219 auth &= ~cpumf_ctr_ctl[ctrstart->set]; 220 if (ctrstart->def == CF_DIAG_CTRSET_DEF) { 221 cfdiag_diffctrset((u64 *)(ctrstart + 1), 222 (u64 *)(ctrstop + 1), ctrstart->ctr); 223 offset += ctrstart->ctr * sizeof(u64) + 224 sizeof(*ctrstart); 225 } 226 } while (ctrstart->def && auth); 227 228 /* Save time_stamp from start of event in stop's trailer */ 229 trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); 230 trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset); 231 trailer_stop->progusage[0] = trailer_start->timestamp; 232 233 return 1; 234 } 235 236 static enum cpumf_ctr_set get_counter_set(u64 event) 237 { 238 int set = CPUMF_CTR_SET_MAX; 239 240 if (event < 32) 241 set = CPUMF_CTR_SET_BASIC; 242 else if (event < 64) 243 set = CPUMF_CTR_SET_USER; 244 else if (event < 128) 245 set = CPUMF_CTR_SET_CRYPTO; 246 else if (event < 288) 247 set = CPUMF_CTR_SET_EXT; 248 else if (event >= 448 && event < 496) 
249 set = CPUMF_CTR_SET_MT_DIAG; 250 251 return set; 252 } 253 254 static int validate_ctr_version(const struct hw_perf_event *hwc, 255 enum cpumf_ctr_set set) 256 { 257 struct cpu_cf_events *cpuhw; 258 int err = 0; 259 u16 mtdiag_ctl; 260 261 cpuhw = &get_cpu_var(cpu_cf_events); 262 263 /* check required version for counter sets */ 264 switch (set) { 265 case CPUMF_CTR_SET_BASIC: 266 case CPUMF_CTR_SET_USER: 267 if (cpuhw->info.cfvn < 1) 268 err = -EOPNOTSUPP; 269 break; 270 case CPUMF_CTR_SET_CRYPTO: 271 if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 && 272 hwc->config > 79) || 273 (cpuhw->info.csvn >= 6 && hwc->config > 83)) 274 err = -EOPNOTSUPP; 275 break; 276 case CPUMF_CTR_SET_EXT: 277 if (cpuhw->info.csvn < 1) 278 err = -EOPNOTSUPP; 279 if ((cpuhw->info.csvn == 1 && hwc->config > 159) || 280 (cpuhw->info.csvn == 2 && hwc->config > 175) || 281 (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5 282 && hwc->config > 255) || 283 (cpuhw->info.csvn >= 6 && hwc->config > 287)) 284 err = -EOPNOTSUPP; 285 break; 286 case CPUMF_CTR_SET_MT_DIAG: 287 if (cpuhw->info.csvn <= 3) 288 err = -EOPNOTSUPP; 289 /* 290 * MT-diagnostic counters are read-only. The counter set 291 * is automatically enabled and activated on all CPUs with 292 * multithreading (SMT). Deactivation of multithreading 293 * also disables the counter set. State changes are ignored 294 * by lcctl(). Because Linux controls SMT enablement through 295 * a kernel parameter only, the counter set is either disabled 296 * or enabled and active. 297 * 298 * Thus, the counters can only be used if SMT is on and the 299 * counter set is enabled and active. 
300 */ 301 mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]; 302 if (!((cpuhw->info.auth_ctl & mtdiag_ctl) && 303 (cpuhw->info.enable_ctl & mtdiag_ctl) && 304 (cpuhw->info.act_ctl & mtdiag_ctl))) 305 err = -EOPNOTSUPP; 306 break; 307 case CPUMF_CTR_SET_MAX: 308 err = -EOPNOTSUPP; 309 } 310 311 put_cpu_var(cpu_cf_events); 312 return err; 313 } 314 315 static int validate_ctr_auth(const struct hw_perf_event *hwc) 316 { 317 struct cpu_cf_events *cpuhw; 318 int err = 0; 319 320 cpuhw = &get_cpu_var(cpu_cf_events); 321 322 /* Check authorization for cpu counter sets. 323 * If the particular CPU counter set is not authorized, 324 * return with -ENOENT in order to fall back to other 325 * PMUs that might suffice the event request. 326 */ 327 if (!(hwc->config_base & cpuhw->info.auth_ctl)) 328 err = -ENOENT; 329 330 put_cpu_var(cpu_cf_events); 331 return err; 332 } 333 334 /* 335 * Change the CPUMF state to active. 336 * Enable and activate the CPU-counter sets according 337 * to the per-cpu control state. 338 */ 339 static void cpumf_pmu_enable(struct pmu *pmu) 340 { 341 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 342 int err; 343 344 if (cpuhw->flags & PMU_F_ENABLED) 345 return; 346 347 err = lcctl(cpuhw->state | cpuhw->dev_state); 348 if (err) { 349 pr_err("Enabling the performance measuring unit " 350 "failed with rc=%x\n", err); 351 return; 352 } 353 354 cpuhw->flags |= PMU_F_ENABLED; 355 } 356 357 /* 358 * Change the CPUMF state to inactive. 359 * Disable and enable (inactive) the CPU-counter sets according 360 * to the per-cpu control state. 
361 */ 362 static void cpumf_pmu_disable(struct pmu *pmu) 363 { 364 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 365 int err; 366 u64 inactive; 367 368 if (!(cpuhw->flags & PMU_F_ENABLED)) 369 return; 370 371 inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); 372 inactive |= cpuhw->dev_state; 373 err = lcctl(inactive); 374 if (err) { 375 pr_err("Disabling the performance measuring unit " 376 "failed with rc=%x\n", err); 377 return; 378 } 379 380 cpuhw->flags &= ~PMU_F_ENABLED; 381 } 382 383 384 /* Number of perf events counting hardware events */ 385 static atomic_t num_events = ATOMIC_INIT(0); 386 /* Used to avoid races in calling reserve/release_cpumf_hardware */ 387 static DEFINE_MUTEX(pmc_reserve_mutex); 388 389 /* Release the PMU if event is the last perf event */ 390 static void hw_perf_event_destroy(struct perf_event *event) 391 { 392 if (!atomic_add_unless(&num_events, -1, 1)) { 393 mutex_lock(&pmc_reserve_mutex); 394 if (atomic_dec_return(&num_events) == 0) 395 __kernel_cpumcf_end(); 396 mutex_unlock(&pmc_reserve_mutex); 397 } 398 } 399 400 /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ 401 static const int cpumf_generic_events_basic[] = { 402 [PERF_COUNT_HW_CPU_CYCLES] = 0, 403 [PERF_COUNT_HW_INSTRUCTIONS] = 1, 404 [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 405 [PERF_COUNT_HW_CACHE_MISSES] = -1, 406 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 407 [PERF_COUNT_HW_BRANCH_MISSES] = -1, 408 [PERF_COUNT_HW_BUS_CYCLES] = -1, 409 }; 410 /* CPUMF <-> perf event mappings for userspace (problem-state set) */ 411 static const int cpumf_generic_events_user[] = { 412 [PERF_COUNT_HW_CPU_CYCLES] = 32, 413 [PERF_COUNT_HW_INSTRUCTIONS] = 33, 414 [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 415 [PERF_COUNT_HW_CACHE_MISSES] = -1, 416 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 417 [PERF_COUNT_HW_BRANCH_MISSES] = -1, 418 [PERF_COUNT_HW_BUS_CYCLES] = -1, 419 }; 420 421 static void cpumf_hw_inuse(void) 422 { 423 
mutex_lock(&pmc_reserve_mutex); 424 if (atomic_inc_return(&num_events) == 1) 425 __kernel_cpumcf_begin(); 426 mutex_unlock(&pmc_reserve_mutex); 427 } 428 429 static int __hw_perf_event_init(struct perf_event *event, unsigned int type) 430 { 431 struct perf_event_attr *attr = &event->attr; 432 struct hw_perf_event *hwc = &event->hw; 433 enum cpumf_ctr_set set; 434 int err = 0; 435 u64 ev; 436 437 switch (type) { 438 case PERF_TYPE_RAW: 439 /* Raw events are used to access counters directly, 440 * hence do not permit excludes */ 441 if (attr->exclude_kernel || attr->exclude_user || 442 attr->exclude_hv) 443 return -EOPNOTSUPP; 444 ev = attr->config; 445 break; 446 447 case PERF_TYPE_HARDWARE: 448 if (is_sampling_event(event)) /* No sampling support */ 449 return -ENOENT; 450 ev = attr->config; 451 /* Count user space (problem-state) only */ 452 if (!attr->exclude_user && attr->exclude_kernel) { 453 if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) 454 return -EOPNOTSUPP; 455 ev = cpumf_generic_events_user[ev]; 456 457 /* No support for kernel space counters only */ 458 } else if (!attr->exclude_kernel && attr->exclude_user) { 459 return -EOPNOTSUPP; 460 } else { /* Count user and kernel space */ 461 if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) 462 return -EOPNOTSUPP; 463 ev = cpumf_generic_events_basic[ev]; 464 } 465 break; 466 467 default: 468 return -ENOENT; 469 } 470 471 if (ev == -1) 472 return -ENOENT; 473 474 if (ev > PERF_CPUM_CF_MAX_CTR) 475 return -ENOENT; 476 477 /* Obtain the counter set to which the specified counter belongs */ 478 set = get_counter_set(ev); 479 switch (set) { 480 case CPUMF_CTR_SET_BASIC: 481 case CPUMF_CTR_SET_USER: 482 case CPUMF_CTR_SET_CRYPTO: 483 case CPUMF_CTR_SET_EXT: 484 case CPUMF_CTR_SET_MT_DIAG: 485 /* 486 * Use the hardware perf event structure to store the 487 * counter number in the 'config' member and the counter 488 * set number in the 'config_base' as bit mask. 489 * It is later used to enable/disable the counter(s). 
490 */ 491 hwc->config = ev; 492 hwc->config_base = cpumf_ctr_ctl[set]; 493 break; 494 case CPUMF_CTR_SET_MAX: 495 /* The counter could not be associated to a counter set */ 496 return -EINVAL; 497 } 498 499 /* Initialize for using the CPU-measurement counter facility */ 500 cpumf_hw_inuse(); 501 event->destroy = hw_perf_event_destroy; 502 503 /* Finally, validate version and authorization of the counter set */ 504 err = validate_ctr_auth(hwc); 505 if (!err) 506 err = validate_ctr_version(hwc, set); 507 508 return err; 509 } 510 511 static int cpumf_pmu_event_init(struct perf_event *event) 512 { 513 unsigned int type = event->attr.type; 514 int err; 515 516 if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) 517 err = __hw_perf_event_init(event, type); 518 else if (event->pmu->type == type) 519 /* Registered as unknown PMU */ 520 err = __hw_perf_event_init(event, PERF_TYPE_RAW); 521 else 522 return -ENOENT; 523 524 if (unlikely(err) && event->destroy) 525 event->destroy(event); 526 527 return err; 528 } 529 530 static int hw_perf_event_reset(struct perf_event *event) 531 { 532 u64 prev, new; 533 int err; 534 535 do { 536 prev = local64_read(&event->hw.prev_count); 537 err = ecctr(event->hw.config, &new); 538 if (err) { 539 if (err != 3) 540 break; 541 /* The counter is not (yet) available. This 542 * might happen if the counter set to which 543 * this counter belongs is in the disabled 544 * state. 545 */ 546 new = 0; 547 } 548 } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); 549 550 return err; 551 } 552 553 static void hw_perf_event_update(struct perf_event *event) 554 { 555 u64 prev, new, delta; 556 int err; 557 558 do { 559 prev = local64_read(&event->hw.prev_count); 560 err = ecctr(event->hw.config, &new); 561 if (err) 562 return; 563 } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); 564 565 delta = (prev <= new) ? 
new - prev 566 : (-1ULL - prev) + new + 1; /* overflow */ 567 local64_add(delta, &event->count); 568 } 569 570 static void cpumf_pmu_read(struct perf_event *event) 571 { 572 if (event->hw.state & PERF_HES_STOPPED) 573 return; 574 575 hw_perf_event_update(event); 576 } 577 578 static void cpumf_pmu_start(struct perf_event *event, int flags) 579 { 580 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 581 struct hw_perf_event *hwc = &event->hw; 582 int i; 583 584 if (!(hwc->state & PERF_HES_STOPPED)) 585 return; 586 587 hwc->state = 0; 588 589 /* (Re-)enable and activate the counter set */ 590 ctr_set_enable(&cpuhw->state, hwc->config_base); 591 ctr_set_start(&cpuhw->state, hwc->config_base); 592 593 /* The counter set to which this counter belongs can be already active. 594 * Because all counters in a set are active, the event->hw.prev_count 595 * needs to be synchronized. At this point, the counter set can be in 596 * the inactive or disabled state. 597 */ 598 if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 599 cpuhw->usedss = cfdiag_getctr(cpuhw->start, 600 sizeof(cpuhw->start), 601 hwc->config_base, true); 602 } else { 603 hw_perf_event_reset(event); 604 } 605 606 /* Increment refcount for counter sets */ 607 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 608 if ((hwc->config_base & cpumf_ctr_ctl[i])) 609 atomic_inc(&cpuhw->ctr_set[i]); 610 } 611 612 /* Create perf event sample with the counter sets as raw data. The sample 613 * is then pushed to the event subsystem and the function checks for 614 * possible event overflows. If an event overflow occurs, the PMU is 615 * stopped. 616 * 617 * Return non-zero if an event overflow occurred. 
618 */ 619 static int cfdiag_push_sample(struct perf_event *event, 620 struct cpu_cf_events *cpuhw) 621 { 622 struct perf_sample_data data; 623 struct perf_raw_record raw; 624 struct pt_regs regs; 625 int overflow; 626 627 /* Setup perf sample */ 628 perf_sample_data_init(&data, 0, event->hw.last_period); 629 memset(®s, 0, sizeof(regs)); 630 memset(&raw, 0, sizeof(raw)); 631 632 if (event->attr.sample_type & PERF_SAMPLE_CPU) 633 data.cpu_entry.cpu = event->cpu; 634 if (event->attr.sample_type & PERF_SAMPLE_RAW) { 635 raw.frag.size = cpuhw->usedss; 636 raw.frag.data = cpuhw->stop; 637 raw.size = raw.frag.size; 638 data.raw = &raw; 639 } 640 641 overflow = perf_event_overflow(event, &data, ®s); 642 debug_sprintf_event(cf_dbg, 3, 643 "%s event %#llx sample_type %#llx raw %d ov %d\n", 644 __func__, event->hw.config, 645 event->attr.sample_type, raw.size, overflow); 646 if (overflow) 647 event->pmu->stop(event, 0); 648 649 perf_event_update_userpage(event); 650 return overflow; 651 } 652 653 static void cpumf_pmu_stop(struct perf_event *event, int flags) 654 { 655 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 656 struct hw_perf_event *hwc = &event->hw; 657 int i; 658 659 if (!(hwc->state & PERF_HES_STOPPED)) { 660 /* Decrement reference count for this counter set and if this 661 * is the last used counter in the set, clear activation 662 * control and set the counter set state to inactive. 
663 */ 664 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 665 if (!(hwc->config_base & cpumf_ctr_ctl[i])) 666 continue; 667 if (!atomic_dec_return(&cpuhw->ctr_set[i])) 668 ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]); 669 } 670 hwc->state |= PERF_HES_STOPPED; 671 } 672 673 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 674 if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 675 local64_inc(&event->count); 676 cpuhw->usedss = cfdiag_getctr(cpuhw->stop, 677 sizeof(cpuhw->stop), 678 event->hw.config_base, 679 false); 680 if (cfdiag_diffctr(cpuhw, event->hw.config_base)) 681 cfdiag_push_sample(event, cpuhw); 682 } else 683 hw_perf_event_update(event); 684 hwc->state |= PERF_HES_UPTODATE; 685 } 686 } 687 688 static int cpumf_pmu_add(struct perf_event *event, int flags) 689 { 690 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 691 692 ctr_set_enable(&cpuhw->state, event->hw.config_base); 693 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 694 695 if (flags & PERF_EF_START) 696 cpumf_pmu_start(event, PERF_EF_RELOAD); 697 698 return 0; 699 } 700 701 static void cpumf_pmu_del(struct perf_event *event, int flags) 702 { 703 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 704 int i; 705 706 cpumf_pmu_stop(event, PERF_EF_UPDATE); 707 708 /* Check if any counter in the counter set is still used. If not used, 709 * change the counter set to the disabled state. This also clears the 710 * content of all counters in the set. 711 * 712 * When a new perf event has been added but not yet started, this can 713 * clear enable control and resets all counters in a set. Therefore, 714 * cpumf_pmu_start() always has to reenable a counter set. 
715 */ 716 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 717 if (!atomic_read(&cpuhw->ctr_set[i])) 718 ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]); 719 } 720 721 /* Performance monitoring unit for s390x */ 722 static struct pmu cpumf_pmu = { 723 .task_ctx_nr = perf_sw_context, 724 .capabilities = PERF_PMU_CAP_NO_INTERRUPT, 725 .pmu_enable = cpumf_pmu_enable, 726 .pmu_disable = cpumf_pmu_disable, 727 .event_init = cpumf_pmu_event_init, 728 .add = cpumf_pmu_add, 729 .del = cpumf_pmu_del, 730 .start = cpumf_pmu_start, 731 .stop = cpumf_pmu_stop, 732 .read = cpumf_pmu_read, 733 }; 734 735 static int cfset_init(void); 736 static int __init cpumf_pmu_init(void) 737 { 738 int rc; 739 740 if (!kernel_cpumcf_avail()) 741 return -ENODEV; 742 743 /* Setup s390dbf facility */ 744 cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); 745 if (!cf_dbg) { 746 pr_err("Registration of s390dbf(cpum_cf) failed\n"); 747 return -ENOMEM; 748 } 749 debug_register_view(cf_dbg, &debug_sprintf_view); 750 751 cpumf_pmu.attr_groups = cpumf_cf_event_group(); 752 rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); 753 if (rc) { 754 debug_unregister_view(cf_dbg, &debug_sprintf_view); 755 debug_unregister(cf_dbg); 756 pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); 757 } else if (stccm_avail()) { /* Setup counter set device */ 758 cfset_init(); 759 } 760 return rc; 761 } 762 763 /* Support for the CPU Measurement Facility counter set extraction using 764 * device /dev/hwctr. This allows user space programs to extract complete 765 * counter set via normal file operations. 766 */ 767 768 static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Excl. 
access */
static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */
struct cfset_call_on_cpu_parm {		/* Parm struct for smp_call_on_cpu */
	unsigned int sets;		/* Counter set bit mask */
	atomic_t cpus_ack;		/* # CPUs successfully executed func */
};

static struct cfset_request {		/* CPUs and counter set bit mask */
	unsigned long ctrset;		/* Bit mask of counter set to read */
	cpumask_t mask;			/* CPU mask to read from */
} cfset_request;

/* Reset the request: no CPU and no counter set selected */
static void cfset_ctrset_clear(void)
{
	cpumask_clear(&cfset_request.mask);
	cfset_request.ctrset = 0;
}

/* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access
 * path is currently used.
 * The cpu_cf_events::dev_state is used to denote counter sets in use by this
 * interface. It is always or'ed in. If this interface is not active, its
 * value is zero and no additional counter sets will be included.
 *
 * The cpu_cf_events::state is used by the perf_event_open SVC and remains
 * unchanged.
 *
 * perf_pmu_enable() and perf_pmu_disable() and their call backs
 * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the
 * performance measurement subsystem to enable per process
 * CPU Measurement counter facility.
 * The XXX_enable() and XXX_disable functions are used to turn off
 * x86 performance monitoring interrupt (PMI) during scheduling.
 * s390 uses these calls to temporarily stop and resume the active CPU
 * counters sets during scheduling.
 *
 * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr
 * device access.  The perf_event_open() SVC interface makes a lot of effort
 * to only run the counters while the calling process is actively scheduled
 * to run.
 * When /dev/hwctr interface is also used at the same time, the counter sets
 * will keep running, even when the process is scheduled off a CPU.
810 * However this is not a problem and does not lead to wrong counter values 811 * for the perf_event_open() SVC. The current counter value will be recorded 812 * during schedule-in. At schedule-out time the current counter value is 813 * extracted again and the delta is calculated and added to the event. 814 */ 815 /* Stop all counter sets via ioctl interface */ 816 static void cfset_ioctl_off(void *parm) 817 { 818 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 819 struct cfset_call_on_cpu_parm *p = parm; 820 int rc; 821 822 cpuhw->dev_state = 0; 823 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 824 if ((p->sets & cpumf_ctr_ctl[rc])) 825 atomic_dec(&cpuhw->ctr_set[rc]); 826 rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ 827 if (rc) 828 pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n", 829 cpuhw->state, S390_HWCTR_DEVICE, rc); 830 cpuhw->flags &= ~PMU_F_IN_USE; 831 debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", 832 __func__, rc, cpuhw->state, cpuhw->dev_state); 833 } 834 835 /* Start counter sets on particular CPU */ 836 static void cfset_ioctl_on(void *parm) 837 { 838 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 839 struct cfset_call_on_cpu_parm *p = parm; 840 int rc; 841 842 cpuhw->flags |= PMU_F_IN_USE; 843 ctr_set_enable(&cpuhw->dev_state, p->sets); 844 ctr_set_start(&cpuhw->dev_state, p->sets); 845 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 846 if ((p->sets & cpumf_ctr_ctl[rc])) 847 atomic_inc(&cpuhw->ctr_set[rc]); 848 rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */ 849 if (!rc) 850 atomic_inc(&p->cpus_ack); 851 else 852 pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n", 853 cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); 854 debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", 855 __func__, rc, cpuhw->state, cpuhw->dev_state); 856 } 857 858 static void cfset_release_cpu(void *p) 859 { 
860 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 861 int rc; 862 863 debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", 864 __func__, cpuhw->state, cpuhw->dev_state); 865 rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ 866 if (rc) 867 pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n", 868 cpuhw->state, S390_HWCTR_DEVICE, rc); 869 cpuhw->dev_state = 0; 870 } 871 872 /* Release function is also called when application gets terminated without 873 * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. 874 */ 875 static int cfset_release(struct inode *inode, struct file *file) 876 { 877 on_each_cpu(cfset_release_cpu, NULL, 1); 878 hw_perf_event_destroy(NULL); 879 cfset_ctrset_clear(); 880 atomic_set(&cfset_opencnt, 0); 881 return 0; 882 } 883 884 static int cfset_open(struct inode *inode, struct file *file) 885 { 886 if (!capable(CAP_SYS_ADMIN)) 887 return -EPERM; 888 /* Only one user space program can open /dev/hwctr */ 889 if (atomic_xchg(&cfset_opencnt, 1)) 890 return -EBUSY; 891 892 cpumf_hw_inuse(); 893 file->private_data = NULL; 894 /* nonseekable_open() never fails */ 895 return nonseekable_open(inode, file); 896 } 897 898 static int cfset_all_stop(void) 899 { 900 struct cfset_call_on_cpu_parm p = { 901 .sets = cfset_request.ctrset, 902 }; 903 cpumask_var_t mask; 904 905 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 906 return -ENOMEM; 907 cpumask_and(mask, &cfset_request.mask, cpu_online_mask); 908 on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); 909 free_cpumask_var(mask); 910 return 0; 911 } 912 913 static int cfset_all_start(void) 914 { 915 struct cfset_call_on_cpu_parm p = { 916 .sets = cfset_request.ctrset, 917 .cpus_ack = ATOMIC_INIT(0), 918 }; 919 cpumask_var_t mask; 920 int rc = 0; 921 922 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 923 return -ENOMEM; 924 cpumask_and(mask, &cfset_request.mask, cpu_online_mask); 925 on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1); 926 if (atomic_read(&p.cpus_ack) 
!= cpumask_weight(mask)) { 927 on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); 928 rc = -EIO; 929 debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__); 930 } 931 free_cpumask_var(mask); 932 return rc; 933 } 934 935 936 /* Return the maximum required space for all possible CPUs in case one 937 * CPU will be onlined during the START, READ, STOP cycles. 938 * To find out the size of the counter sets, any one CPU will do. They 939 * all have the same counter sets. 940 */ 941 static size_t cfset_needspace(unsigned int sets) 942 { 943 struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events); 944 size_t bytes = 0; 945 int i; 946 947 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 948 if (!(sets & cpumf_ctr_ctl[i])) 949 continue; 950 bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) + 951 sizeof(((struct s390_ctrset_setdata *)0)->set) + 952 sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); 953 } 954 bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * 955 (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + 956 sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); 957 put_cpu_ptr(&cpu_cf_events); 958 return bytes; 959 } 960 961 static int cfset_all_copy(unsigned long arg, cpumask_t *mask) 962 { 963 struct s390_ctrset_read __user *ctrset_read; 964 unsigned int cpu, cpus, rc; 965 void __user *uptr; 966 967 ctrset_read = (struct s390_ctrset_read __user *)arg; 968 uptr = ctrset_read->data; 969 for_each_cpu(cpu, mask) { 970 struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu); 971 struct s390_ctrset_cpudata __user *ctrset_cpudata; 972 973 ctrset_cpudata = uptr; 974 rc = put_user(cpu, &ctrset_cpudata->cpu_nr); 975 rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets); 976 rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data, 977 cpuhw->used); 978 if (rc) 979 return -EFAULT; 980 uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used; 981 cond_resched(); 982 } 983 cpus = cpumask_weight(mask); 984 if 
(put_user(cpus, &ctrset_read->no_cpus)) 985 return -EFAULT; 986 debug_sprintf_event(cf_dbg, 4, "%s copied %ld\n", __func__, 987 uptr - (void __user *)ctrset_read->data); 988 return 0; 989 } 990 991 static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, 992 int ctrset_size, size_t room) 993 { 994 size_t need = 0; 995 int rc = -1; 996 997 need = sizeof(*p) + sizeof(u64) * ctrset_size; 998 if (need <= room) { 999 p->set = cpumf_ctr_ctl[ctrset]; 1000 p->no_cnts = ctrset_size; 1001 rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); 1002 if (rc == 3) /* Nothing stored */ 1003 need = 0; 1004 } 1005 return need; 1006 } 1007 1008 /* Read all counter sets. */ 1009 static void cfset_cpu_read(void *parm) 1010 { 1011 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 1012 struct cfset_call_on_cpu_parm *p = parm; 1013 int set, set_size; 1014 size_t space; 1015 1016 /* No data saved yet */ 1017 cpuhw->used = 0; 1018 cpuhw->sets = 0; 1019 memset(cpuhw->data, 0, sizeof(cpuhw->data)); 1020 1021 /* Scan the counter sets */ 1022 for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { 1023 struct s390_ctrset_setdata *sp = (void *)cpuhw->data + 1024 cpuhw->used; 1025 1026 if (!(p->sets & cpumf_ctr_ctl[set])) 1027 continue; /* Counter set not in list */ 1028 set_size = cpum_cf_ctrset_size(set, &cpuhw->info); 1029 space = sizeof(cpuhw->data) - cpuhw->used; 1030 space = cfset_cpuset_read(sp, set, set_size, space); 1031 if (space) { 1032 cpuhw->used += space; 1033 cpuhw->sets += 1; 1034 } 1035 } 1036 debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, 1037 cpuhw->sets, cpuhw->used); 1038 } 1039 1040 static int cfset_all_read(unsigned long arg) 1041 { 1042 struct cfset_call_on_cpu_parm p; 1043 cpumask_var_t mask; 1044 int rc; 1045 1046 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 1047 return -ENOMEM; 1048 1049 p.sets = cfset_request.ctrset; 1050 cpumask_and(mask, &cfset_request.mask, cpu_online_mask); 1051 on_each_cpu_mask(mask, 
cfset_cpu_read, &p, 1); 1052 rc = cfset_all_copy(arg, mask); 1053 free_cpumask_var(mask); 1054 return rc; 1055 } 1056 1057 static long cfset_ioctl_read(unsigned long arg) 1058 { 1059 struct s390_ctrset_read read; 1060 int ret = 0; 1061 1062 if (copy_from_user(&read, (char __user *)arg, sizeof(read))) 1063 return -EFAULT; 1064 ret = cfset_all_read(arg); 1065 return ret; 1066 } 1067 1068 static long cfset_ioctl_stop(void) 1069 { 1070 int ret = ENXIO; 1071 1072 if (cfset_request.ctrset) { 1073 ret = cfset_all_stop(); 1074 cfset_ctrset_clear(); 1075 } 1076 return ret; 1077 } 1078 1079 static long cfset_ioctl_start(unsigned long arg) 1080 { 1081 struct s390_ctrset_start __user *ustart; 1082 struct s390_ctrset_start start; 1083 void __user *umask; 1084 unsigned int len; 1085 int ret = 0; 1086 size_t need; 1087 1088 if (cfset_request.ctrset) 1089 return -EBUSY; 1090 ustart = (struct s390_ctrset_start __user *)arg; 1091 if (copy_from_user(&start, ustart, sizeof(start))) 1092 return -EFAULT; 1093 if (start.version != S390_HWCTR_START_VERSION) 1094 return -EINVAL; 1095 if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | 1096 cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | 1097 cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | 1098 cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | 1099 cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) 1100 return -EINVAL; /* Invalid counter set */ 1101 if (!start.counter_sets) 1102 return -EINVAL; /* No counter set at all? 
*/ 1103 cpumask_clear(&cfset_request.mask); 1104 len = min_t(u64, start.cpumask_len, cpumask_size()); 1105 umask = (void __user *)start.cpumask; 1106 if (copy_from_user(&cfset_request.mask, umask, len)) 1107 return -EFAULT; 1108 if (cpumask_empty(&cfset_request.mask)) 1109 return -EINVAL; 1110 need = cfset_needspace(start.counter_sets); 1111 if (put_user(need, &ustart->data_bytes)) 1112 ret = -EFAULT; 1113 if (ret) 1114 goto out; 1115 cfset_request.ctrset = start.counter_sets; 1116 ret = cfset_all_start(); 1117 out: 1118 if (ret) 1119 cfset_ctrset_clear(); 1120 debug_sprintf_event(cf_dbg, 4, "%s sets %#lx need %ld ret %d\n", 1121 __func__, cfset_request.ctrset, need, ret); 1122 return ret; 1123 } 1124 1125 /* Entry point to the /dev/hwctr device interface. 1126 * The ioctl system call supports three subcommands: 1127 * S390_HWCTR_START: Start the specified counter sets on a CPU list. The 1128 * counter set keeps running until explicitly stopped. Returns the number 1129 * of bytes needed to store the counter values. If another S390_HWCTR_START 1130 * ioctl subcommand is called without a previous S390_HWCTR_STOP stop 1131 * command, -EBUSY is returned. 1132 * S390_HWCTR_READ: Read the counter set values from specified CPU list given 1133 * with the S390_HWCTR_START command. 1134 * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the 1135 * previous S390_HWCTR_START subcommand. 
1136 */ 1137 static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 1138 { 1139 int ret; 1140 1141 get_online_cpus(); 1142 mutex_lock(&cfset_ctrset_mutex); 1143 switch (cmd) { 1144 case S390_HWCTR_START: 1145 ret = cfset_ioctl_start(arg); 1146 break; 1147 case S390_HWCTR_STOP: 1148 ret = cfset_ioctl_stop(); 1149 break; 1150 case S390_HWCTR_READ: 1151 ret = cfset_ioctl_read(arg); 1152 break; 1153 default: 1154 ret = -ENOTTY; 1155 break; 1156 } 1157 mutex_unlock(&cfset_ctrset_mutex); 1158 put_online_cpus(); 1159 return ret; 1160 } 1161 1162 static const struct file_operations cfset_fops = { 1163 .owner = THIS_MODULE, 1164 .open = cfset_open, 1165 .release = cfset_release, 1166 .unlocked_ioctl = cfset_ioctl, 1167 .compat_ioctl = cfset_ioctl, 1168 .llseek = no_llseek 1169 }; 1170 1171 static struct miscdevice cfset_dev = { 1172 .name = S390_HWCTR_DEVICE, 1173 .minor = MISC_DYNAMIC_MINOR, 1174 .fops = &cfset_fops, 1175 }; 1176 1177 int cfset_online_cpu(unsigned int cpu) 1178 { 1179 struct cfset_call_on_cpu_parm p; 1180 1181 mutex_lock(&cfset_ctrset_mutex); 1182 if (cfset_request.ctrset) { 1183 p.sets = cfset_request.ctrset; 1184 cfset_ioctl_on(&p); 1185 cpumask_set_cpu(cpu, &cfset_request.mask); 1186 } 1187 mutex_unlock(&cfset_ctrset_mutex); 1188 return 0; 1189 } 1190 1191 int cfset_offline_cpu(unsigned int cpu) 1192 { 1193 struct cfset_call_on_cpu_parm p; 1194 1195 mutex_lock(&cfset_ctrset_mutex); 1196 if (cfset_request.ctrset) { 1197 p.sets = cfset_request.ctrset; 1198 cfset_ioctl_off(&p); 1199 cpumask_clear_cpu(cpu, &cfset_request.mask); 1200 } 1201 mutex_unlock(&cfset_ctrset_mutex); 1202 return 0; 1203 } 1204 1205 static void cfdiag_read(struct perf_event *event) 1206 { 1207 debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__, 1208 event->attr.config, local64_read(&event->count)); 1209 } 1210 1211 static int get_authctrsets(void) 1212 { 1213 struct cpu_cf_events *cpuhw; 1214 unsigned long auth = 0; 1215 enum cpumf_ctr_set 
i; 1216 1217 cpuhw = &get_cpu_var(cpu_cf_events); 1218 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1219 if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) 1220 auth |= cpumf_ctr_ctl[i]; 1221 } 1222 put_cpu_var(cpu_cf_events); 1223 return auth; 1224 } 1225 1226 /* Setup the event. Test for authorized counter sets and only include counter 1227 * sets which are authorized at the time of the setup. Including unauthorized 1228 * counter sets result in specification exception (and panic). 1229 */ 1230 static int cfdiag_event_init2(struct perf_event *event) 1231 { 1232 struct perf_event_attr *attr = &event->attr; 1233 int err = 0; 1234 1235 /* Set sample_period to indicate sampling */ 1236 event->hw.config = attr->config; 1237 event->hw.sample_period = attr->sample_period; 1238 local64_set(&event->hw.period_left, event->hw.sample_period); 1239 local64_set(&event->count, 0); 1240 event->hw.last_period = event->hw.sample_period; 1241 1242 /* Add all authorized counter sets to config_base. The 1243 * the hardware init function is either called per-cpu or just once 1244 * for all CPUS (event->cpu == -1). This depends on the whether 1245 * counting is started for all CPUs or on a per workload base where 1246 * the perf event moves from one CPU to another CPU. 1247 * Checking the authorization on any CPU is fine as the hardware 1248 * applies the same authorization settings to all CPUs. 
1249 */ 1250 event->hw.config_base = get_authctrsets(); 1251 1252 /* No authorized counter sets, nothing to count/sample */ 1253 if (!event->hw.config_base) 1254 err = -EINVAL; 1255 1256 debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n", 1257 __func__, err, event->hw.config_base); 1258 return err; 1259 } 1260 1261 static int cfdiag_event_init(struct perf_event *event) 1262 { 1263 struct perf_event_attr *attr = &event->attr; 1264 int err = -ENOENT; 1265 1266 if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || 1267 event->attr.type != event->pmu->type) 1268 goto out; 1269 1270 /* Raw events are used to access counters directly, 1271 * hence do not permit excludes. 1272 * This event is useless without PERF_SAMPLE_RAW to return counter set 1273 * values as raw data. 1274 */ 1275 if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || 1276 !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { 1277 err = -EOPNOTSUPP; 1278 goto out; 1279 } 1280 1281 /* Initialize for using the CPU-measurement counter facility */ 1282 cpumf_hw_inuse(); 1283 event->destroy = hw_perf_event_destroy; 1284 1285 err = cfdiag_event_init2(event); 1286 if (unlikely(err)) 1287 event->destroy(event); 1288 out: 1289 return err; 1290 } 1291 1292 /* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used 1293 * to collect the complete counter sets for a scheduled process. Target 1294 * are complete counter sets attached as raw data to the artificial event. 1295 * This results in complete counter sets available when a process is 1296 * scheduled. Contains the delta of every counter while the process was 1297 * running. 
1298 */ 1299 CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); 1300 1301 static struct attribute *cfdiag_events_attr[] = { 1302 CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), 1303 NULL, 1304 }; 1305 1306 PMU_FORMAT_ATTR(event, "config:0-63"); 1307 1308 static struct attribute *cfdiag_format_attr[] = { 1309 &format_attr_event.attr, 1310 NULL, 1311 }; 1312 1313 static struct attribute_group cfdiag_events_group = { 1314 .name = "events", 1315 .attrs = cfdiag_events_attr, 1316 }; 1317 static struct attribute_group cfdiag_format_group = { 1318 .name = "format", 1319 .attrs = cfdiag_format_attr, 1320 }; 1321 static const struct attribute_group *cfdiag_attr_groups[] = { 1322 &cfdiag_events_group, 1323 &cfdiag_format_group, 1324 NULL, 1325 }; 1326 1327 /* Performance monitoring unit for event CF_DIAG. Since this event 1328 * is also started and stopped via the perf_event_open() system call, use 1329 * the same event enable/disable call back functions. They do not 1330 * have a pointer to the perf_event strcture as first parameter. 1331 * 1332 * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. 1333 * Reuse them and distinguish the event (always first parameter) via 1334 * 'config' member. 1335 */ 1336 static struct pmu cf_diag = { 1337 .task_ctx_nr = perf_sw_context, 1338 .event_init = cfdiag_event_init, 1339 .pmu_enable = cpumf_pmu_enable, 1340 .pmu_disable = cpumf_pmu_disable, 1341 .add = cpumf_pmu_add, 1342 .del = cpumf_pmu_del, 1343 .start = cpumf_pmu_start, 1344 .stop = cpumf_pmu_stop, 1345 .read = cfdiag_read, 1346 1347 .attr_groups = cfdiag_attr_groups 1348 }; 1349 1350 /* Calculate memory needed to store all counter sets together with header and 1351 * trailer data. This is independent of the counter set authorization which 1352 * can vary depending on the configuration. 
1353 */ 1354 static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) 1355 { 1356 size_t max_size = sizeof(struct cf_trailer_entry); 1357 enum cpumf_ctr_set i; 1358 1359 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1360 size_t size = cpum_cf_ctrset_size(i, info); 1361 1362 if (size) 1363 max_size += size * sizeof(u64) + 1364 sizeof(struct cf_ctrset_entry); 1365 } 1366 return max_size; 1367 } 1368 1369 /* Get the CPU speed, try sampling facility first and CPU attributes second. */ 1370 static void cfdiag_get_cpu_speed(void) 1371 { 1372 if (cpum_sf_avail()) { /* Sampling facility first */ 1373 struct hws_qsi_info_block si; 1374 1375 memset(&si, 0, sizeof(si)); 1376 if (!qsi(&si)) { 1377 cfdiag_cpu_speed = si.cpu_speed; 1378 return; 1379 } 1380 } 1381 1382 /* Fallback: CPU speed extract static part. Used in case 1383 * CPU Measurement Sampling Facility is turned off. 1384 */ 1385 if (test_facility(34)) { 1386 unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); 1387 1388 if (mhz != -1UL) 1389 cfdiag_cpu_speed = mhz & 0xffffffff; 1390 } 1391 } 1392 1393 static int cfset_init(void) 1394 { 1395 struct cpumf_ctr_info info; 1396 size_t need; 1397 int rc; 1398 1399 if (qctri(&info)) 1400 return -ENODEV; 1401 1402 cfdiag_get_cpu_speed(); 1403 /* Make sure the counter set data fits into predefined buffer. */ 1404 need = cfdiag_maxsize(&info); 1405 if (need > sizeof(((struct cpu_cf_events *)0)->start)) { 1406 pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", 1407 need); 1408 return -ENOMEM; 1409 } 1410 1411 rc = misc_register(&cfset_dev); 1412 if (rc) { 1413 pr_err("Registration of /dev/%s failed rc=%i\n", 1414 cfset_dev.name, rc); 1415 goto out; 1416 } 1417 1418 rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); 1419 if (rc) { 1420 misc_deregister(&cfset_dev); 1421 pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", 1422 rc); 1423 } 1424 out: 1425 return rc; 1426 } 1427 1428 device_initcall(cpumf_pmu_init); 1429