// SPDX-License-Identifier: GPL-2.0
/*
 * Performance event support for s390x - CPU-measurement Counter Facility
 *
 *  Copyright IBM Corp. 2012, 2021
 *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
 *	       Thomas Richter <tmricht@linux.ibm.com>
 */
#define KMSG_COMPONENT	"cpum_cf"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/miscdevice.h>

#include <asm/cpu_mcf.h>
#include <asm/hwctrset.h>
#include <asm/debug.h>

static unsigned int cfdiag_cpu_speed;	/* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;		/* s390dbf handle used for tracing */

#define	CF_DIAG_CTRSET_DEF		0xfeef	/* Counter set header mark */
						/* interval in seconds */
/* NOTE(review): no interval constant is defined next to the comment above;
 * it looks like a leftover - confirm before relying on it.
 */

/* Counter sets are stored as data stream in a page sized memory buffer and
 * exported to user space via raw data attached to the event sample data.
 * Each counter set starts with an eight byte header (struct cf_ctrset_entry)
 * consisting of:
 * - a two byte eye catcher (0xfeef)
 * - a two byte counter set number
 * - a two byte counter set size (indicates the number of counters in this set)
 * - a two byte reserved value (must be zero) to make the header the same
 *   size as a counter value.
 * All counter values are eight byte in size.
 *
 * All counter sets are followed by a 64 byte trailer.
 * The trailer consists of a:
 * - flag field indicating valid fields when corresponding bit set
 * - the counter facility first and second version number
 * - the CPU speed if nonzero
 * - the time stamp the counter sets have been collected
 * - the time of day (TOD) base value
 * - the machine type.
 *
 * The counter sets are saved when the process is prepared to be executed on a
 * CPU and saved again when the process is going to be removed from a CPU.
 * The difference of both counter sets is calculated and stored in the event
 * sample data area.
 */
struct cf_ctrset_entry {	/* CPU-M CF counter set entry (8 byte) */
	unsigned int def:16;	/* 0-15  Data Entry Format */
	unsigned int set:16;	/* 16-31 Counter set identifier */
	unsigned int ctr:16;	/* 32-47 Number of stored counters */
	unsigned int res1:16;	/* 48-63 Reserved */
};

struct cf_trailer_entry {	/* CPU-M CF_DIAG trailer (64 byte) */
	/* 0 - 7 */
	union {
		struct {
			unsigned int clock_base:1;	/* TOD clock base set */
			unsigned int speed:1;		/* CPU speed set */
			/* Measurement alerts */
			unsigned int mtda:1;	/* Loss of MT ctr. data alert */
			unsigned int caca:1;	/* Counter auth. change alert */
			unsigned int lcda:1;	/* Loss of counter data alert */
		};
		unsigned long flags;	/* 0-63    All indicators */
	};
	/* 8 - 15 */
	unsigned int cfvn:16;			/* 64-79   Ctr First Version */
	unsigned int csvn:16;			/* 80-95   Ctr Second Version */
	unsigned int cpu_speed:32;		/* 96-127  CPU speed */
	/* 16 - 23 */
	unsigned long timestamp;		/* 128-191 Timestamp (TOD) */
	/* 24 - 55 */
	union {
		struct {
			unsigned long progusage1;
			unsigned long progusage2;
			unsigned long progusage3;
			unsigned long tod_base;
		};
		/* Same 32 bytes viewed as an array; tod_base is progusage[3] */
		unsigned long progusage[4];
	};
	/* 56 - 63 */
	unsigned int mach_type:16;		/* Machine type */
	unsigned int res1:16;			/* Reserved */
	unsigned int res2:32;			/* Reserved */
};

/* Create the trailer data at the end of a page.
*/ 97 static void cfdiag_trailer(struct cf_trailer_entry *te) 98 { 99 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 100 struct cpuid cpuid; 101 102 te->cfvn = cpuhw->info.cfvn; /* Counter version numbers */ 103 te->csvn = cpuhw->info.csvn; 104 105 get_cpu_id(&cpuid); /* Machine type */ 106 te->mach_type = cpuid.machine; 107 te->cpu_speed = cfdiag_cpu_speed; 108 if (te->cpu_speed) 109 te->speed = 1; 110 te->clock_base = 1; /* Save clock base */ 111 te->tod_base = tod_clock_base.tod; 112 te->timestamp = get_tod_clock_fast(); 113 } 114 115 /* Read a counter set. The counter set number determines the counter set and 116 * the CPUM-CF first and second version number determine the number of 117 * available counters in each counter set. 118 * Each counter set starts with header containing the counter set number and 119 * the number of eight byte counters. 120 * 121 * The functions returns the number of bytes occupied by this counter set 122 * including the header. 123 * If there is no counter in the counter set, this counter set is useless and 124 * zero is returned on this case. 125 * 126 * Note that the counter sets may not be enabled or active and the stcctm 127 * instruction might return error 3. Depending on error_ok value this is ok, 128 * for example when called from cpumf_pmu_start() call back function. 
129 */ 130 static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, 131 size_t room, bool error_ok) 132 { 133 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 134 size_t ctrset_size, need = 0; 135 int rc = 3; /* Assume write failure */ 136 137 ctrdata->def = CF_DIAG_CTRSET_DEF; 138 ctrdata->set = ctrset; 139 ctrdata->res1 = 0; 140 ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info); 141 142 if (ctrset_size) { /* Save data */ 143 need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); 144 if (need <= room) { 145 rc = ctr_stcctm(ctrset, ctrset_size, 146 (u64 *)(ctrdata + 1)); 147 } 148 if (rc != 3 || error_ok) 149 ctrdata->ctr = ctrset_size; 150 else 151 need = 0; 152 } 153 154 debug_sprintf_event(cf_dbg, 3, 155 "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" 156 " need %zd rc %d\n", __func__, ctrset, ctrset_size, 157 cpuhw->info.cfvn, cpuhw->info.csvn, need, rc); 158 return need; 159 } 160 161 static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { 162 [CPUMF_CTR_SET_BASIC] = 0x02, 163 [CPUMF_CTR_SET_USER] = 0x04, 164 [CPUMF_CTR_SET_CRYPTO] = 0x08, 165 [CPUMF_CTR_SET_EXT] = 0x01, 166 [CPUMF_CTR_SET_MT_DIAG] = 0x20, 167 }; 168 169 /* Read out all counter sets and save them in the provided data buffer. 170 * The last 64 byte host an artificial trailer entry. 
171 */ 172 static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth, 173 bool error_ok) 174 { 175 struct cf_trailer_entry *trailer; 176 size_t offset = 0, done; 177 int i; 178 179 memset(data, 0, sz); 180 sz -= sizeof(*trailer); /* Always room for trailer */ 181 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 182 struct cf_ctrset_entry *ctrdata = data + offset; 183 184 if (!(auth & cpumf_ctr_ctl[i])) 185 continue; /* Counter set not authorized */ 186 187 done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok); 188 offset += done; 189 } 190 trailer = data + offset; 191 cfdiag_trailer(trailer); 192 return offset + sizeof(*trailer); 193 } 194 195 /* Calculate the difference for each counter in a counter set. */ 196 static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters) 197 { 198 for (; --counters >= 0; ++pstart, ++pstop) 199 if (*pstop >= *pstart) 200 *pstop -= *pstart; 201 else 202 *pstop = *pstart - *pstop + 1; 203 } 204 205 /* Scan the counter sets and calculate the difference of each counter 206 * in each set. The result is the increment of each counter during the 207 * period the counter set has been activated. 208 * 209 * Return true on success. 
210 */ 211 static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) 212 { 213 struct cf_trailer_entry *trailer_start, *trailer_stop; 214 struct cf_ctrset_entry *ctrstart, *ctrstop; 215 size_t offset = 0; 216 217 auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; 218 do { 219 ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); 220 ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); 221 222 if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { 223 pr_err_once("cpum_cf_diag counter set compare error " 224 "in set %i\n", ctrstart->set); 225 return 0; 226 } 227 auth &= ~cpumf_ctr_ctl[ctrstart->set]; 228 if (ctrstart->def == CF_DIAG_CTRSET_DEF) { 229 cfdiag_diffctrset((u64 *)(ctrstart + 1), 230 (u64 *)(ctrstop + 1), ctrstart->ctr); 231 offset += ctrstart->ctr * sizeof(u64) + 232 sizeof(*ctrstart); 233 } 234 } while (ctrstart->def && auth); 235 236 /* Save time_stamp from start of event in stop's trailer */ 237 trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); 238 trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset); 239 trailer_stop->progusage[0] = trailer_start->timestamp; 240 241 return 1; 242 } 243 244 static enum cpumf_ctr_set get_counter_set(u64 event) 245 { 246 int set = CPUMF_CTR_SET_MAX; 247 248 if (event < 32) 249 set = CPUMF_CTR_SET_BASIC; 250 else if (event < 64) 251 set = CPUMF_CTR_SET_USER; 252 else if (event < 128) 253 set = CPUMF_CTR_SET_CRYPTO; 254 else if (event < 288) 255 set = CPUMF_CTR_SET_EXT; 256 else if (event >= 448 && event < 496) 257 set = CPUMF_CTR_SET_MT_DIAG; 258 259 return set; 260 } 261 262 static int validate_ctr_version(const struct hw_perf_event *hwc, 263 enum cpumf_ctr_set set) 264 { 265 struct cpu_cf_events *cpuhw; 266 int err = 0; 267 u16 mtdiag_ctl; 268 269 cpuhw = &get_cpu_var(cpu_cf_events); 270 271 /* check required version for counter sets */ 272 switch (set) { 273 case CPUMF_CTR_SET_BASIC: 274 case CPUMF_CTR_SET_USER: 275 if (cpuhw->info.cfvn < 1) 276 err = -EOPNOTSUPP; 
277 break; 278 case CPUMF_CTR_SET_CRYPTO: 279 if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 && 280 hwc->config > 79) || 281 (cpuhw->info.csvn >= 6 && hwc->config > 83)) 282 err = -EOPNOTSUPP; 283 break; 284 case CPUMF_CTR_SET_EXT: 285 if (cpuhw->info.csvn < 1) 286 err = -EOPNOTSUPP; 287 if ((cpuhw->info.csvn == 1 && hwc->config > 159) || 288 (cpuhw->info.csvn == 2 && hwc->config > 175) || 289 (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5 290 && hwc->config > 255) || 291 (cpuhw->info.csvn >= 6 && hwc->config > 287)) 292 err = -EOPNOTSUPP; 293 break; 294 case CPUMF_CTR_SET_MT_DIAG: 295 if (cpuhw->info.csvn <= 3) 296 err = -EOPNOTSUPP; 297 /* 298 * MT-diagnostic counters are read-only. The counter set 299 * is automatically enabled and activated on all CPUs with 300 * multithreading (SMT). Deactivation of multithreading 301 * also disables the counter set. State changes are ignored 302 * by lcctl(). Because Linux controls SMT enablement through 303 * a kernel parameter only, the counter set is either disabled 304 * or enabled and active. 305 * 306 * Thus, the counters can only be used if SMT is on and the 307 * counter set is enabled and active. 308 */ 309 mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]; 310 if (!((cpuhw->info.auth_ctl & mtdiag_ctl) && 311 (cpuhw->info.enable_ctl & mtdiag_ctl) && 312 (cpuhw->info.act_ctl & mtdiag_ctl))) 313 err = -EOPNOTSUPP; 314 break; 315 case CPUMF_CTR_SET_MAX: 316 err = -EOPNOTSUPP; 317 } 318 319 put_cpu_var(cpu_cf_events); 320 return err; 321 } 322 323 static int validate_ctr_auth(const struct hw_perf_event *hwc) 324 { 325 struct cpu_cf_events *cpuhw; 326 int err = 0; 327 328 cpuhw = &get_cpu_var(cpu_cf_events); 329 330 /* Check authorization for cpu counter sets. 331 * If the particular CPU counter set is not authorized, 332 * return with -ENOENT in order to fall back to other 333 * PMUs that might suffice the event request. 
334 */ 335 if (!(hwc->config_base & cpuhw->info.auth_ctl)) 336 err = -ENOENT; 337 338 put_cpu_var(cpu_cf_events); 339 return err; 340 } 341 342 /* 343 * Change the CPUMF state to active. 344 * Enable and activate the CPU-counter sets according 345 * to the per-cpu control state. 346 */ 347 static void cpumf_pmu_enable(struct pmu *pmu) 348 { 349 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 350 int err; 351 352 if (cpuhw->flags & PMU_F_ENABLED) 353 return; 354 355 err = lcctl(cpuhw->state | cpuhw->dev_state); 356 if (err) { 357 pr_err("Enabling the performance measuring unit " 358 "failed with rc=%x\n", err); 359 return; 360 } 361 362 cpuhw->flags |= PMU_F_ENABLED; 363 } 364 365 /* 366 * Change the CPUMF state to inactive. 367 * Disable and enable (inactive) the CPU-counter sets according 368 * to the per-cpu control state. 369 */ 370 static void cpumf_pmu_disable(struct pmu *pmu) 371 { 372 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 373 int err; 374 u64 inactive; 375 376 if (!(cpuhw->flags & PMU_F_ENABLED)) 377 return; 378 379 inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); 380 inactive |= cpuhw->dev_state; 381 err = lcctl(inactive); 382 if (err) { 383 pr_err("Disabling the performance measuring unit " 384 "failed with rc=%x\n", err); 385 return; 386 } 387 388 cpuhw->flags &= ~PMU_F_ENABLED; 389 } 390 391 392 /* Number of perf events counting hardware events */ 393 static atomic_t num_events = ATOMIC_INIT(0); 394 /* Used to avoid races in calling reserve/release_cpumf_hardware */ 395 static DEFINE_MUTEX(pmc_reserve_mutex); 396 397 /* Release the PMU if event is the last perf event */ 398 static void hw_perf_event_destroy(struct perf_event *event) 399 { 400 if (!atomic_add_unless(&num_events, -1, 1)) { 401 mutex_lock(&pmc_reserve_mutex); 402 if (atomic_dec_return(&num_events) == 0) 403 __kernel_cpumcf_end(); 404 mutex_unlock(&pmc_reserve_mutex); 405 } 406 } 407 408 /* CPUMF <-> perf event mappings for kernel+userspace 
(basic set) */ 409 static const int cpumf_generic_events_basic[] = { 410 [PERF_COUNT_HW_CPU_CYCLES] = 0, 411 [PERF_COUNT_HW_INSTRUCTIONS] = 1, 412 [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 413 [PERF_COUNT_HW_CACHE_MISSES] = -1, 414 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 415 [PERF_COUNT_HW_BRANCH_MISSES] = -1, 416 [PERF_COUNT_HW_BUS_CYCLES] = -1, 417 }; 418 /* CPUMF <-> perf event mappings for userspace (problem-state set) */ 419 static const int cpumf_generic_events_user[] = { 420 [PERF_COUNT_HW_CPU_CYCLES] = 32, 421 [PERF_COUNT_HW_INSTRUCTIONS] = 33, 422 [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 423 [PERF_COUNT_HW_CACHE_MISSES] = -1, 424 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 425 [PERF_COUNT_HW_BRANCH_MISSES] = -1, 426 [PERF_COUNT_HW_BUS_CYCLES] = -1, 427 }; 428 429 static void cpumf_hw_inuse(void) 430 { 431 mutex_lock(&pmc_reserve_mutex); 432 if (atomic_inc_return(&num_events) == 1) 433 __kernel_cpumcf_begin(); 434 mutex_unlock(&pmc_reserve_mutex); 435 } 436 437 static int __hw_perf_event_init(struct perf_event *event, unsigned int type) 438 { 439 struct perf_event_attr *attr = &event->attr; 440 struct hw_perf_event *hwc = &event->hw; 441 enum cpumf_ctr_set set; 442 int err = 0; 443 u64 ev; 444 445 switch (type) { 446 case PERF_TYPE_RAW: 447 /* Raw events are used to access counters directly, 448 * hence do not permit excludes */ 449 if (attr->exclude_kernel || attr->exclude_user || 450 attr->exclude_hv) 451 return -EOPNOTSUPP; 452 ev = attr->config; 453 break; 454 455 case PERF_TYPE_HARDWARE: 456 if (is_sampling_event(event)) /* No sampling support */ 457 return -ENOENT; 458 ev = attr->config; 459 /* Count user space (problem-state) only */ 460 if (!attr->exclude_user && attr->exclude_kernel) { 461 if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) 462 return -EOPNOTSUPP; 463 ev = cpumf_generic_events_user[ev]; 464 465 /* No support for kernel space counters only */ 466 } else if (!attr->exclude_kernel && attr->exclude_user) { 467 return -EOPNOTSUPP; 468 } else { 
/* Count user and kernel space */ 469 if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) 470 return -EOPNOTSUPP; 471 ev = cpumf_generic_events_basic[ev]; 472 } 473 break; 474 475 default: 476 return -ENOENT; 477 } 478 479 if (ev == -1) 480 return -ENOENT; 481 482 if (ev > PERF_CPUM_CF_MAX_CTR) 483 return -ENOENT; 484 485 /* Obtain the counter set to which the specified counter belongs */ 486 set = get_counter_set(ev); 487 switch (set) { 488 case CPUMF_CTR_SET_BASIC: 489 case CPUMF_CTR_SET_USER: 490 case CPUMF_CTR_SET_CRYPTO: 491 case CPUMF_CTR_SET_EXT: 492 case CPUMF_CTR_SET_MT_DIAG: 493 /* 494 * Use the hardware perf event structure to store the 495 * counter number in the 'config' member and the counter 496 * set number in the 'config_base' as bit mask. 497 * It is later used to enable/disable the counter(s). 498 */ 499 hwc->config = ev; 500 hwc->config_base = cpumf_ctr_ctl[set]; 501 break; 502 case CPUMF_CTR_SET_MAX: 503 /* The counter could not be associated to a counter set */ 504 return -EINVAL; 505 } 506 507 /* Initialize for using the CPU-measurement counter facility */ 508 cpumf_hw_inuse(); 509 event->destroy = hw_perf_event_destroy; 510 511 /* Finally, validate version and authorization of the counter set */ 512 err = validate_ctr_auth(hwc); 513 if (!err) 514 err = validate_ctr_version(hwc, set); 515 516 return err; 517 } 518 519 static int cpumf_pmu_event_init(struct perf_event *event) 520 { 521 unsigned int type = event->attr.type; 522 int err; 523 524 if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) 525 err = __hw_perf_event_init(event, type); 526 else if (event->pmu->type == type) 527 /* Registered as unknown PMU */ 528 err = __hw_perf_event_init(event, PERF_TYPE_RAW); 529 else 530 return -ENOENT; 531 532 if (unlikely(err) && event->destroy) 533 event->destroy(event); 534 535 return err; 536 } 537 538 static int hw_perf_event_reset(struct perf_event *event) 539 { 540 u64 prev, new; 541 int err; 542 543 do { 544 prev = 
local64_read(&event->hw.prev_count); 545 err = ecctr(event->hw.config, &new); 546 if (err) { 547 if (err != 3) 548 break; 549 /* The counter is not (yet) available. This 550 * might happen if the counter set to which 551 * this counter belongs is in the disabled 552 * state. 553 */ 554 new = 0; 555 } 556 } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); 557 558 return err; 559 } 560 561 static void hw_perf_event_update(struct perf_event *event) 562 { 563 u64 prev, new, delta; 564 int err; 565 566 do { 567 prev = local64_read(&event->hw.prev_count); 568 err = ecctr(event->hw.config, &new); 569 if (err) 570 return; 571 } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); 572 573 delta = (prev <= new) ? new - prev 574 : (-1ULL - prev) + new + 1; /* overflow */ 575 local64_add(delta, &event->count); 576 } 577 578 static void cpumf_pmu_read(struct perf_event *event) 579 { 580 if (event->hw.state & PERF_HES_STOPPED) 581 return; 582 583 hw_perf_event_update(event); 584 } 585 586 static void cpumf_pmu_start(struct perf_event *event, int flags) 587 { 588 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 589 struct hw_perf_event *hwc = &event->hw; 590 int i; 591 592 if (!(hwc->state & PERF_HES_STOPPED)) 593 return; 594 595 hwc->state = 0; 596 597 /* (Re-)enable and activate the counter set */ 598 ctr_set_enable(&cpuhw->state, hwc->config_base); 599 ctr_set_start(&cpuhw->state, hwc->config_base); 600 601 /* The counter set to which this counter belongs can be already active. 602 * Because all counters in a set are active, the event->hw.prev_count 603 * needs to be synchronized. At this point, the counter set can be in 604 * the inactive or disabled state. 
605 */ 606 if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 607 cpuhw->usedss = cfdiag_getctr(cpuhw->start, 608 sizeof(cpuhw->start), 609 hwc->config_base, true); 610 } else { 611 hw_perf_event_reset(event); 612 } 613 614 /* Increment refcount for counter sets */ 615 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 616 if ((hwc->config_base & cpumf_ctr_ctl[i])) 617 atomic_inc(&cpuhw->ctr_set[i]); 618 } 619 620 /* Create perf event sample with the counter sets as raw data. The sample 621 * is then pushed to the event subsystem and the function checks for 622 * possible event overflows. If an event overflow occurs, the PMU is 623 * stopped. 624 * 625 * Return non-zero if an event overflow occurred. 626 */ 627 static int cfdiag_push_sample(struct perf_event *event, 628 struct cpu_cf_events *cpuhw) 629 { 630 struct perf_sample_data data; 631 struct perf_raw_record raw; 632 struct pt_regs regs; 633 int overflow; 634 635 /* Setup perf sample */ 636 perf_sample_data_init(&data, 0, event->hw.last_period); 637 memset(®s, 0, sizeof(regs)); 638 memset(&raw, 0, sizeof(raw)); 639 640 if (event->attr.sample_type & PERF_SAMPLE_CPU) 641 data.cpu_entry.cpu = event->cpu; 642 if (event->attr.sample_type & PERF_SAMPLE_RAW) { 643 raw.frag.size = cpuhw->usedss; 644 raw.frag.data = cpuhw->stop; 645 raw.size = raw.frag.size; 646 data.raw = &raw; 647 } 648 649 overflow = perf_event_overflow(event, &data, ®s); 650 debug_sprintf_event(cf_dbg, 3, 651 "%s event %#llx sample_type %#llx raw %d ov %d\n", 652 __func__, event->hw.config, 653 event->attr.sample_type, raw.size, overflow); 654 if (overflow) 655 event->pmu->stop(event, 0); 656 657 perf_event_update_userpage(event); 658 return overflow; 659 } 660 661 static void cpumf_pmu_stop(struct perf_event *event, int flags) 662 { 663 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 664 struct hw_perf_event *hwc = &event->hw; 665 int i; 666 667 if (!(hwc->state & PERF_HES_STOPPED)) { 668 /* Decrement reference count for this 
counter set and if this 669 * is the last used counter in the set, clear activation 670 * control and set the counter set state to inactive. 671 */ 672 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 673 if (!(hwc->config_base & cpumf_ctr_ctl[i])) 674 continue; 675 if (!atomic_dec_return(&cpuhw->ctr_set[i])) 676 ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]); 677 } 678 hwc->state |= PERF_HES_STOPPED; 679 } 680 681 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 682 if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 683 local64_inc(&event->count); 684 cpuhw->usedss = cfdiag_getctr(cpuhw->stop, 685 sizeof(cpuhw->stop), 686 event->hw.config_base, 687 false); 688 if (cfdiag_diffctr(cpuhw, event->hw.config_base)) 689 cfdiag_push_sample(event, cpuhw); 690 } else 691 hw_perf_event_update(event); 692 hwc->state |= PERF_HES_UPTODATE; 693 } 694 } 695 696 static int cpumf_pmu_add(struct perf_event *event, int flags) 697 { 698 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 699 700 ctr_set_enable(&cpuhw->state, event->hw.config_base); 701 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 702 703 if (flags & PERF_EF_START) 704 cpumf_pmu_start(event, PERF_EF_RELOAD); 705 706 return 0; 707 } 708 709 static void cpumf_pmu_del(struct perf_event *event, int flags) 710 { 711 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 712 int i; 713 714 cpumf_pmu_stop(event, PERF_EF_UPDATE); 715 716 /* Check if any counter in the counter set is still used. If not used, 717 * change the counter set to the disabled state. This also clears the 718 * content of all counters in the set. 719 * 720 * When a new perf event has been added but not yet started, this can 721 * clear enable control and resets all counters in a set. Therefore, 722 * cpumf_pmu_start() always has to reenable a counter set. 
723 */ 724 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 725 if (!atomic_read(&cpuhw->ctr_set[i])) 726 ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]); 727 } 728 729 /* Performance monitoring unit for s390x */ 730 static struct pmu cpumf_pmu = { 731 .task_ctx_nr = perf_sw_context, 732 .capabilities = PERF_PMU_CAP_NO_INTERRUPT, 733 .pmu_enable = cpumf_pmu_enable, 734 .pmu_disable = cpumf_pmu_disable, 735 .event_init = cpumf_pmu_event_init, 736 .add = cpumf_pmu_add, 737 .del = cpumf_pmu_del, 738 .start = cpumf_pmu_start, 739 .stop = cpumf_pmu_stop, 740 .read = cpumf_pmu_read, 741 }; 742 743 static int cfset_init(void); 744 static int __init cpumf_pmu_init(void) 745 { 746 int rc; 747 748 if (!kernel_cpumcf_avail()) 749 return -ENODEV; 750 751 /* Setup s390dbf facility */ 752 cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); 753 if (!cf_dbg) { 754 pr_err("Registration of s390dbf(cpum_cf) failed\n"); 755 return -ENOMEM; 756 } 757 debug_register_view(cf_dbg, &debug_sprintf_view); 758 759 cpumf_pmu.attr_groups = cpumf_cf_event_group(); 760 rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); 761 if (rc) { 762 debug_unregister_view(cf_dbg, &debug_sprintf_view); 763 debug_unregister(cf_dbg); 764 pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); 765 } else if (stccm_avail()) { /* Setup counter set device */ 766 cfset_init(); 767 } 768 return rc; 769 } 770 771 /* Support for the CPU Measurement Facility counter set extraction using 772 * device /dev/hwctr. This allows user space programs to extract complete 773 * counter set via normal file operations. 774 */ 775 776 static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Excl. 
access */
static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */
struct cfset_call_on_cpu_parm {		/* Parm struct for smp_call_on_cpu */
	unsigned int sets;		/* Counter set bit mask */
	atomic_t cpus_ack;		/* # CPUs successfully executed func */
};

static struct cfset_request {		/* CPUs and counter set bit mask */
	unsigned long ctrset;		/* Bit mask of counter set to read */
	cpumask_t mask;			/* CPU mask to read from */
} cfset_request;

/* Forget the current request: no CPUs and no counter sets selected */
static void cfset_ctrset_clear(void)
{
	cpumask_clear(&cfset_request.mask);
	cfset_request.ctrset = 0;
}

/* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access
 * path is currently used.
 * The cpu_cf_events::dev_state is used to denote counter sets in use by this
 * interface. It is always or'ed in. If this interface is not active, its
 * value is zero and no additional counter sets will be included.
 *
 * The cpu_cf_events::state is used by the perf_event_open SVC and remains
 * unchanged.
 *
 * perf_pmu_enable() and perf_pmu_disable() and its call backs
 * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the
 * performance measurement subsystem to enable per process
 * CPU Measurement counter facility.
 * The XXX_enable() and XXX_disable functions are used to turn off
 * x86 performance monitoring interrupt (PMI) during scheduling.
 * s390 uses these calls to temporarily stop and resume the active CPU
 * counters sets during scheduling.
 *
 * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr
 * device access.  The perf_event_open() SVC interface makes a lot of effort
 * to only run the counters while the calling process is actively scheduled
 * to run.
 * When /dev/hwctr interface is also used at the same time, the counter sets
 * will keep running, even when the process is scheduled off a CPU.
818 * However this is not a problem and does not lead to wrong counter values 819 * for the perf_event_open() SVC. The current counter value will be recorded 820 * during schedule-in. At schedule-out time the current counter value is 821 * extracted again and the delta is calculated and added to the event. 822 */ 823 /* Stop all counter sets via ioctl interface */ 824 static void cfset_ioctl_off(void *parm) 825 { 826 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 827 struct cfset_call_on_cpu_parm *p = parm; 828 int rc; 829 830 cpuhw->dev_state = 0; 831 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 832 if ((p->sets & cpumf_ctr_ctl[rc])) 833 atomic_dec(&cpuhw->ctr_set[rc]); 834 rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ 835 if (rc) 836 pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n", 837 cpuhw->state, S390_HWCTR_DEVICE, rc); 838 cpuhw->flags &= ~PMU_F_IN_USE; 839 debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", 840 __func__, rc, cpuhw->state, cpuhw->dev_state); 841 } 842 843 /* Start counter sets on particular CPU */ 844 static void cfset_ioctl_on(void *parm) 845 { 846 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 847 struct cfset_call_on_cpu_parm *p = parm; 848 int rc; 849 850 cpuhw->flags |= PMU_F_IN_USE; 851 ctr_set_enable(&cpuhw->dev_state, p->sets); 852 ctr_set_start(&cpuhw->dev_state, p->sets); 853 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 854 if ((p->sets & cpumf_ctr_ctl[rc])) 855 atomic_inc(&cpuhw->ctr_set[rc]); 856 rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */ 857 if (!rc) 858 atomic_inc(&p->cpus_ack); 859 else 860 pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n", 861 cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); 862 debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", 863 __func__, rc, cpuhw->state, cpuhw->dev_state); 864 } 865 866 static void cfset_release_cpu(void *p) 867 { 
868 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 869 int rc; 870 871 debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", 872 __func__, cpuhw->state, cpuhw->dev_state); 873 rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ 874 if (rc) 875 pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n", 876 cpuhw->state, S390_HWCTR_DEVICE, rc); 877 cpuhw->dev_state = 0; 878 } 879 880 /* Release function is also called when application gets terminated without 881 * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. 882 */ 883 static int cfset_release(struct inode *inode, struct file *file) 884 { 885 on_each_cpu(cfset_release_cpu, NULL, 1); 886 hw_perf_event_destroy(NULL); 887 cfset_ctrset_clear(); 888 atomic_set(&cfset_opencnt, 0); 889 return 0; 890 } 891 892 static int cfset_open(struct inode *inode, struct file *file) 893 { 894 if (!capable(CAP_SYS_ADMIN)) 895 return -EPERM; 896 /* Only one user space program can open /dev/hwctr */ 897 if (atomic_xchg(&cfset_opencnt, 1)) 898 return -EBUSY; 899 900 cpumf_hw_inuse(); 901 file->private_data = NULL; 902 /* nonseekable_open() never fails */ 903 return nonseekable_open(inode, file); 904 } 905 906 static int cfset_all_stop(void) 907 { 908 struct cfset_call_on_cpu_parm p = { 909 .sets = cfset_request.ctrset, 910 }; 911 cpumask_var_t mask; 912 913 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 914 return -ENOMEM; 915 cpumask_and(mask, &cfset_request.mask, cpu_online_mask); 916 on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); 917 free_cpumask_var(mask); 918 return 0; 919 } 920 921 static int cfset_all_start(void) 922 { 923 struct cfset_call_on_cpu_parm p = { 924 .sets = cfset_request.ctrset, 925 .cpus_ack = ATOMIC_INIT(0), 926 }; 927 cpumask_var_t mask; 928 int rc = 0; 929 930 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 931 return -ENOMEM; 932 cpumask_and(mask, &cfset_request.mask, cpu_online_mask); 933 on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1); 934 if (atomic_read(&p.cpus_ack) 
!= cpumask_weight(mask)) { 935 on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); 936 rc = -EIO; 937 debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__); 938 } 939 free_cpumask_var(mask); 940 return rc; 941 } 942 943 944 /* Return the maximum required space for all possible CPUs in case one 945 * CPU will be onlined during the START, READ, STOP cycles. 946 * To find out the size of the counter sets, any one CPU will do. They 947 * all have the same counter sets. 948 */ 949 static size_t cfset_needspace(unsigned int sets) 950 { 951 struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events); 952 size_t bytes = 0; 953 int i; 954 955 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 956 if (!(sets & cpumf_ctr_ctl[i])) 957 continue; 958 bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) + 959 sizeof(((struct s390_ctrset_setdata *)0)->set) + 960 sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); 961 } 962 bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * 963 (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + 964 sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); 965 put_cpu_ptr(&cpu_cf_events); 966 return bytes; 967 } 968 969 static int cfset_all_copy(unsigned long arg, cpumask_t *mask) 970 { 971 struct s390_ctrset_read __user *ctrset_read; 972 unsigned int cpu, cpus, rc; 973 void __user *uptr; 974 975 ctrset_read = (struct s390_ctrset_read __user *)arg; 976 uptr = ctrset_read->data; 977 for_each_cpu(cpu, mask) { 978 struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu); 979 struct s390_ctrset_cpudata __user *ctrset_cpudata; 980 981 ctrset_cpudata = uptr; 982 rc = put_user(cpu, &ctrset_cpudata->cpu_nr); 983 rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets); 984 rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data, 985 cpuhw->used); 986 if (rc) 987 return -EFAULT; 988 uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used; 989 cond_resched(); 990 } 991 cpus = cpumask_weight(mask); 992 if 
(put_user(cpus, &ctrset_read->no_cpus)) 993 return -EFAULT; 994 debug_sprintf_event(cf_dbg, 4, "%s copied %ld\n", __func__, 995 uptr - (void __user *)ctrset_read->data); 996 return 0; 997 } 998 999 static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, 1000 int ctrset_size, size_t room) 1001 { 1002 size_t need = 0; 1003 int rc = -1; 1004 1005 need = sizeof(*p) + sizeof(u64) * ctrset_size; 1006 if (need <= room) { 1007 p->set = cpumf_ctr_ctl[ctrset]; 1008 p->no_cnts = ctrset_size; 1009 rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); 1010 if (rc == 3) /* Nothing stored */ 1011 need = 0; 1012 } 1013 return need; 1014 } 1015 1016 /* Read all counter sets. */ 1017 static void cfset_cpu_read(void *parm) 1018 { 1019 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 1020 struct cfset_call_on_cpu_parm *p = parm; 1021 int set, set_size; 1022 size_t space; 1023 1024 /* No data saved yet */ 1025 cpuhw->used = 0; 1026 cpuhw->sets = 0; 1027 memset(cpuhw->data, 0, sizeof(cpuhw->data)); 1028 1029 /* Scan the counter sets */ 1030 for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { 1031 struct s390_ctrset_setdata *sp = (void *)cpuhw->data + 1032 cpuhw->used; 1033 1034 if (!(p->sets & cpumf_ctr_ctl[set])) 1035 continue; /* Counter set not in list */ 1036 set_size = cpum_cf_ctrset_size(set, &cpuhw->info); 1037 space = sizeof(cpuhw->data) - cpuhw->used; 1038 space = cfset_cpuset_read(sp, set, set_size, space); 1039 if (space) { 1040 cpuhw->used += space; 1041 cpuhw->sets += 1; 1042 } 1043 } 1044 debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, 1045 cpuhw->sets, cpuhw->used); 1046 } 1047 1048 static int cfset_all_read(unsigned long arg) 1049 { 1050 struct cfset_call_on_cpu_parm p; 1051 cpumask_var_t mask; 1052 int rc; 1053 1054 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 1055 return -ENOMEM; 1056 1057 p.sets = cfset_request.ctrset; 1058 cpumask_and(mask, &cfset_request.mask, cpu_online_mask); 1059 on_each_cpu_mask(mask, 
cfset_cpu_read, &p, 1); 1060 rc = cfset_all_copy(arg, mask); 1061 free_cpumask_var(mask); 1062 return rc; 1063 } 1064 1065 static long cfset_ioctl_read(unsigned long arg) 1066 { 1067 struct s390_ctrset_read read; 1068 int ret = 0; 1069 1070 if (copy_from_user(&read, (char __user *)arg, sizeof(read))) 1071 return -EFAULT; 1072 ret = cfset_all_read(arg); 1073 return ret; 1074 } 1075 1076 static long cfset_ioctl_stop(void) 1077 { 1078 int ret = ENXIO; 1079 1080 if (cfset_request.ctrset) { 1081 ret = cfset_all_stop(); 1082 cfset_ctrset_clear(); 1083 } 1084 return ret; 1085 } 1086 1087 static long cfset_ioctl_start(unsigned long arg) 1088 { 1089 struct s390_ctrset_start __user *ustart; 1090 struct s390_ctrset_start start; 1091 void __user *umask; 1092 unsigned int len; 1093 int ret = 0; 1094 size_t need; 1095 1096 if (cfset_request.ctrset) 1097 return -EBUSY; 1098 ustart = (struct s390_ctrset_start __user *)arg; 1099 if (copy_from_user(&start, ustart, sizeof(start))) 1100 return -EFAULT; 1101 if (start.version != S390_HWCTR_START_VERSION) 1102 return -EINVAL; 1103 if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | 1104 cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | 1105 cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | 1106 cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | 1107 cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) 1108 return -EINVAL; /* Invalid counter set */ 1109 if (!start.counter_sets) 1110 return -EINVAL; /* No counter set at all? 
*/ 1111 cpumask_clear(&cfset_request.mask); 1112 len = min_t(u64, start.cpumask_len, cpumask_size()); 1113 umask = (void __user *)start.cpumask; 1114 if (copy_from_user(&cfset_request.mask, umask, len)) 1115 return -EFAULT; 1116 if (cpumask_empty(&cfset_request.mask)) 1117 return -EINVAL; 1118 need = cfset_needspace(start.counter_sets); 1119 if (put_user(need, &ustart->data_bytes)) 1120 ret = -EFAULT; 1121 if (ret) 1122 goto out; 1123 cfset_request.ctrset = start.counter_sets; 1124 ret = cfset_all_start(); 1125 out: 1126 if (ret) 1127 cfset_ctrset_clear(); 1128 debug_sprintf_event(cf_dbg, 4, "%s sets %#lx need %ld ret %d\n", 1129 __func__, cfset_request.ctrset, need, ret); 1130 return ret; 1131 } 1132 1133 /* Entry point to the /dev/hwctr device interface. 1134 * The ioctl system call supports three subcommands: 1135 * S390_HWCTR_START: Start the specified counter sets on a CPU list. The 1136 * counter set keeps running until explicitly stopped. Returns the number 1137 * of bytes needed to store the counter values. If another S390_HWCTR_START 1138 * ioctl subcommand is called without a previous S390_HWCTR_STOP stop 1139 * command, -EBUSY is returned. 1140 * S390_HWCTR_READ: Read the counter set values from specified CPU list given 1141 * with the S390_HWCTR_START command. 1142 * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the 1143 * previous S390_HWCTR_START subcommand. 
1144 */ 1145 static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 1146 { 1147 int ret; 1148 1149 cpus_read_lock(); 1150 mutex_lock(&cfset_ctrset_mutex); 1151 switch (cmd) { 1152 case S390_HWCTR_START: 1153 ret = cfset_ioctl_start(arg); 1154 break; 1155 case S390_HWCTR_STOP: 1156 ret = cfset_ioctl_stop(); 1157 break; 1158 case S390_HWCTR_READ: 1159 ret = cfset_ioctl_read(arg); 1160 break; 1161 default: 1162 ret = -ENOTTY; 1163 break; 1164 } 1165 mutex_unlock(&cfset_ctrset_mutex); 1166 cpus_read_unlock(); 1167 return ret; 1168 } 1169 1170 static const struct file_operations cfset_fops = { 1171 .owner = THIS_MODULE, 1172 .open = cfset_open, 1173 .release = cfset_release, 1174 .unlocked_ioctl = cfset_ioctl, 1175 .compat_ioctl = cfset_ioctl, 1176 .llseek = no_llseek 1177 }; 1178 1179 static struct miscdevice cfset_dev = { 1180 .name = S390_HWCTR_DEVICE, 1181 .minor = MISC_DYNAMIC_MINOR, 1182 .fops = &cfset_fops, 1183 }; 1184 1185 int cfset_online_cpu(unsigned int cpu) 1186 { 1187 struct cfset_call_on_cpu_parm p; 1188 1189 mutex_lock(&cfset_ctrset_mutex); 1190 if (cfset_request.ctrset) { 1191 p.sets = cfset_request.ctrset; 1192 cfset_ioctl_on(&p); 1193 cpumask_set_cpu(cpu, &cfset_request.mask); 1194 } 1195 mutex_unlock(&cfset_ctrset_mutex); 1196 return 0; 1197 } 1198 1199 int cfset_offline_cpu(unsigned int cpu) 1200 { 1201 struct cfset_call_on_cpu_parm p; 1202 1203 mutex_lock(&cfset_ctrset_mutex); 1204 if (cfset_request.ctrset) { 1205 p.sets = cfset_request.ctrset; 1206 cfset_ioctl_off(&p); 1207 cpumask_clear_cpu(cpu, &cfset_request.mask); 1208 } 1209 mutex_unlock(&cfset_ctrset_mutex); 1210 return 0; 1211 } 1212 1213 static void cfdiag_read(struct perf_event *event) 1214 { 1215 debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__, 1216 event->attr.config, local64_read(&event->count)); 1217 } 1218 1219 static int get_authctrsets(void) 1220 { 1221 struct cpu_cf_events *cpuhw; 1222 unsigned long auth = 0; 1223 enum cpumf_ctr_set 
i; 1224 1225 cpuhw = &get_cpu_var(cpu_cf_events); 1226 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1227 if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) 1228 auth |= cpumf_ctr_ctl[i]; 1229 } 1230 put_cpu_var(cpu_cf_events); 1231 return auth; 1232 } 1233 1234 /* Setup the event. Test for authorized counter sets and only include counter 1235 * sets which are authorized at the time of the setup. Including unauthorized 1236 * counter sets result in specification exception (and panic). 1237 */ 1238 static int cfdiag_event_init2(struct perf_event *event) 1239 { 1240 struct perf_event_attr *attr = &event->attr; 1241 int err = 0; 1242 1243 /* Set sample_period to indicate sampling */ 1244 event->hw.config = attr->config; 1245 event->hw.sample_period = attr->sample_period; 1246 local64_set(&event->hw.period_left, event->hw.sample_period); 1247 local64_set(&event->count, 0); 1248 event->hw.last_period = event->hw.sample_period; 1249 1250 /* Add all authorized counter sets to config_base. The 1251 * the hardware init function is either called per-cpu or just once 1252 * for all CPUS (event->cpu == -1). This depends on the whether 1253 * counting is started for all CPUs or on a per workload base where 1254 * the perf event moves from one CPU to another CPU. 1255 * Checking the authorization on any CPU is fine as the hardware 1256 * applies the same authorization settings to all CPUs. 
1257 */ 1258 event->hw.config_base = get_authctrsets(); 1259 1260 /* No authorized counter sets, nothing to count/sample */ 1261 if (!event->hw.config_base) 1262 err = -EINVAL; 1263 1264 debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n", 1265 __func__, err, event->hw.config_base); 1266 return err; 1267 } 1268 1269 static int cfdiag_event_init(struct perf_event *event) 1270 { 1271 struct perf_event_attr *attr = &event->attr; 1272 int err = -ENOENT; 1273 1274 if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || 1275 event->attr.type != event->pmu->type) 1276 goto out; 1277 1278 /* Raw events are used to access counters directly, 1279 * hence do not permit excludes. 1280 * This event is useless without PERF_SAMPLE_RAW to return counter set 1281 * values as raw data. 1282 */ 1283 if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || 1284 !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { 1285 err = -EOPNOTSUPP; 1286 goto out; 1287 } 1288 1289 /* Initialize for using the CPU-measurement counter facility */ 1290 cpumf_hw_inuse(); 1291 event->destroy = hw_perf_event_destroy; 1292 1293 err = cfdiag_event_init2(event); 1294 if (unlikely(err)) 1295 event->destroy(event); 1296 out: 1297 return err; 1298 } 1299 1300 /* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used 1301 * to collect the complete counter sets for a scheduled process. Target 1302 * are complete counter sets attached as raw data to the artificial event. 1303 * This results in complete counter sets available when a process is 1304 * scheduled. Contains the delta of every counter while the process was 1305 * running. 
1306 */ 1307 CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); 1308 1309 static struct attribute *cfdiag_events_attr[] = { 1310 CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), 1311 NULL, 1312 }; 1313 1314 PMU_FORMAT_ATTR(event, "config:0-63"); 1315 1316 static struct attribute *cfdiag_format_attr[] = { 1317 &format_attr_event.attr, 1318 NULL, 1319 }; 1320 1321 static struct attribute_group cfdiag_events_group = { 1322 .name = "events", 1323 .attrs = cfdiag_events_attr, 1324 }; 1325 static struct attribute_group cfdiag_format_group = { 1326 .name = "format", 1327 .attrs = cfdiag_format_attr, 1328 }; 1329 static const struct attribute_group *cfdiag_attr_groups[] = { 1330 &cfdiag_events_group, 1331 &cfdiag_format_group, 1332 NULL, 1333 }; 1334 1335 /* Performance monitoring unit for event CF_DIAG. Since this event 1336 * is also started and stopped via the perf_event_open() system call, use 1337 * the same event enable/disable call back functions. They do not 1338 * have a pointer to the perf_event strcture as first parameter. 1339 * 1340 * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. 1341 * Reuse them and distinguish the event (always first parameter) via 1342 * 'config' member. 1343 */ 1344 static struct pmu cf_diag = { 1345 .task_ctx_nr = perf_sw_context, 1346 .event_init = cfdiag_event_init, 1347 .pmu_enable = cpumf_pmu_enable, 1348 .pmu_disable = cpumf_pmu_disable, 1349 .add = cpumf_pmu_add, 1350 .del = cpumf_pmu_del, 1351 .start = cpumf_pmu_start, 1352 .stop = cpumf_pmu_stop, 1353 .read = cfdiag_read, 1354 1355 .attr_groups = cfdiag_attr_groups 1356 }; 1357 1358 /* Calculate memory needed to store all counter sets together with header and 1359 * trailer data. This is independent of the counter set authorization which 1360 * can vary depending on the configuration. 
1361 */ 1362 static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) 1363 { 1364 size_t max_size = sizeof(struct cf_trailer_entry); 1365 enum cpumf_ctr_set i; 1366 1367 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1368 size_t size = cpum_cf_ctrset_size(i, info); 1369 1370 if (size) 1371 max_size += size * sizeof(u64) + 1372 sizeof(struct cf_ctrset_entry); 1373 } 1374 return max_size; 1375 } 1376 1377 /* Get the CPU speed, try sampling facility first and CPU attributes second. */ 1378 static void cfdiag_get_cpu_speed(void) 1379 { 1380 if (cpum_sf_avail()) { /* Sampling facility first */ 1381 struct hws_qsi_info_block si; 1382 1383 memset(&si, 0, sizeof(si)); 1384 if (!qsi(&si)) { 1385 cfdiag_cpu_speed = si.cpu_speed; 1386 return; 1387 } 1388 } 1389 1390 /* Fallback: CPU speed extract static part. Used in case 1391 * CPU Measurement Sampling Facility is turned off. 1392 */ 1393 if (test_facility(34)) { 1394 unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); 1395 1396 if (mhz != -1UL) 1397 cfdiag_cpu_speed = mhz & 0xffffffff; 1398 } 1399 } 1400 1401 static int cfset_init(void) 1402 { 1403 struct cpumf_ctr_info info; 1404 size_t need; 1405 int rc; 1406 1407 if (qctri(&info)) 1408 return -ENODEV; 1409 1410 cfdiag_get_cpu_speed(); 1411 /* Make sure the counter set data fits into predefined buffer. */ 1412 need = cfdiag_maxsize(&info); 1413 if (need > sizeof(((struct cpu_cf_events *)0)->start)) { 1414 pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", 1415 need); 1416 return -ENOMEM; 1417 } 1418 1419 rc = misc_register(&cfset_dev); 1420 if (rc) { 1421 pr_err("Registration of /dev/%s failed rc=%i\n", 1422 cfset_dev.name, rc); 1423 goto out; 1424 } 1425 1426 rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); 1427 if (rc) { 1428 misc_deregister(&cfset_dev); 1429 pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", 1430 rc); 1431 } 1432 out: 1433 return rc; 1434 } 1435 1436 device_initcall(cpumf_pmu_init); 1437