1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Performance event support for s390x - CPU-measurement Counter Facility 4 * 5 * Copyright IBM Corp. 2012, 2021 6 * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> 7 * Thomas Richter <tmricht@linux.ibm.com> 8 */ 9 #define KMSG_COMPONENT "cpum_cf" 10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 11 12 #include <linux/kernel.h> 13 #include <linux/kernel_stat.h> 14 #include <linux/percpu.h> 15 #include <linux/notifier.h> 16 #include <linux/init.h> 17 #include <linux/export.h> 18 #include <linux/miscdevice.h> 19 20 #include <asm/cpu_mcf.h> 21 #include <asm/hwctrset.h> 22 #include <asm/debug.h> 23 24 static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ 25 static debug_info_t *cf_dbg; 26 27 #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ 28 /* interval in seconds */ 29 30 /* Counter sets are stored as data stream in a page sized memory buffer and 31 * exported to user space via raw data attached to the event sample data. 32 * Each counter set starts with an eight byte header consisting of: 33 * - a two byte eye catcher (0xfeef) 34 * - a one byte counter set number 35 * - a two byte counter set size (indicates the number of counters in this set) 36 * - a three byte reserved value (must be zero) to make the header the same 37 * size as a counter value. 38 * All counter values are eight byte in size. 39 * 40 * All counter sets are followed by a 64 byte trailer. 41 * The trailer consists of a: 42 * - flag field indicating valid fields when corresponding bit set 43 * - the counter facility first and second version number 44 * - the CPU speed if nonzero 45 * - the time stamp the counter sets have been collected 46 * - the time of day (TOD) base value 47 * - the machine type. 48 * 49 * The counter sets are saved when the process is prepared to be executed on a 50 * CPU and saved again when the process is going to be removed from a CPU. 
51 * The difference of both counter sets are calculated and stored in the event 52 * sample data area. 53 */ 54 struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ 55 unsigned int def:16; /* 0-15 Data Entry Format */ 56 unsigned int set:16; /* 16-31 Counter set identifier */ 57 unsigned int ctr:16; /* 32-47 Number of stored counters */ 58 unsigned int res1:16; /* 48-63 Reserved */ 59 }; 60 61 struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */ 62 /* 0 - 7 */ 63 union { 64 struct { 65 unsigned int clock_base:1; /* TOD clock base set */ 66 unsigned int speed:1; /* CPU speed set */ 67 /* Measurement alerts */ 68 unsigned int mtda:1; /* Loss of MT ctr. data alert */ 69 unsigned int caca:1; /* Counter auth. change alert */ 70 unsigned int lcda:1; /* Loss of counter data alert */ 71 }; 72 unsigned long flags; /* 0-63 All indicators */ 73 }; 74 /* 8 - 15 */ 75 unsigned int cfvn:16; /* 64-79 Ctr First Version */ 76 unsigned int csvn:16; /* 80-95 Ctr Second Version */ 77 unsigned int cpu_speed:32; /* 96-127 CPU speed */ 78 /* 16 - 23 */ 79 unsigned long timestamp; /* 128-191 Timestamp (TOD) */ 80 /* 24 - 55 */ 81 union { 82 struct { 83 unsigned long progusage1; 84 unsigned long progusage2; 85 unsigned long progusage3; 86 unsigned long tod_base; 87 }; 88 unsigned long progusage[4]; 89 }; 90 /* 56 - 63 */ 91 unsigned int mach_type:16; /* Machine type */ 92 unsigned int res1:16; /* Reserved */ 93 unsigned int res2:32; /* Reserved */ 94 }; 95 96 /* Create the trailer data at the end of a page. 
*/
/* Fill the trailer entry @te from the counter facility information of the
 * CPU this function runs on. Fields not written here are left untouched.
 */
static void cfdiag_trailer(struct cf_trailer_entry *te)
{
	struct cpu_cf_events *hw = this_cpu_ptr(&cpu_cf_events);
	struct cpuid id;

	/* Machine type taken from the CPU identification */
	get_cpu_id(&id);
	te->mach_type = id.machine;

	/* Counter facility first and second version number */
	te->cfvn = hw->info.cfvn;
	te->csvn = hw->info.csvn;

	/* The speed indicator is only raised for a nonzero CPU speed */
	te->cpu_speed = cfdiag_cpu_speed;
	if (te->cpu_speed)
		te->speed = 1;

	/* The TOD clock base is always saved and flagged as valid */
	te->tod_base = tod_clock_base.tod;
	te->clock_base = 1;

	/* The time stamp is taken as the very last step */
	te->timestamp = get_tod_clock_fast();
}

/* Read a counter set. The counter set number determines the counter set and
 * the CPUM-CF first and second version number determine the number of
 * available counters in each counter set.
 * Each counter set starts with header containing the counter set number and
 * the number of eight byte counters.
 *
 * The function returns the number of bytes occupied by this counter set
 * including the header.
 * If there is no counter in the counter set, this counter set is useless and
 * zero is returned on this case.
 *
 * Note that the counter sets may not be enabled or active and the stcctm
 * instruction might return error 3. Depending on error_ok value this is ok,
 * for example when called from cpumf_pmu_start() call back function.
129 */ 130 static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, 131 size_t room, bool error_ok) 132 { 133 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 134 size_t ctrset_size, need = 0; 135 int rc = 3; /* Assume write failure */ 136 137 ctrdata->def = CF_DIAG_CTRSET_DEF; 138 ctrdata->set = ctrset; 139 ctrdata->res1 = 0; 140 ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info); 141 142 if (ctrset_size) { /* Save data */ 143 need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); 144 if (need <= room) { 145 rc = ctr_stcctm(ctrset, ctrset_size, 146 (u64 *)(ctrdata + 1)); 147 } 148 if (rc != 3 || error_ok) 149 ctrdata->ctr = ctrset_size; 150 else 151 need = 0; 152 } 153 154 debug_sprintf_event(cf_dbg, 3, 155 "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" 156 " need %zd rc %d\n", __func__, ctrset, ctrset_size, 157 cpuhw->info.cfvn, cpuhw->info.csvn, need, rc); 158 return need; 159 } 160 161 static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { 162 [CPUMF_CTR_SET_BASIC] = 0x02, 163 [CPUMF_CTR_SET_USER] = 0x04, 164 [CPUMF_CTR_SET_CRYPTO] = 0x08, 165 [CPUMF_CTR_SET_EXT] = 0x01, 166 [CPUMF_CTR_SET_MT_DIAG] = 0x20, 167 }; 168 169 /* Read out all counter sets and save them in the provided data buffer. 170 * The last 64 byte host an artificial trailer entry. 
171 */ 172 static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth, 173 bool error_ok) 174 { 175 struct cf_trailer_entry *trailer; 176 size_t offset = 0, done; 177 int i; 178 179 memset(data, 0, sz); 180 sz -= sizeof(*trailer); /* Always room for trailer */ 181 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 182 struct cf_ctrset_entry *ctrdata = data + offset; 183 184 if (!(auth & cpumf_ctr_ctl[i])) 185 continue; /* Counter set not authorized */ 186 187 done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok); 188 offset += done; 189 } 190 trailer = data + offset; 191 cfdiag_trailer(trailer); 192 return offset + sizeof(*trailer); 193 } 194 195 /* Calculate the difference for each counter in a counter set. */ 196 static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters) 197 { 198 for (; --counters >= 0; ++pstart, ++pstop) 199 if (*pstop >= *pstart) 200 *pstop -= *pstart; 201 else 202 *pstop = *pstart - *pstop + 1; 203 } 204 205 /* Scan the counter sets and calculate the difference of each counter 206 * in each set. The result is the increment of each counter during the 207 * period the counter set has been activated. 208 * 209 * Return true on success. 
210 */ 211 static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) 212 { 213 struct cf_trailer_entry *trailer_start, *trailer_stop; 214 struct cf_ctrset_entry *ctrstart, *ctrstop; 215 size_t offset = 0; 216 217 auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; 218 do { 219 ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); 220 ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); 221 222 if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { 223 pr_err_once("cpum_cf_diag counter set compare error " 224 "in set %i\n", ctrstart->set); 225 return 0; 226 } 227 auth &= ~cpumf_ctr_ctl[ctrstart->set]; 228 if (ctrstart->def == CF_DIAG_CTRSET_DEF) { 229 cfdiag_diffctrset((u64 *)(ctrstart + 1), 230 (u64 *)(ctrstop + 1), ctrstart->ctr); 231 offset += ctrstart->ctr * sizeof(u64) + 232 sizeof(*ctrstart); 233 } 234 } while (ctrstart->def && auth); 235 236 /* Save time_stamp from start of event in stop's trailer */ 237 trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); 238 trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset); 239 trailer_stop->progusage[0] = trailer_start->timestamp; 240 241 return 1; 242 } 243 244 static enum cpumf_ctr_set get_counter_set(u64 event) 245 { 246 int set = CPUMF_CTR_SET_MAX; 247 248 if (event < 32) 249 set = CPUMF_CTR_SET_BASIC; 250 else if (event < 64) 251 set = CPUMF_CTR_SET_USER; 252 else if (event < 128) 253 set = CPUMF_CTR_SET_CRYPTO; 254 else if (event < 288) 255 set = CPUMF_CTR_SET_EXT; 256 else if (event >= 448 && event < 496) 257 set = CPUMF_CTR_SET_MT_DIAG; 258 259 return set; 260 } 261 262 static int validate_ctr_version(const struct hw_perf_event *hwc, 263 enum cpumf_ctr_set set) 264 { 265 struct cpu_cf_events *cpuhw; 266 int err = 0; 267 u16 mtdiag_ctl; 268 269 cpuhw = &get_cpu_var(cpu_cf_events); 270 271 /* check required version for counter sets */ 272 switch (set) { 273 case CPUMF_CTR_SET_BASIC: 274 case CPUMF_CTR_SET_USER: 275 if (cpuhw->info.cfvn < 1) 276 err = -EOPNOTSUPP; 
277 break; 278 case CPUMF_CTR_SET_CRYPTO: 279 if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 && 280 hwc->config > 79) || 281 (cpuhw->info.csvn >= 6 && hwc->config > 83)) 282 err = -EOPNOTSUPP; 283 break; 284 case CPUMF_CTR_SET_EXT: 285 if (cpuhw->info.csvn < 1) 286 err = -EOPNOTSUPP; 287 if ((cpuhw->info.csvn == 1 && hwc->config > 159) || 288 (cpuhw->info.csvn == 2 && hwc->config > 175) || 289 (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5 290 && hwc->config > 255) || 291 (cpuhw->info.csvn >= 6 && hwc->config > 287)) 292 err = -EOPNOTSUPP; 293 break; 294 case CPUMF_CTR_SET_MT_DIAG: 295 if (cpuhw->info.csvn <= 3) 296 err = -EOPNOTSUPP; 297 /* 298 * MT-diagnostic counters are read-only. The counter set 299 * is automatically enabled and activated on all CPUs with 300 * multithreading (SMT). Deactivation of multithreading 301 * also disables the counter set. State changes are ignored 302 * by lcctl(). Because Linux controls SMT enablement through 303 * a kernel parameter only, the counter set is either disabled 304 * or enabled and active. 305 * 306 * Thus, the counters can only be used if SMT is on and the 307 * counter set is enabled and active. 308 */ 309 mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]; 310 if (!((cpuhw->info.auth_ctl & mtdiag_ctl) && 311 (cpuhw->info.enable_ctl & mtdiag_ctl) && 312 (cpuhw->info.act_ctl & mtdiag_ctl))) 313 err = -EOPNOTSUPP; 314 break; 315 case CPUMF_CTR_SET_MAX: 316 err = -EOPNOTSUPP; 317 } 318 319 put_cpu_var(cpu_cf_events); 320 return err; 321 } 322 323 static int validate_ctr_auth(const struct hw_perf_event *hwc) 324 { 325 struct cpu_cf_events *cpuhw; 326 int err = 0; 327 328 cpuhw = &get_cpu_var(cpu_cf_events); 329 330 /* Check authorization for cpu counter sets. 331 * If the particular CPU counter set is not authorized, 332 * return with -ENOENT in order to fall back to other 333 * PMUs that might suffice the event request. 
334 */ 335 if (!(hwc->config_base & cpuhw->info.auth_ctl)) 336 err = -ENOENT; 337 338 put_cpu_var(cpu_cf_events); 339 return err; 340 } 341 342 /* 343 * Change the CPUMF state to active. 344 * Enable and activate the CPU-counter sets according 345 * to the per-cpu control state. 346 */ 347 static void cpumf_pmu_enable(struct pmu *pmu) 348 { 349 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 350 int err; 351 352 if (cpuhw->flags & PMU_F_ENABLED) 353 return; 354 355 err = lcctl(cpuhw->state | cpuhw->dev_state); 356 if (err) { 357 pr_err("Enabling the performance measuring unit " 358 "failed with rc=%x\n", err); 359 return; 360 } 361 362 cpuhw->flags |= PMU_F_ENABLED; 363 } 364 365 /* 366 * Change the CPUMF state to inactive. 367 * Disable and enable (inactive) the CPU-counter sets according 368 * to the per-cpu control state. 369 */ 370 static void cpumf_pmu_disable(struct pmu *pmu) 371 { 372 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 373 int err; 374 u64 inactive; 375 376 if (!(cpuhw->flags & PMU_F_ENABLED)) 377 return; 378 379 inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); 380 inactive |= cpuhw->dev_state; 381 err = lcctl(inactive); 382 if (err) { 383 pr_err("Disabling the performance measuring unit " 384 "failed with rc=%x\n", err); 385 return; 386 } 387 388 cpuhw->flags &= ~PMU_F_ENABLED; 389 } 390 391 392 /* Number of perf events counting hardware events */ 393 static atomic_t num_events = ATOMIC_INIT(0); 394 /* Used to avoid races in calling reserve/release_cpumf_hardware */ 395 static DEFINE_MUTEX(pmc_reserve_mutex); 396 397 /* Release the PMU if event is the last perf event */ 398 static void hw_perf_event_destroy(struct perf_event *event) 399 { 400 if (!atomic_add_unless(&num_events, -1, 1)) { 401 mutex_lock(&pmc_reserve_mutex); 402 if (atomic_dec_return(&num_events) == 0) 403 __kernel_cpumcf_end(); 404 mutex_unlock(&pmc_reserve_mutex); 405 } 406 } 407 408 /* CPUMF <-> perf event mappings for kernel+userspace 
(basic set) */ 409 static const int cpumf_generic_events_basic[] = { 410 [PERF_COUNT_HW_CPU_CYCLES] = 0, 411 [PERF_COUNT_HW_INSTRUCTIONS] = 1, 412 [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 413 [PERF_COUNT_HW_CACHE_MISSES] = -1, 414 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 415 [PERF_COUNT_HW_BRANCH_MISSES] = -1, 416 [PERF_COUNT_HW_BUS_CYCLES] = -1, 417 }; 418 /* CPUMF <-> perf event mappings for userspace (problem-state set) */ 419 static const int cpumf_generic_events_user[] = { 420 [PERF_COUNT_HW_CPU_CYCLES] = 32, 421 [PERF_COUNT_HW_INSTRUCTIONS] = 33, 422 [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 423 [PERF_COUNT_HW_CACHE_MISSES] = -1, 424 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 425 [PERF_COUNT_HW_BRANCH_MISSES] = -1, 426 [PERF_COUNT_HW_BUS_CYCLES] = -1, 427 }; 428 429 static void cpumf_hw_inuse(void) 430 { 431 mutex_lock(&pmc_reserve_mutex); 432 if (atomic_inc_return(&num_events) == 1) 433 __kernel_cpumcf_begin(); 434 mutex_unlock(&pmc_reserve_mutex); 435 } 436 437 static int __hw_perf_event_init(struct perf_event *event, unsigned int type) 438 { 439 struct perf_event_attr *attr = &event->attr; 440 struct hw_perf_event *hwc = &event->hw; 441 enum cpumf_ctr_set set; 442 int err = 0; 443 u64 ev; 444 445 switch (type) { 446 case PERF_TYPE_RAW: 447 /* Raw events are used to access counters directly, 448 * hence do not permit excludes */ 449 if (attr->exclude_kernel || attr->exclude_user || 450 attr->exclude_hv) 451 return -EOPNOTSUPP; 452 ev = attr->config; 453 break; 454 455 case PERF_TYPE_HARDWARE: 456 if (is_sampling_event(event)) /* No sampling support */ 457 return -ENOENT; 458 ev = attr->config; 459 /* Count user space (problem-state) only */ 460 if (!attr->exclude_user && attr->exclude_kernel) { 461 if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) 462 return -EOPNOTSUPP; 463 ev = cpumf_generic_events_user[ev]; 464 465 /* No support for kernel space counters only */ 466 } else if (!attr->exclude_kernel && attr->exclude_user) { 467 return -EOPNOTSUPP; 468 } else { 
/* Count user and kernel space */ 469 if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) 470 return -EOPNOTSUPP; 471 ev = cpumf_generic_events_basic[ev]; 472 } 473 break; 474 475 default: 476 return -ENOENT; 477 } 478 479 if (ev == -1) 480 return -ENOENT; 481 482 if (ev > PERF_CPUM_CF_MAX_CTR) 483 return -ENOENT; 484 485 /* Obtain the counter set to which the specified counter belongs */ 486 set = get_counter_set(ev); 487 switch (set) { 488 case CPUMF_CTR_SET_BASIC: 489 case CPUMF_CTR_SET_USER: 490 case CPUMF_CTR_SET_CRYPTO: 491 case CPUMF_CTR_SET_EXT: 492 case CPUMF_CTR_SET_MT_DIAG: 493 /* 494 * Use the hardware perf event structure to store the 495 * counter number in the 'config' member and the counter 496 * set number in the 'config_base' as bit mask. 497 * It is later used to enable/disable the counter(s). 498 */ 499 hwc->config = ev; 500 hwc->config_base = cpumf_ctr_ctl[set]; 501 break; 502 case CPUMF_CTR_SET_MAX: 503 /* The counter could not be associated to a counter set */ 504 return -EINVAL; 505 } 506 507 /* Initialize for using the CPU-measurement counter facility */ 508 cpumf_hw_inuse(); 509 event->destroy = hw_perf_event_destroy; 510 511 /* Finally, validate version and authorization of the counter set */ 512 err = validate_ctr_auth(hwc); 513 if (!err) 514 err = validate_ctr_version(hwc, set); 515 516 return err; 517 } 518 519 static int cpumf_pmu_event_init(struct perf_event *event) 520 { 521 unsigned int type = event->attr.type; 522 int err; 523 524 if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) 525 err = __hw_perf_event_init(event, type); 526 else if (event->pmu->type == type) 527 /* Registered as unknown PMU */ 528 err = __hw_perf_event_init(event, PERF_TYPE_RAW); 529 else 530 return -ENOENT; 531 532 if (unlikely(err) && event->destroy) 533 event->destroy(event); 534 535 return err; 536 } 537 538 static int hw_perf_event_reset(struct perf_event *event) 539 { 540 u64 prev, new; 541 int err; 542 543 do { 544 prev = 
local64_read(&event->hw.prev_count); 545 err = ecctr(event->hw.config, &new); 546 if (err) { 547 if (err != 3) 548 break; 549 /* The counter is not (yet) available. This 550 * might happen if the counter set to which 551 * this counter belongs is in the disabled 552 * state. 553 */ 554 new = 0; 555 } 556 } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); 557 558 return err; 559 } 560 561 static void hw_perf_event_update(struct perf_event *event) 562 { 563 u64 prev, new, delta; 564 int err; 565 566 do { 567 prev = local64_read(&event->hw.prev_count); 568 err = ecctr(event->hw.config, &new); 569 if (err) 570 return; 571 } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); 572 573 delta = (prev <= new) ? new - prev 574 : (-1ULL - prev) + new + 1; /* overflow */ 575 local64_add(delta, &event->count); 576 } 577 578 static void cpumf_pmu_read(struct perf_event *event) 579 { 580 if (event->hw.state & PERF_HES_STOPPED) 581 return; 582 583 hw_perf_event_update(event); 584 } 585 586 static void cpumf_pmu_start(struct perf_event *event, int flags) 587 { 588 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 589 struct hw_perf_event *hwc = &event->hw; 590 int i; 591 592 if (!(hwc->state & PERF_HES_STOPPED)) 593 return; 594 595 hwc->state = 0; 596 597 /* (Re-)enable and activate the counter set */ 598 ctr_set_enable(&cpuhw->state, hwc->config_base); 599 ctr_set_start(&cpuhw->state, hwc->config_base); 600 601 /* The counter set to which this counter belongs can be already active. 602 * Because all counters in a set are active, the event->hw.prev_count 603 * needs to be synchronized. At this point, the counter set can be in 604 * the inactive or disabled state. 
605 */ 606 if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 607 cpuhw->usedss = cfdiag_getctr(cpuhw->start, 608 sizeof(cpuhw->start), 609 hwc->config_base, true); 610 } else { 611 hw_perf_event_reset(event); 612 } 613 614 /* Increment refcount for counter sets */ 615 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 616 if ((hwc->config_base & cpumf_ctr_ctl[i])) 617 atomic_inc(&cpuhw->ctr_set[i]); 618 } 619 620 /* Create perf event sample with the counter sets as raw data. The sample 621 * is then pushed to the event subsystem and the function checks for 622 * possible event overflows. If an event overflow occurs, the PMU is 623 * stopped. 624 * 625 * Return non-zero if an event overflow occurred. 626 */ 627 static int cfdiag_push_sample(struct perf_event *event, 628 struct cpu_cf_events *cpuhw) 629 { 630 struct perf_sample_data data; 631 struct perf_raw_record raw; 632 struct pt_regs regs; 633 int overflow; 634 635 /* Setup perf sample */ 636 perf_sample_data_init(&data, 0, event->hw.last_period); 637 memset(®s, 0, sizeof(regs)); 638 memset(&raw, 0, sizeof(raw)); 639 640 if (event->attr.sample_type & PERF_SAMPLE_CPU) 641 data.cpu_entry.cpu = event->cpu; 642 if (event->attr.sample_type & PERF_SAMPLE_RAW) { 643 raw.frag.size = cpuhw->usedss; 644 raw.frag.data = cpuhw->stop; 645 raw.size = raw.frag.size; 646 data.raw = &raw; 647 } 648 649 overflow = perf_event_overflow(event, &data, ®s); 650 debug_sprintf_event(cf_dbg, 3, 651 "%s event %#llx sample_type %#llx raw %d ov %d\n", 652 __func__, event->hw.config, 653 event->attr.sample_type, raw.size, overflow); 654 if (overflow) 655 event->pmu->stop(event, 0); 656 657 perf_event_update_userpage(event); 658 return overflow; 659 } 660 661 static void cpumf_pmu_stop(struct perf_event *event, int flags) 662 { 663 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 664 struct hw_perf_event *hwc = &event->hw; 665 int i; 666 667 if (!(hwc->state & PERF_HES_STOPPED)) { 668 /* Decrement reference count for this 
counter set and if this 669 * is the last used counter in the set, clear activation 670 * control and set the counter set state to inactive. 671 */ 672 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 673 if (!(hwc->config_base & cpumf_ctr_ctl[i])) 674 continue; 675 if (!atomic_dec_return(&cpuhw->ctr_set[i])) 676 ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]); 677 } 678 hwc->state |= PERF_HES_STOPPED; 679 } 680 681 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 682 if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 683 local64_inc(&event->count); 684 cpuhw->usedss = cfdiag_getctr(cpuhw->stop, 685 sizeof(cpuhw->stop), 686 event->hw.config_base, 687 false); 688 if (cfdiag_diffctr(cpuhw, event->hw.config_base)) 689 cfdiag_push_sample(event, cpuhw); 690 } else if (cpuhw->flags & PMU_F_RESERVED) { 691 /* Only update when PMU not hotplugged off */ 692 hw_perf_event_update(event); 693 } 694 hwc->state |= PERF_HES_UPTODATE; 695 } 696 } 697 698 static int cpumf_pmu_add(struct perf_event *event, int flags) 699 { 700 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 701 702 ctr_set_enable(&cpuhw->state, event->hw.config_base); 703 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 704 705 if (flags & PERF_EF_START) 706 cpumf_pmu_start(event, PERF_EF_RELOAD); 707 708 return 0; 709 } 710 711 static void cpumf_pmu_del(struct perf_event *event, int flags) 712 { 713 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 714 int i; 715 716 cpumf_pmu_stop(event, PERF_EF_UPDATE); 717 718 /* Check if any counter in the counter set is still used. If not used, 719 * change the counter set to the disabled state. This also clears the 720 * content of all counters in the set. 721 * 722 * When a new perf event has been added but not yet started, this can 723 * clear enable control and resets all counters in a set. Therefore, 724 * cpumf_pmu_start() always has to reenable a counter set. 
725 */ 726 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 727 if (!atomic_read(&cpuhw->ctr_set[i])) 728 ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]); 729 } 730 731 /* Performance monitoring unit for s390x */ 732 static struct pmu cpumf_pmu = { 733 .task_ctx_nr = perf_sw_context, 734 .capabilities = PERF_PMU_CAP_NO_INTERRUPT, 735 .pmu_enable = cpumf_pmu_enable, 736 .pmu_disable = cpumf_pmu_disable, 737 .event_init = cpumf_pmu_event_init, 738 .add = cpumf_pmu_add, 739 .del = cpumf_pmu_del, 740 .start = cpumf_pmu_start, 741 .stop = cpumf_pmu_stop, 742 .read = cpumf_pmu_read, 743 }; 744 745 static int cfset_init(void); 746 static int __init cpumf_pmu_init(void) 747 { 748 int rc; 749 750 if (!kernel_cpumcf_avail()) 751 return -ENODEV; 752 753 /* Setup s390dbf facility */ 754 cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); 755 if (!cf_dbg) { 756 pr_err("Registration of s390dbf(cpum_cf) failed\n"); 757 return -ENOMEM; 758 } 759 debug_register_view(cf_dbg, &debug_sprintf_view); 760 761 cpumf_pmu.attr_groups = cpumf_cf_event_group(); 762 rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); 763 if (rc) { 764 debug_unregister_view(cf_dbg, &debug_sprintf_view); 765 debug_unregister(cf_dbg); 766 pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); 767 } else if (stccm_avail()) { /* Setup counter set device */ 768 cfset_init(); 769 } 770 return rc; 771 } 772 773 /* Support for the CPU Measurement Facility counter set extraction using 774 * device /dev/hwctr. This allows user space programs to extract complete 775 * counter set via normal file operations. 
776 */ 777 778 static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */ 779 static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */ 780 struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ 781 unsigned int sets; /* Counter set bit mask */ 782 atomic_t cpus_ack; /* # CPUs successfully executed func */ 783 }; 784 785 static struct cfset_session { /* CPUs and counter set bit mask */ 786 struct list_head head; /* Head of list of active processes */ 787 } cfset_session = { 788 .head = LIST_HEAD_INIT(cfset_session.head) 789 }; 790 791 struct cfset_request { /* CPUs and counter set bit mask */ 792 unsigned long ctrset; /* Bit mask of counter set to read */ 793 cpumask_t mask; /* CPU mask to read from */ 794 struct list_head node; /* Chain to cfset_session.head */ 795 }; 796 797 static void cfset_session_init(void) 798 { 799 INIT_LIST_HEAD(&cfset_session.head); 800 } 801 802 /* Remove current request from global bookkeeping. Maintain a counter set bit 803 * mask on a per CPU basis. 804 * Done in process context under mutex protection. 805 */ 806 static void cfset_session_del(struct cfset_request *p) 807 { 808 list_del(&p->node); 809 } 810 811 /* Add current request to global bookkeeping. Maintain a counter set bit mask 812 * on a per CPU basis. 813 * Done in process context under mutex protection. 814 */ 815 static void cfset_session_add(struct cfset_request *p) 816 { 817 list_add(&p->node, &cfset_session.head); 818 } 819 820 /* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access 821 * path is currently used. 822 * The cpu_cf_events::dev_state is used to denote counter sets in use by this 823 * interface. It is always or'ed in. If this interface is not active, its 824 * value is zero and no additional counter sets will be included. 825 * 826 * The cpu_cf_events::state is used by the perf_event_open SVC and remains 827 * unchanged. 
*
 * perf_pmu_enable() and perf_pmu_disable() and their call backs
 * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the
 * performance measurement subsystem to enable per process
 * CPU Measurement counter facility.
 * The XXX_enable() and XXX_disable() functions are used to turn off
 * x86 performance monitoring interrupt (PMI) during scheduling.
 * s390 uses these calls to temporarily stop and resume the active CPU
 * counter sets during scheduling.
 *
 * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr
 * device access.  The perf_event_open() SVC interface makes a lot of effort
 * to only run the counters while the calling process is actively scheduled
 * to run.
 * When /dev/hwctr interface is also used at the same time, the counter sets
 * will keep running, even when the process is scheduled off a CPU.
 * However this is not a problem and does not lead to wrong counter values
 * for the perf_event_open() SVC. The current counter value will be recorded
 * during schedule-in. At schedule-out time the current counter value is
 * extracted again and the delta is calculated and added to the event.
848 */ 849 /* Stop all counter sets via ioctl interface */ 850 static void cfset_ioctl_off(void *parm) 851 { 852 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 853 struct cfset_call_on_cpu_parm *p = parm; 854 int rc; 855 856 /* Check if any counter set used by /dev/hwc */ 857 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 858 if ((p->sets & cpumf_ctr_ctl[rc])) { 859 if (!atomic_dec_return(&cpuhw->ctr_set[rc])) { 860 ctr_set_disable(&cpuhw->dev_state, 861 cpumf_ctr_ctl[rc]); 862 ctr_set_stop(&cpuhw->dev_state, 863 cpumf_ctr_ctl[rc]); 864 } 865 } 866 /* Keep perf_event_open counter sets */ 867 rc = lcctl(cpuhw->dev_state | cpuhw->state); 868 if (rc) 869 pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n", 870 cpuhw->state, S390_HWCTR_DEVICE, rc); 871 if (!cpuhw->dev_state) 872 cpuhw->flags &= ~PMU_F_IN_USE; 873 debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", 874 __func__, rc, cpuhw->state, cpuhw->dev_state); 875 } 876 877 /* Start counter sets on particular CPU */ 878 static void cfset_ioctl_on(void *parm) 879 { 880 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 881 struct cfset_call_on_cpu_parm *p = parm; 882 int rc; 883 884 cpuhw->flags |= PMU_F_IN_USE; 885 ctr_set_enable(&cpuhw->dev_state, p->sets); 886 ctr_set_start(&cpuhw->dev_state, p->sets); 887 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 888 if ((p->sets & cpumf_ctr_ctl[rc])) 889 atomic_inc(&cpuhw->ctr_set[rc]); 890 rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */ 891 if (!rc) 892 atomic_inc(&p->cpus_ack); 893 else 894 pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n", 895 cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); 896 debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n", 897 __func__, rc, cpuhw->state, cpuhw->dev_state); 898 } 899 900 static void cfset_release_cpu(void *p) 901 { 902 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 903 int rc; 904 
905 debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", 906 __func__, cpuhw->state, cpuhw->dev_state); 907 cpuhw->dev_state = 0; 908 rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ 909 if (rc) 910 pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n", 911 cpuhw->state, S390_HWCTR_DEVICE, rc); 912 } 913 914 /* This modifies the process CPU mask to adopt it to the currently online 915 * CPUs. Offline CPUs can not be addresses. This call terminates the access 916 * and is usually followed by close() or a new iotcl(..., START, ...) which 917 * creates a new request structure. 918 */ 919 static void cfset_all_stop(struct cfset_request *req) 920 { 921 struct cfset_call_on_cpu_parm p = { 922 .sets = req->ctrset, 923 }; 924 925 cpumask_and(&req->mask, &req->mask, cpu_online_mask); 926 on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1); 927 } 928 929 /* Release function is also called when application gets terminated without 930 * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. 
931 */ 932 static int cfset_release(struct inode *inode, struct file *file) 933 { 934 mutex_lock(&cfset_ctrset_mutex); 935 /* Open followed by close/exit has no private_data */ 936 if (file->private_data) { 937 cfset_all_stop(file->private_data); 938 cfset_session_del(file->private_data); 939 kfree(file->private_data); 940 file->private_data = NULL; 941 } 942 if (!atomic_dec_return(&cfset_opencnt)) 943 on_each_cpu(cfset_release_cpu, NULL, 1); 944 mutex_unlock(&cfset_ctrset_mutex); 945 946 hw_perf_event_destroy(NULL); 947 return 0; 948 } 949 950 static int cfset_open(struct inode *inode, struct file *file) 951 { 952 if (!capable(CAP_SYS_ADMIN)) 953 return -EPERM; 954 mutex_lock(&cfset_ctrset_mutex); 955 if (atomic_inc_return(&cfset_opencnt) == 1) 956 cfset_session_init(); 957 mutex_unlock(&cfset_ctrset_mutex); 958 959 cpumf_hw_inuse(); 960 file->private_data = NULL; 961 /* nonseekable_open() never fails */ 962 return nonseekable_open(inode, file); 963 } 964 965 static int cfset_all_start(struct cfset_request *req) 966 { 967 struct cfset_call_on_cpu_parm p = { 968 .sets = req->ctrset, 969 .cpus_ack = ATOMIC_INIT(0), 970 }; 971 cpumask_var_t mask; 972 int rc = 0; 973 974 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 975 return -ENOMEM; 976 cpumask_and(mask, &req->mask, cpu_online_mask); 977 on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1); 978 if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { 979 on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); 980 rc = -EIO; 981 debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__); 982 } 983 free_cpumask_var(mask); 984 return rc; 985 } 986 987 988 /* Return the maximum required space for all possible CPUs in case one 989 * CPU will be onlined during the START, READ, STOP cycles. 990 * To find out the size of the counter sets, any one CPU will do. They 991 * all have the same counter sets. 
992 */ 993 static size_t cfset_needspace(unsigned int sets) 994 { 995 struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events); 996 size_t bytes = 0; 997 int i; 998 999 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1000 if (!(sets & cpumf_ctr_ctl[i])) 1001 continue; 1002 bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) + 1003 sizeof(((struct s390_ctrset_setdata *)0)->set) + 1004 sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); 1005 } 1006 bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * 1007 (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + 1008 sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); 1009 put_cpu_ptr(&cpu_cf_events); 1010 return bytes; 1011 } 1012 1013 static int cfset_all_copy(unsigned long arg, cpumask_t *mask) 1014 { 1015 struct s390_ctrset_read __user *ctrset_read; 1016 unsigned int cpu, cpus, rc; 1017 void __user *uptr; 1018 1019 ctrset_read = (struct s390_ctrset_read __user *)arg; 1020 uptr = ctrset_read->data; 1021 for_each_cpu(cpu, mask) { 1022 struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu); 1023 struct s390_ctrset_cpudata __user *ctrset_cpudata; 1024 1025 ctrset_cpudata = uptr; 1026 rc = put_user(cpu, &ctrset_cpudata->cpu_nr); 1027 rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets); 1028 rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data, 1029 cpuhw->used); 1030 if (rc) 1031 return -EFAULT; 1032 uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used; 1033 cond_resched(); 1034 } 1035 cpus = cpumask_weight(mask); 1036 if (put_user(cpus, &ctrset_read->no_cpus)) 1037 return -EFAULT; 1038 debug_sprintf_event(cf_dbg, 4, "%s copied %ld\n", __func__, 1039 uptr - (void __user *)ctrset_read->data); 1040 return 0; 1041 } 1042 1043 static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, 1044 int ctrset_size, size_t room) 1045 { 1046 size_t need = 0; 1047 int rc = -1; 1048 1049 need = sizeof(*p) + sizeof(u64) * ctrset_size; 1050 if (need <= room) { 1051 
p->set = cpumf_ctr_ctl[ctrset]; 1052 p->no_cnts = ctrset_size; 1053 rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); 1054 if (rc == 3) /* Nothing stored */ 1055 need = 0; 1056 } 1057 return need; 1058 } 1059 1060 /* Read all counter sets. */ 1061 static void cfset_cpu_read(void *parm) 1062 { 1063 struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); 1064 struct cfset_call_on_cpu_parm *p = parm; 1065 int set, set_size; 1066 size_t space; 1067 1068 /* No data saved yet */ 1069 cpuhw->used = 0; 1070 cpuhw->sets = 0; 1071 memset(cpuhw->data, 0, sizeof(cpuhw->data)); 1072 1073 /* Scan the counter sets */ 1074 for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { 1075 struct s390_ctrset_setdata *sp = (void *)cpuhw->data + 1076 cpuhw->used; 1077 1078 if (!(p->sets & cpumf_ctr_ctl[set])) 1079 continue; /* Counter set not in list */ 1080 set_size = cpum_cf_ctrset_size(set, &cpuhw->info); 1081 space = sizeof(cpuhw->data) - cpuhw->used; 1082 space = cfset_cpuset_read(sp, set, set_size, space); 1083 if (space) { 1084 cpuhw->used += space; 1085 cpuhw->sets += 1; 1086 } 1087 } 1088 debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, 1089 cpuhw->sets, cpuhw->used); 1090 } 1091 1092 static int cfset_all_read(unsigned long arg, struct cfset_request *req) 1093 { 1094 struct cfset_call_on_cpu_parm p; 1095 cpumask_var_t mask; 1096 int rc; 1097 1098 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 1099 return -ENOMEM; 1100 1101 p.sets = req->ctrset; 1102 cpumask_and(mask, &req->mask, cpu_online_mask); 1103 on_each_cpu_mask(mask, cfset_cpu_read, &p, 1); 1104 rc = cfset_all_copy(arg, mask); 1105 free_cpumask_var(mask); 1106 return rc; 1107 } 1108 1109 static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req) 1110 { 1111 struct s390_ctrset_read read; 1112 int ret = -ENODATA; 1113 1114 if (req && req->ctrset) { 1115 if (copy_from_user(&read, (char __user *)arg, sizeof(read))) 1116 return -EFAULT; 1117 ret = cfset_all_read(arg, req); 1118 } 1119 
return ret; 1120 } 1121 1122 static long cfset_ioctl_stop(struct file *file) 1123 { 1124 struct cfset_request *req = file->private_data; 1125 int ret = -ENXIO; 1126 1127 if (req) { 1128 cfset_all_stop(req); 1129 cfset_session_del(req); 1130 kfree(req); 1131 file->private_data = NULL; 1132 ret = 0; 1133 } 1134 return ret; 1135 } 1136 1137 static long cfset_ioctl_start(unsigned long arg, struct file *file) 1138 { 1139 struct s390_ctrset_start __user *ustart; 1140 struct s390_ctrset_start start; 1141 struct cfset_request *preq; 1142 void __user *umask; 1143 unsigned int len; 1144 int ret = 0; 1145 size_t need; 1146 1147 if (file->private_data) 1148 return -EBUSY; 1149 ustart = (struct s390_ctrset_start __user *)arg; 1150 if (copy_from_user(&start, ustart, sizeof(start))) 1151 return -EFAULT; 1152 if (start.version != S390_HWCTR_START_VERSION) 1153 return -EINVAL; 1154 if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | 1155 cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | 1156 cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | 1157 cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | 1158 cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) 1159 return -EINVAL; /* Invalid counter set */ 1160 if (!start.counter_sets) 1161 return -EINVAL; /* No counter set at all? 
*/ 1162 1163 preq = kzalloc(sizeof(*preq), GFP_KERNEL); 1164 if (!preq) 1165 return -ENOMEM; 1166 cpumask_clear(&preq->mask); 1167 len = min_t(u64, start.cpumask_len, cpumask_size()); 1168 umask = (void __user *)start.cpumask; 1169 if (copy_from_user(&preq->mask, umask, len)) { 1170 kfree(preq); 1171 return -EFAULT; 1172 } 1173 if (cpumask_empty(&preq->mask)) { 1174 kfree(preq); 1175 return -EINVAL; 1176 } 1177 need = cfset_needspace(start.counter_sets); 1178 if (put_user(need, &ustart->data_bytes)) { 1179 kfree(preq); 1180 return -EFAULT; 1181 } 1182 preq->ctrset = start.counter_sets; 1183 ret = cfset_all_start(preq); 1184 if (!ret) { 1185 cfset_session_add(preq); 1186 file->private_data = preq; 1187 debug_sprintf_event(cf_dbg, 4, "%s set %#lx need %ld ret %d\n", 1188 __func__, preq->ctrset, need, ret); 1189 } else { 1190 kfree(preq); 1191 } 1192 return ret; 1193 } 1194 1195 /* Entry point to the /dev/hwctr device interface. 1196 * The ioctl system call supports three subcommands: 1197 * S390_HWCTR_START: Start the specified counter sets on a CPU list. The 1198 * counter set keeps running until explicitly stopped. Returns the number 1199 * of bytes needed to store the counter values. If another S390_HWCTR_START 1200 * ioctl subcommand is called without a previous S390_HWCTR_STOP stop 1201 * command on the same file descriptor, -EBUSY is returned. 1202 * S390_HWCTR_READ: Read the counter set values from specified CPU list given 1203 * with the S390_HWCTR_START command. 1204 * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the 1205 * previous S390_HWCTR_START subcommand. 
1206 */ 1207 static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 1208 { 1209 int ret; 1210 1211 cpus_read_lock(); 1212 mutex_lock(&cfset_ctrset_mutex); 1213 switch (cmd) { 1214 case S390_HWCTR_START: 1215 ret = cfset_ioctl_start(arg, file); 1216 break; 1217 case S390_HWCTR_STOP: 1218 ret = cfset_ioctl_stop(file); 1219 break; 1220 case S390_HWCTR_READ: 1221 ret = cfset_ioctl_read(arg, file->private_data); 1222 break; 1223 default: 1224 ret = -ENOTTY; 1225 break; 1226 } 1227 mutex_unlock(&cfset_ctrset_mutex); 1228 cpus_read_unlock(); 1229 return ret; 1230 } 1231 1232 static const struct file_operations cfset_fops = { 1233 .owner = THIS_MODULE, 1234 .open = cfset_open, 1235 .release = cfset_release, 1236 .unlocked_ioctl = cfset_ioctl, 1237 .compat_ioctl = cfset_ioctl, 1238 .llseek = no_llseek 1239 }; 1240 1241 static struct miscdevice cfset_dev = { 1242 .name = S390_HWCTR_DEVICE, 1243 .minor = MISC_DYNAMIC_MINOR, 1244 .fops = &cfset_fops, 1245 }; 1246 1247 /* Hotplug add of a CPU. Scan through all active processes and add 1248 * that CPU to the list of CPUs supplied with ioctl(..., START, ...). 1249 */ 1250 int cfset_online_cpu(unsigned int cpu) 1251 { 1252 struct cfset_call_on_cpu_parm p; 1253 struct cfset_request *rp; 1254 1255 mutex_lock(&cfset_ctrset_mutex); 1256 if (!list_empty(&cfset_session.head)) { 1257 list_for_each_entry(rp, &cfset_session.head, node) { 1258 p.sets = rp->ctrset; 1259 cfset_ioctl_on(&p); 1260 cpumask_set_cpu(cpu, &rp->mask); 1261 } 1262 } 1263 mutex_unlock(&cfset_ctrset_mutex); 1264 return 0; 1265 } 1266 1267 /* Hotplug remove of a CPU. Scan through all active processes and clear 1268 * that CPU from the list of CPUs supplied with ioctl(..., START, ...). 
1269 */ 1270 int cfset_offline_cpu(unsigned int cpu) 1271 { 1272 struct cfset_call_on_cpu_parm p; 1273 struct cfset_request *rp; 1274 1275 mutex_lock(&cfset_ctrset_mutex); 1276 if (!list_empty(&cfset_session.head)) { 1277 list_for_each_entry(rp, &cfset_session.head, node) { 1278 p.sets = rp->ctrset; 1279 cfset_ioctl_off(&p); 1280 cpumask_clear_cpu(cpu, &rp->mask); 1281 } 1282 } 1283 mutex_unlock(&cfset_ctrset_mutex); 1284 return 0; 1285 } 1286 1287 static void cfdiag_read(struct perf_event *event) 1288 { 1289 debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__, 1290 event->attr.config, local64_read(&event->count)); 1291 } 1292 1293 static int get_authctrsets(void) 1294 { 1295 struct cpu_cf_events *cpuhw; 1296 unsigned long auth = 0; 1297 enum cpumf_ctr_set i; 1298 1299 cpuhw = &get_cpu_var(cpu_cf_events); 1300 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1301 if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) 1302 auth |= cpumf_ctr_ctl[i]; 1303 } 1304 put_cpu_var(cpu_cf_events); 1305 return auth; 1306 } 1307 1308 /* Setup the event. Test for authorized counter sets and only include counter 1309 * sets which are authorized at the time of the setup. Including unauthorized 1310 * counter sets result in specification exception (and panic). 1311 */ 1312 static int cfdiag_event_init2(struct perf_event *event) 1313 { 1314 struct perf_event_attr *attr = &event->attr; 1315 int err = 0; 1316 1317 /* Set sample_period to indicate sampling */ 1318 event->hw.config = attr->config; 1319 event->hw.sample_period = attr->sample_period; 1320 local64_set(&event->hw.period_left, event->hw.sample_period); 1321 local64_set(&event->count, 0); 1322 event->hw.last_period = event->hw.sample_period; 1323 1324 /* Add all authorized counter sets to config_base. The 1325 * the hardware init function is either called per-cpu or just once 1326 * for all CPUS (event->cpu == -1). 
This depends on the whether 1327 * counting is started for all CPUs or on a per workload base where 1328 * the perf event moves from one CPU to another CPU. 1329 * Checking the authorization on any CPU is fine as the hardware 1330 * applies the same authorization settings to all CPUs. 1331 */ 1332 event->hw.config_base = get_authctrsets(); 1333 1334 /* No authorized counter sets, nothing to count/sample */ 1335 if (!event->hw.config_base) 1336 err = -EINVAL; 1337 1338 debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n", 1339 __func__, err, event->hw.config_base); 1340 return err; 1341 } 1342 1343 static int cfdiag_event_init(struct perf_event *event) 1344 { 1345 struct perf_event_attr *attr = &event->attr; 1346 int err = -ENOENT; 1347 1348 if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || 1349 event->attr.type != event->pmu->type) 1350 goto out; 1351 1352 /* Raw events are used to access counters directly, 1353 * hence do not permit excludes. 1354 * This event is useless without PERF_SAMPLE_RAW to return counter set 1355 * values as raw data. 1356 */ 1357 if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || 1358 !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { 1359 err = -EOPNOTSUPP; 1360 goto out; 1361 } 1362 1363 /* Initialize for using the CPU-measurement counter facility */ 1364 cpumf_hw_inuse(); 1365 event->destroy = hw_perf_event_destroy; 1366 1367 err = cfdiag_event_init2(event); 1368 if (unlikely(err)) 1369 event->destroy(event); 1370 out: 1371 return err; 1372 } 1373 1374 /* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used 1375 * to collect the complete counter sets for a scheduled process. Target 1376 * are complete counter sets attached as raw data to the artificial event. 1377 * This results in complete counter sets available when a process is 1378 * scheduled. Contains the delta of every counter while the process was 1379 * running. 
1380 */ 1381 CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); 1382 1383 static struct attribute *cfdiag_events_attr[] = { 1384 CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), 1385 NULL, 1386 }; 1387 1388 PMU_FORMAT_ATTR(event, "config:0-63"); 1389 1390 static struct attribute *cfdiag_format_attr[] = { 1391 &format_attr_event.attr, 1392 NULL, 1393 }; 1394 1395 static struct attribute_group cfdiag_events_group = { 1396 .name = "events", 1397 .attrs = cfdiag_events_attr, 1398 }; 1399 static struct attribute_group cfdiag_format_group = { 1400 .name = "format", 1401 .attrs = cfdiag_format_attr, 1402 }; 1403 static const struct attribute_group *cfdiag_attr_groups[] = { 1404 &cfdiag_events_group, 1405 &cfdiag_format_group, 1406 NULL, 1407 }; 1408 1409 /* Performance monitoring unit for event CF_DIAG. Since this event 1410 * is also started and stopped via the perf_event_open() system call, use 1411 * the same event enable/disable call back functions. They do not 1412 * have a pointer to the perf_event strcture as first parameter. 1413 * 1414 * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. 1415 * Reuse them and distinguish the event (always first parameter) via 1416 * 'config' member. 1417 */ 1418 static struct pmu cf_diag = { 1419 .task_ctx_nr = perf_sw_context, 1420 .event_init = cfdiag_event_init, 1421 .pmu_enable = cpumf_pmu_enable, 1422 .pmu_disable = cpumf_pmu_disable, 1423 .add = cpumf_pmu_add, 1424 .del = cpumf_pmu_del, 1425 .start = cpumf_pmu_start, 1426 .stop = cpumf_pmu_stop, 1427 .read = cfdiag_read, 1428 1429 .attr_groups = cfdiag_attr_groups 1430 }; 1431 1432 /* Calculate memory needed to store all counter sets together with header and 1433 * trailer data. This is independent of the counter set authorization which 1434 * can vary depending on the configuration. 
1435 */ 1436 static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) 1437 { 1438 size_t max_size = sizeof(struct cf_trailer_entry); 1439 enum cpumf_ctr_set i; 1440 1441 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1442 size_t size = cpum_cf_ctrset_size(i, info); 1443 1444 if (size) 1445 max_size += size * sizeof(u64) + 1446 sizeof(struct cf_ctrset_entry); 1447 } 1448 return max_size; 1449 } 1450 1451 /* Get the CPU speed, try sampling facility first and CPU attributes second. */ 1452 static void cfdiag_get_cpu_speed(void) 1453 { 1454 unsigned long mhz; 1455 1456 if (cpum_sf_avail()) { /* Sampling facility first */ 1457 struct hws_qsi_info_block si; 1458 1459 memset(&si, 0, sizeof(si)); 1460 if (!qsi(&si)) { 1461 cfdiag_cpu_speed = si.cpu_speed; 1462 return; 1463 } 1464 } 1465 1466 /* Fallback: CPU speed extract static part. Used in case 1467 * CPU Measurement Sampling Facility is turned off. 1468 */ 1469 mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); 1470 if (mhz != -1UL) 1471 cfdiag_cpu_speed = mhz & 0xffffffff; 1472 } 1473 1474 static int cfset_init(void) 1475 { 1476 struct cpumf_ctr_info info; 1477 size_t need; 1478 int rc; 1479 1480 if (qctri(&info)) 1481 return -ENODEV; 1482 1483 cfdiag_get_cpu_speed(); 1484 /* Make sure the counter set data fits into predefined buffer. */ 1485 need = cfdiag_maxsize(&info); 1486 if (need > sizeof(((struct cpu_cf_events *)0)->start)) { 1487 pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", 1488 need); 1489 return -ENOMEM; 1490 } 1491 1492 rc = misc_register(&cfset_dev); 1493 if (rc) { 1494 pr_err("Registration of /dev/%s failed rc=%i\n", 1495 cfset_dev.name, rc); 1496 goto out; 1497 } 1498 1499 rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); 1500 if (rc) { 1501 misc_deregister(&cfset_dev); 1502 pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", 1503 rc); 1504 } 1505 out: 1506 return rc; 1507 } 1508 1509 device_initcall(cpumf_pmu_init); 1510