// SPDX-License-Identifier: GPL-2.0
/*
 * Performance event support - Processor Activity Instrumentation Extension
 * Facility
 *
 *  Copyright IBM Corp. 2022
 *  Author(s): Thomas Richter <tmricht@linux.ibm.com>
 */
#define KMSG_COMPONENT	"pai_ext"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/io.h>

#include <asm/cpu_mcf.h>
#include <asm/ctl_reg.h>
#include <asm/pai.h>
#include <asm/debug.h>

#define	PAIE1_CB_SZ		0x200	/* Size of PAIE1 control block */
#define	PAIE1_CTRBLOCK_SZ	0x400	/* Size of PAIE1 counter blocks */

/* s390 debug feature handle, registered in paiext_init(). */
static debug_info_t *paiext_dbg;
/* Number of NNPA counters; capped at PAI_NNPA_MAXCTR in paiext_init(). */
static unsigned int paiext_cnt;	/* Extracted with QPACI instruction */

/* One entry of the raw sample data assembled by paiext_copy():
 * the counter number followed by its value. Packed, so an entry is
 * 10 bytes (2 + 8) without padding.
 */
struct pai_userdata {
	u16 num;
	u64 value;
} __packed;

/* Create the PAI extension 1 control block area.
 * The PAI extension control block 1 is pointed to by lowcore
 * address 0x1508 for each CPU. This control block is 512 bytes in size
 * and requires a 512 byte boundary alignment.
40 */ 41 struct paiext_cb { /* PAI extension 1 control block */ 42 u64 header; /* Not used */ 43 u64 reserved1; 44 u64 acc; /* Addr to analytics counter control block */ 45 u8 reserved2[488]; 46 } __packed; 47 48 struct paiext_map { 49 unsigned long *area; /* Area for CPU to store counters */ 50 struct pai_userdata *save; /* Area to store non-zero counters */ 51 enum paievt_mode mode; /* Type of event */ 52 unsigned int active_events; /* # of PAI Extension users */ 53 unsigned int refcnt; 54 struct perf_event *event; /* Perf event for sampling */ 55 struct paiext_cb *paiext_cb; /* PAI extension control block area */ 56 }; 57 58 struct paiext_mapptr { 59 struct paiext_map *mapptr; 60 }; 61 62 static struct paiext_root { /* Anchor to per CPU data */ 63 int refcnt; /* Overall active events */ 64 struct paiext_mapptr __percpu *mapptr; 65 } paiext_root; 66 67 /* Free per CPU data when the last event is removed. */ 68 static void paiext_root_free(void) 69 { 70 if (!--paiext_root.refcnt) { 71 free_percpu(paiext_root.mapptr); 72 paiext_root.mapptr = NULL; 73 } 74 } 75 76 /* On initialization of first event also allocate per CPU data dynamically. 77 * Start with an array of pointers, the array size is the maximum number of 78 * CPUs possible, which might be larger than the number of CPUs currently 79 * online. 80 */ 81 static int paiext_root_alloc(void) 82 { 83 if (++paiext_root.refcnt == 1) { 84 /* The memory is already zeroed. */ 85 paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); 86 if (!paiext_root.mapptr) { 87 /* Returing without refcnt adjustment is ok. The 88 * error code is handled by paiext_alloc() which 89 * decrements refcnt when an event can not be 90 * created. 91 */ 92 return -ENOMEM; 93 } 94 } 95 return 0; 96 } 97 98 /* Protects against concurrent increment of sampler and counter member 99 * increments at the same time and prohibits concurrent execution of 100 * counting and sampling events. 
101 * Ensures that analytics counter block is deallocated only when the 102 * sampling and counting on that cpu is zero. 103 * For details see paiext_alloc(). 104 */ 105 static DEFINE_MUTEX(paiext_reserve_mutex); 106 107 /* Free all memory allocated for event counting/sampling setup */ 108 static void paiext_free(struct paiext_mapptr *mp) 109 { 110 kfree(mp->mapptr->area); 111 kfree(mp->mapptr->paiext_cb); 112 kvfree(mp->mapptr->save); 113 kfree(mp->mapptr); 114 mp->mapptr = NULL; 115 } 116 117 /* Release the PMU if event is the last perf event */ 118 static void paiext_event_destroy(struct perf_event *event) 119 { 120 struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); 121 struct paiext_map *cpump = mp->mapptr; 122 123 mutex_lock(&paiext_reserve_mutex); 124 cpump->event = NULL; 125 if (!--cpump->refcnt) /* Last reference gone */ 126 paiext_free(mp); 127 paiext_root_free(); 128 mutex_unlock(&paiext_reserve_mutex); 129 debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__, 130 event->cpu, mp->mapptr); 131 132 } 133 134 /* Used to avoid races in checking concurrent access of counting and 135 * sampling for pai_extension events. 136 * 137 * Only one instance of event pai_ext/NNPA_ALL/ for sampling is 138 * allowed and when this event is running, no counting event is allowed. 139 * Several counting events are allowed in parallel, but no sampling event 140 * is allowed while one (or more) counting events are running. 141 * 142 * This function is called in process context and it is safe to block. 143 * When the event initialization functions fails, no other call back will 144 * be invoked. 145 * 146 * Allocate the memory for the event. 
147 */ 148 static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) 149 { 150 struct paiext_mapptr *mp; 151 struct paiext_map *cpump; 152 int rc; 153 154 mutex_lock(&paiext_reserve_mutex); 155 156 rc = paiext_root_alloc(); 157 if (rc) 158 goto unlock; 159 160 mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); 161 cpump = mp->mapptr; 162 if (!cpump) { /* Paiext_map allocated? */ 163 rc = -ENOMEM; 164 cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); 165 if (!cpump) 166 goto unlock; 167 168 /* Allocate memory for counter area and counter extraction. 169 * These are 170 * - a 512 byte block and requires 512 byte boundary alignment. 171 * - a 1KB byte block and requires 1KB boundary alignment. 172 * Only the first counting event has to allocate the area. 173 * 174 * Note: This works with commit 59bb47985c1d by default. 175 * Backporting this to kernels without this commit might 176 * need adjustment. 177 */ 178 mp->mapptr = cpump; 179 cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL); 180 cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL); 181 cpump->save = kvmalloc_array(paiext_cnt + 1, 182 sizeof(struct pai_userdata), 183 GFP_KERNEL); 184 if (!cpump->save || !cpump->area || !cpump->paiext_cb) { 185 paiext_free(mp); 186 goto unlock; 187 } 188 cpump->mode = a->sample_period ? PAI_MODE_SAMPLING 189 : PAI_MODE_COUNTING; 190 } else { 191 /* Multiple invocation, check whats active. 192 * Supported are multiple counter events or only one sampling 193 * event concurrently at any one time. 194 */ 195 if (cpump->mode == PAI_MODE_SAMPLING || 196 (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) { 197 rc = -EBUSY; 198 goto unlock; 199 } 200 } 201 202 rc = 0; 203 cpump->event = event; 204 ++cpump->refcnt; 205 206 unlock: 207 if (rc) { 208 /* Error in allocation of event, decrement anchor. Since 209 * the event in not created, its destroy() function is never 210 * invoked. Adjust the reference counter for the anchor. 
211 */ 212 paiext_root_free(); 213 } 214 mutex_unlock(&paiext_reserve_mutex); 215 /* If rc is non-zero, no increment of counter/sampler was done. */ 216 return rc; 217 } 218 219 /* The PAI extension 1 control block supports up to 128 entries. Return 220 * the index within PAIE1_CB given the event number. Also validate event 221 * number. 222 */ 223 static int paiext_event_valid(struct perf_event *event) 224 { 225 u64 cfg = event->attr.config; 226 227 if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) { 228 /* Offset NNPA in paiext_cb */ 229 event->hw.config_base = offsetof(struct paiext_cb, acc); 230 return 0; 231 } 232 return -EINVAL; 233 } 234 235 /* Might be called on different CPU than the one the event is intended for. */ 236 static int paiext_event_init(struct perf_event *event) 237 { 238 struct perf_event_attr *a = &event->attr; 239 int rc; 240 241 /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */ 242 if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) 243 return -ENOENT; 244 /* PAI extension event must be valid and in supported range */ 245 rc = paiext_event_valid(event); 246 if (rc) 247 return rc; 248 /* Allow only CPU wide operation, no process context for now. */ 249 if (event->hw.target || event->cpu == -1) 250 return -ENOENT; 251 /* Allow only event NNPA_ALL for sampling. 
*/ 252 if (a->sample_period && a->config != PAI_NNPA_BASE) 253 return -EINVAL; 254 /* Prohibit exclude_user event selection */ 255 if (a->exclude_user) 256 return -EINVAL; 257 258 rc = paiext_alloc(a, event); 259 if (rc) 260 return rc; 261 event->hw.last_tag = 0; 262 event->destroy = paiext_event_destroy; 263 264 if (a->sample_period) { 265 a->sample_period = 1; 266 a->freq = 0; 267 /* Register for paicrypt_sched_task() to be called */ 268 event->attach_state |= PERF_ATTACH_SCHED_CB; 269 /* Add raw data which are the memory mapped counters */ 270 a->sample_type |= PERF_SAMPLE_RAW; 271 /* Turn off inheritance */ 272 a->inherit = 0; 273 } 274 275 return 0; 276 } 277 278 static u64 paiext_getctr(struct paiext_map *cpump, int nr) 279 { 280 return cpump->area[nr]; 281 } 282 283 /* Read the counter values. Return value from location in buffer. For event 284 * NNPA_ALL sum up all events. 285 */ 286 static u64 paiext_getdata(struct perf_event *event) 287 { 288 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 289 struct paiext_map *cpump = mp->mapptr; 290 u64 sum = 0; 291 int i; 292 293 if (event->attr.config != PAI_NNPA_BASE) 294 return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE); 295 296 for (i = 1; i <= paiext_cnt; i++) 297 sum += paiext_getctr(cpump, i); 298 299 return sum; 300 } 301 302 static u64 paiext_getall(struct perf_event *event) 303 { 304 return paiext_getdata(event); 305 } 306 307 static void paiext_read(struct perf_event *event) 308 { 309 u64 prev, new, delta; 310 311 prev = local64_read(&event->hw.prev_count); 312 new = paiext_getall(event); 313 local64_set(&event->hw.prev_count, new); 314 delta = new - prev; 315 local64_add(delta, &event->count); 316 } 317 318 static void paiext_start(struct perf_event *event, int flags) 319 { 320 u64 sum; 321 322 if (event->hw.last_tag) 323 return; 324 event->hw.last_tag = 1; 325 sum = paiext_getall(event); /* Get current value */ 326 local64_set(&event->hw.prev_count, sum); 327 
local64_set(&event->count, 0); 328 } 329 330 static int paiext_add(struct perf_event *event, int flags) 331 { 332 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 333 struct paiext_map *cpump = mp->mapptr; 334 struct paiext_cb *pcb = cpump->paiext_cb; 335 336 if (++cpump->active_events == 1) { 337 S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb); 338 pcb->acc = virt_to_phys(cpump->area) | 0x1; 339 /* Enable CPU instruction lookup for PAIE1 control block */ 340 __ctl_set_bit(0, 49); 341 debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", 342 __func__, S390_lowcore.aicd, pcb->acc); 343 } 344 if (flags & PERF_EF_START && !event->attr.sample_period) { 345 /* Only counting needs initial counter value */ 346 paiext_start(event, PERF_EF_RELOAD); 347 } 348 event->hw.state = 0; 349 if (event->attr.sample_period) { 350 cpump->event = event; 351 perf_sched_cb_inc(event->pmu); 352 } 353 return 0; 354 } 355 356 static void paiext_stop(struct perf_event *event, int flags) 357 { 358 paiext_read(event); 359 event->hw.state = PERF_HES_STOPPED; 360 } 361 362 static void paiext_del(struct perf_event *event, int flags) 363 { 364 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 365 struct paiext_map *cpump = mp->mapptr; 366 struct paiext_cb *pcb = cpump->paiext_cb; 367 368 if (event->attr.sample_period) 369 perf_sched_cb_dec(event->pmu); 370 if (!event->attr.sample_period) { 371 /* Only counting needs to read counter */ 372 paiext_stop(event, PERF_EF_UPDATE); 373 } 374 if (--cpump->active_events == 0) { 375 /* Disable CPU instruction lookup for PAIE1 control block */ 376 __ctl_clear_bit(0, 49); 377 pcb->acc = 0; 378 S390_lowcore.aicd = 0; 379 debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", 380 __func__, S390_lowcore.aicd, pcb->acc); 381 } 382 } 383 384 /* Create raw data and save it in buffer. Returns number of bytes copied. 
385 * Saves only positive counter entries of the form 386 * 2 bytes: Number of counter 387 * 8 bytes: Value of counter 388 */ 389 static size_t paiext_copy(struct paiext_map *cpump) 390 { 391 struct pai_userdata *userdata = cpump->save; 392 int i, outidx = 0; 393 394 for (i = 1; i <= paiext_cnt; i++) { 395 u64 val = paiext_getctr(cpump, i); 396 397 if (val) { 398 userdata[outidx].num = i; 399 userdata[outidx].value = val; 400 outidx++; 401 } 402 } 403 return outidx * sizeof(*userdata); 404 } 405 406 /* Write sample when one or more counters values are nonzero. 407 * 408 * Note: The function paiext_sched_task() and paiext_push_sample() are not 409 * invoked after function paiext_del() has been called because of function 410 * perf_sched_cb_dec(). 411 * The function paiext_sched_task() and paiext_push_sample() are only 412 * called when sampling is active. Function perf_sched_cb_inc() 413 * has been invoked to install function paiext_sched_task() as call back 414 * to run at context switch time (see paiext_add()). 415 * 416 * This causes function perf_event_context_sched_out() and 417 * perf_event_context_sched_in() to check whether the PMU has installed an 418 * sched_task() callback. That callback is not active after paiext_del() 419 * returns and has deleted the event on that CPU. 
420 */ 421 static int paiext_push_sample(void) 422 { 423 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 424 struct paiext_map *cpump = mp->mapptr; 425 struct perf_event *event = cpump->event; 426 struct perf_sample_data data; 427 struct perf_raw_record raw; 428 struct pt_regs regs; 429 size_t rawsize; 430 int overflow; 431 432 rawsize = paiext_copy(cpump); 433 if (!rawsize) /* No incremented counters */ 434 return 0; 435 436 /* Setup perf sample */ 437 memset(®s, 0, sizeof(regs)); 438 memset(&raw, 0, sizeof(raw)); 439 memset(&data, 0, sizeof(data)); 440 perf_sample_data_init(&data, 0, event->hw.last_period); 441 if (event->attr.sample_type & PERF_SAMPLE_TID) { 442 data.tid_entry.pid = task_tgid_nr(current); 443 data.tid_entry.tid = task_pid_nr(current); 444 } 445 if (event->attr.sample_type & PERF_SAMPLE_TIME) 446 data.time = event->clock(); 447 if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) 448 data.id = event->id; 449 if (event->attr.sample_type & PERF_SAMPLE_CPU) 450 data.cpu_entry.cpu = smp_processor_id(); 451 if (event->attr.sample_type & PERF_SAMPLE_RAW) { 452 raw.frag.size = rawsize; 453 raw.frag.data = cpump->save; 454 raw.size = raw.frag.size; 455 data.raw = &raw; 456 data.sample_flags |= PERF_SAMPLE_RAW; 457 } 458 459 overflow = perf_event_overflow(event, &data, ®s); 460 perf_event_update_userpage(event); 461 /* Clear lowcore area after read */ 462 memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ); 463 return overflow; 464 } 465 466 /* Called on schedule-in and schedule-out. No access to event structure, 467 * but for sampling only event NNPA_ALL is allowed. 468 */ 469 static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) 470 { 471 /* We started with a clean page on event installation. So read out 472 * results on schedule_out and if page was dirty, clear values. 473 */ 474 if (!sched_in) 475 paiext_push_sample(); 476 } 477 478 /* Attribute definitions for pai extension1 interface. 
 * As with other CPU Measurement Facilities, there is one attribute per
 * mapped counter.
 * The number of mapped counters may vary per machine generation. Use
 * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
 * to determine the number of mapped counters. The instruction returns
 * a positive number, which is the highest number of supported counters.
 * All counters less than this number are also supported, there are no
 * holes. A returned number of zero means no support for mapped counters.
 *
 * The identification of the counter is a unique number. The chosen range
 * is 0x1800 + offset in mapped kernel page.
 * All CPU Measurement Facility counter identifiers must be unique and
 * the numbers from 0 to 496 are already used for the CPU Measurement
 * Counter facility. Numbers 0x1000 to 0x103e are used for PAI cryptography
 * counters.
 * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
 * used for the CPU Measurement Sampling facility.
 */
/* The event number is taken from all 64 bits of attr.config. */
PMU_FORMAT_ATTR(event, "config:0-63");

/* NULL terminated attribute list of the "format" group. */
static struct attribute *paiext_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

/* The "events" group; its attribute list is created at init time. */
static struct attribute_group paiext_events_group = {
	.name = "events",
	.attrs = NULL,	/* Filled in attr_event_init() */
};

static struct attribute_group paiext_format_group = {
	.name = "format",
	.attrs = paiext_format_attr,
};

/* NULL terminated list of attribute groups attached to the PMU below. */
static const struct attribute_group *paiext_attr_groups[] = {
	&paiext_events_group,
	&paiext_format_group,
	NULL,
};

/* Performance monitoring unit for mapped counters.
 * paiext_event_init() rejects events without a CPU or with a target
 * task, matching the perf_invalid_context setting.
 */
static struct pmu paiext = {
	.task_ctx_nr  = perf_invalid_context,
	.event_init   = paiext_event_init,
	.add	      = paiext_add,
	.del	      = paiext_del,
	.start	      = paiext_start,
	.stop	      = paiext_stop,
	.read	      = paiext_read,
	.sched_task   = paiext_sched_task,
	.attr_groups  = paiext_attr_groups,
};

/* List of symbolic PAI extension 1 NNPA counter names.
*/ 533 static const char * const paiext_ctrnames[] = { 534 [0] = "NNPA_ALL", 535 [1] = "NNPA_ADD", 536 [2] = "NNPA_SUB", 537 [3] = "NNPA_MUL", 538 [4] = "NNPA_DIV", 539 [5] = "NNPA_MIN", 540 [6] = "NNPA_MAX", 541 [7] = "NNPA_LOG", 542 [8] = "NNPA_EXP", 543 [9] = "NNPA_IBM_RESERVED_9", 544 [10] = "NNPA_RELU", 545 [11] = "NNPA_TANH", 546 [12] = "NNPA_SIGMOID", 547 [13] = "NNPA_SOFTMAX", 548 [14] = "NNPA_BATCHNORM", 549 [15] = "NNPA_MAXPOOL2D", 550 [16] = "NNPA_AVGPOOL2D", 551 [17] = "NNPA_LSTMACT", 552 [18] = "NNPA_GRUACT", 553 [19] = "NNPA_CONVOLUTION", 554 [20] = "NNPA_MATMUL_OP", 555 [21] = "NNPA_MATMUL_OP_BCAST23", 556 [22] = "NNPA_SMALLBATCH", 557 [23] = "NNPA_LARGEDIM", 558 [24] = "NNPA_SMALLTENSOR", 559 [25] = "NNPA_1MFRAME", 560 [26] = "NNPA_2GFRAME", 561 [27] = "NNPA_ACCESSEXCEPT", 562 }; 563 564 static void __init attr_event_free(struct attribute **attrs, int num) 565 { 566 struct perf_pmu_events_attr *pa; 567 struct device_attribute *dap; 568 int i; 569 570 for (i = 0; i < num; i++) { 571 dap = container_of(attrs[i], struct device_attribute, attr); 572 pa = container_of(dap, struct perf_pmu_events_attr, attr); 573 kfree(pa); 574 } 575 kfree(attrs); 576 } 577 578 static int __init attr_event_init_one(struct attribute **attrs, int num) 579 { 580 struct perf_pmu_events_attr *pa; 581 582 pa = kzalloc(sizeof(*pa), GFP_KERNEL); 583 if (!pa) 584 return -ENOMEM; 585 586 sysfs_attr_init(&pa->attr.attr); 587 pa->id = PAI_NNPA_BASE + num; 588 pa->attr.attr.name = paiext_ctrnames[num]; 589 pa->attr.attr.mode = 0444; 590 pa->attr.show = cpumf_events_sysfs_show; 591 pa->attr.store = NULL; 592 attrs[num] = &pa->attr.attr; 593 return 0; 594 } 595 596 /* Create PMU sysfs event attributes on the fly. 
*/ 597 static int __init attr_event_init(void) 598 { 599 struct attribute **attrs; 600 int ret, i; 601 602 attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs), 603 GFP_KERNEL); 604 if (!attrs) 605 return -ENOMEM; 606 for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) { 607 ret = attr_event_init_one(attrs, i); 608 if (ret) { 609 attr_event_free(attrs, i - 1); 610 return ret; 611 } 612 } 613 attrs[i] = NULL; 614 paiext_events_group.attrs = attrs; 615 return 0; 616 } 617 618 static int __init paiext_init(void) 619 { 620 struct qpaci_info_block ib; 621 int rc = -ENOMEM; 622 623 if (!test_facility(197)) 624 return 0; 625 626 qpaci(&ib); 627 paiext_cnt = ib.num_nnpa; 628 if (paiext_cnt >= PAI_NNPA_MAXCTR) 629 paiext_cnt = PAI_NNPA_MAXCTR; 630 if (!paiext_cnt) 631 return 0; 632 633 rc = attr_event_init(); 634 if (rc) { 635 pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n"); 636 return rc; 637 } 638 639 /* Setup s390dbf facility */ 640 paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); 641 if (!paiext_dbg) { 642 pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n"); 643 rc = -ENOMEM; 644 goto out_init; 645 } 646 debug_register_view(paiext_dbg, &debug_sprintf_view); 647 648 rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1); 649 if (rc) { 650 pr_err("Registration of " KMSG_COMPONENT " PMU failed with " 651 "rc=%i\n", rc); 652 goto out_pmu; 653 } 654 655 return 0; 656 657 out_pmu: 658 debug_unregister_view(paiext_dbg, &debug_sprintf_view); 659 debug_unregister(paiext_dbg); 660 out_init: 661 attr_event_free(paiext_events_group.attrs, 662 ARRAY_SIZE(paiext_ctrnames) + 1); 663 return rc; 664 } 665 666 device_initcall(paiext_init); 667