1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Performance event support - Processor Activity Instrumentation Extension 4 * Facility 5 * 6 * Copyright IBM Corp. 2022 7 * Author(s): Thomas Richter <tmricht@linux.ibm.com> 8 */ 9 #define KMSG_COMPONENT "pai_ext" 10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 11 12 #include <linux/kernel.h> 13 #include <linux/kernel_stat.h> 14 #include <linux/percpu.h> 15 #include <linux/notifier.h> 16 #include <linux/init.h> 17 #include <linux/export.h> 18 #include <linux/io.h> 19 #include <linux/perf_event.h> 20 21 #include <asm/ctl_reg.h> 22 #include <asm/pai.h> 23 #include <asm/debug.h> 24 25 #define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */ 26 #define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */ 27 28 static debug_info_t *paiext_dbg; 29 static unsigned int paiext_cnt; /* Extracted with QPACI instruction */ 30 31 struct pai_userdata { 32 u16 num; 33 u64 value; 34 } __packed; 35 36 /* Create the PAI extension 1 control block area. 37 * The PAI extension control block 1 is pointed to by lowcore 38 * address 0x1508 for each CPU. This control block is 512 bytes in size 39 * and requires a 512 byte boundary alignment. 40 */ 41 struct paiext_cb { /* PAI extension 1 control block */ 42 u64 header; /* Not used */ 43 u64 reserved1; 44 u64 acc; /* Addr to analytics counter control block */ 45 u8 reserved2[488]; 46 } __packed; 47 48 struct paiext_map { 49 unsigned long *area; /* Area for CPU to store counters */ 50 struct pai_userdata *save; /* Area to store non-zero counters */ 51 enum paievt_mode mode; /* Type of event */ 52 unsigned int active_events; /* # of PAI Extension users */ 53 unsigned int refcnt; 54 struct perf_event *event; /* Perf event for sampling */ 55 struct paiext_cb *paiext_cb; /* PAI extension control block area */ 56 }; 57 58 struct paiext_mapptr { 59 struct paiext_map *mapptr; 60 }; 61 62 static struct paiext_root { /* Anchor to per CPU data */ 63 int refcnt; /* Overall active events */ 64 struct paiext_mapptr __percpu *mapptr; 65 } paiext_root; 66 67 /* Free per CPU data when the last event is removed. */ 68 static void paiext_root_free(void) 69 { 70 if (!--paiext_root.refcnt) { 71 free_percpu(paiext_root.mapptr); 72 paiext_root.mapptr = NULL; 73 } 74 } 75 76 /* On initialization of first event also allocate per CPU data dynamically. 77 * Start with an array of pointers, the array size is the maximum number of 78 * CPUs possible, which might be larger than the number of CPUs currently 79 * online. 80 */ 81 static int paiext_root_alloc(void) 82 { 83 if (++paiext_root.refcnt == 1) { 84 /* The memory is already zeroed. */ 85 paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); 86 if (!paiext_root.mapptr) { 87 /* Returing without refcnt adjustment is ok. The 88 * error code is handled by paiext_alloc() which 89 * decrements refcnt when an event can not be 90 * created. 91 */ 92 return -ENOMEM; 93 } 94 } 95 return 0; 96 } 97 98 /* Protects against concurrent increment of sampler and counter member 99 * increments at the same time and prohibits concurrent execution of 100 * counting and sampling events. 101 * Ensures that analytics counter block is deallocated only when the 102 * sampling and counting on that cpu is zero. 103 * For details see paiext_alloc(). 104 */ 105 static DEFINE_MUTEX(paiext_reserve_mutex); 106 107 /* Free all memory allocated for event counting/sampling setup */ 108 static void paiext_free(struct paiext_mapptr *mp) 109 { 110 kfree(mp->mapptr->area); 111 kfree(mp->mapptr->paiext_cb); 112 kvfree(mp->mapptr->save); 113 kfree(mp->mapptr); 114 mp->mapptr = NULL; 115 } 116 117 /* Release the PMU if event is the last perf event */ 118 static void paiext_event_destroy(struct perf_event *event) 119 { 120 struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); 121 struct paiext_map *cpump = mp->mapptr; 122 123 mutex_lock(&paiext_reserve_mutex); 124 cpump->event = NULL; 125 if (!--cpump->refcnt) /* Last reference gone */ 126 paiext_free(mp); 127 paiext_root_free(); 128 mutex_unlock(&paiext_reserve_mutex); 129 debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__, 130 event->cpu, mp->mapptr); 131 132 } 133 134 /* Used to avoid races in checking concurrent access of counting and 135 * sampling for pai_extension events. 136 * 137 * Only one instance of event pai_ext/NNPA_ALL/ for sampling is 138 * allowed and when this event is running, no counting event is allowed. 139 * Several counting events are allowed in parallel, but no sampling event 140 * is allowed while one (or more) counting events are running. 141 * 142 * This function is called in process context and it is safe to block. 143 * When the event initialization functions fails, no other call back will 144 * be invoked. 145 * 146 * Allocate the memory for the event. 147 */ 148 static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) 149 { 150 struct paiext_mapptr *mp; 151 struct paiext_map *cpump; 152 int rc; 153 154 mutex_lock(&paiext_reserve_mutex); 155 156 rc = paiext_root_alloc(); 157 if (rc) 158 goto unlock; 159 160 mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); 161 cpump = mp->mapptr; 162 if (!cpump) { /* Paiext_map allocated? */ 163 rc = -ENOMEM; 164 cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); 165 if (!cpump) 166 goto unlock; 167 168 /* Allocate memory for counter area and counter extraction. 169 * These are 170 * - a 512 byte block and requires 512 byte boundary alignment. 171 * - a 1KB byte block and requires 1KB boundary alignment. 172 * Only the first counting event has to allocate the area. 173 * 174 * Note: This works with commit 59bb47985c1d by default. 175 * Backporting this to kernels without this commit might 176 * need adjustment. 177 */ 178 mp->mapptr = cpump; 179 cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL); 180 cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL); 181 cpump->save = kvmalloc_array(paiext_cnt + 1, 182 sizeof(struct pai_userdata), 183 GFP_KERNEL); 184 if (!cpump->save || !cpump->area || !cpump->paiext_cb) { 185 paiext_free(mp); 186 goto unlock; 187 } 188 cpump->mode = a->sample_period ? PAI_MODE_SAMPLING 189 : PAI_MODE_COUNTING; 190 } else { 191 /* Multiple invocation, check whats active. 192 * Supported are multiple counter events or only one sampling 193 * event concurrently at any one time. 194 */ 195 if (cpump->mode == PAI_MODE_SAMPLING || 196 (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) { 197 rc = -EBUSY; 198 goto unlock; 199 } 200 } 201 202 rc = 0; 203 cpump->event = event; 204 ++cpump->refcnt; 205 206 unlock: 207 if (rc) { 208 /* Error in allocation of event, decrement anchor. Since 209 * the event in not created, its destroy() function is never 210 * invoked. Adjust the reference counter for the anchor. 211 */ 212 paiext_root_free(); 213 } 214 mutex_unlock(&paiext_reserve_mutex); 215 /* If rc is non-zero, no increment of counter/sampler was done. */ 216 return rc; 217 } 218 219 /* The PAI extension 1 control block supports up to 128 entries. Return 220 * the index within PAIE1_CB given the event number. Also validate event 221 * number. 222 */ 223 static int paiext_event_valid(struct perf_event *event) 224 { 225 u64 cfg = event->attr.config; 226 227 if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) { 228 /* Offset NNPA in paiext_cb */ 229 event->hw.config_base = offsetof(struct paiext_cb, acc); 230 return 0; 231 } 232 return -EINVAL; 233 } 234 235 /* Might be called on different CPU than the one the event is intended for. */ 236 static int paiext_event_init(struct perf_event *event) 237 { 238 struct perf_event_attr *a = &event->attr; 239 int rc; 240 241 /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */ 242 if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) 243 return -ENOENT; 244 /* PAI extension event must be valid and in supported range */ 245 rc = paiext_event_valid(event); 246 if (rc) 247 return rc; 248 /* Allow only CPU wide operation, no process context for now. */ 249 if (event->hw.target || event->cpu == -1) 250 return -ENOENT; 251 /* Allow only event NNPA_ALL for sampling. */ 252 if (a->sample_period && a->config != PAI_NNPA_BASE) 253 return -EINVAL; 254 /* Prohibit exclude_user event selection */ 255 if (a->exclude_user) 256 return -EINVAL; 257 258 rc = paiext_alloc(a, event); 259 if (rc) 260 return rc; 261 event->hw.last_tag = 0; 262 event->destroy = paiext_event_destroy; 263 264 if (a->sample_period) { 265 a->sample_period = 1; 266 a->freq = 0; 267 /* Register for paicrypt_sched_task() to be called */ 268 event->attach_state |= PERF_ATTACH_SCHED_CB; 269 /* Add raw data which are the memory mapped counters */ 270 a->sample_type |= PERF_SAMPLE_RAW; 271 /* Turn off inheritance */ 272 a->inherit = 0; 273 } 274 275 return 0; 276 } 277 278 static u64 paiext_getctr(struct paiext_map *cpump, int nr) 279 { 280 return cpump->area[nr]; 281 } 282 283 /* Read the counter values. Return value from location in buffer. For event 284 * NNPA_ALL sum up all events. 285 */ 286 static u64 paiext_getdata(struct perf_event *event) 287 { 288 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 289 struct paiext_map *cpump = mp->mapptr; 290 u64 sum = 0; 291 int i; 292 293 if (event->attr.config != PAI_NNPA_BASE) 294 return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE); 295 296 for (i = 1; i <= paiext_cnt; i++) 297 sum += paiext_getctr(cpump, i); 298 299 return sum; 300 } 301 302 static u64 paiext_getall(struct perf_event *event) 303 { 304 return paiext_getdata(event); 305 } 306 307 static void paiext_read(struct perf_event *event) 308 { 309 u64 prev, new, delta; 310 311 prev = local64_read(&event->hw.prev_count); 312 new = paiext_getall(event); 313 local64_set(&event->hw.prev_count, new); 314 delta = new - prev; 315 local64_add(delta, &event->count); 316 } 317 318 static void paiext_start(struct perf_event *event, int flags) 319 { 320 u64 sum; 321 322 if (event->hw.last_tag) 323 return; 324 event->hw.last_tag = 1; 325 sum = paiext_getall(event); /* Get current value */ 326 local64_set(&event->hw.prev_count, sum); 327 local64_set(&event->count, 0); 328 } 329 330 static int paiext_add(struct perf_event *event, int flags) 331 { 332 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 333 struct paiext_map *cpump = mp->mapptr; 334 struct paiext_cb *pcb = cpump->paiext_cb; 335 336 if (++cpump->active_events == 1) { 337 S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb); 338 pcb->acc = virt_to_phys(cpump->area) | 0x1; 339 /* Enable CPU instruction lookup for PAIE1 control block */ 340 __ctl_set_bit(0, 49); 341 debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", 342 __func__, S390_lowcore.aicd, pcb->acc); 343 } 344 if (flags & PERF_EF_START && !event->attr.sample_period) { 345 /* Only counting needs initial counter value */ 346 paiext_start(event, PERF_EF_RELOAD); 347 } 348 event->hw.state = 0; 349 if (event->attr.sample_period) { 350 cpump->event = event; 351 perf_sched_cb_inc(event->pmu); 352 } 353 return 0; 354 } 355 356 static void paiext_stop(struct perf_event *event, int flags) 357 { 358 paiext_read(event); 359 event->hw.state = PERF_HES_STOPPED; 360 } 361 362 static void paiext_del(struct perf_event *event, int flags) 363 { 364 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 365 struct paiext_map *cpump = mp->mapptr; 366 struct paiext_cb *pcb = cpump->paiext_cb; 367 368 if (event->attr.sample_period) 369 perf_sched_cb_dec(event->pmu); 370 if (!event->attr.sample_period) { 371 /* Only counting needs to read counter */ 372 paiext_stop(event, PERF_EF_UPDATE); 373 } 374 if (--cpump->active_events == 0) { 375 /* Disable CPU instruction lookup for PAIE1 control block */ 376 __ctl_clear_bit(0, 49); 377 pcb->acc = 0; 378 S390_lowcore.aicd = 0; 379 debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", 380 __func__, S390_lowcore.aicd, pcb->acc); 381 } 382 } 383 384 /* Create raw data and save it in buffer. Returns number of bytes copied. 385 * Saves only positive counter entries of the form 386 * 2 bytes: Number of counter 387 * 8 bytes: Value of counter 388 */ 389 static size_t paiext_copy(struct paiext_map *cpump) 390 { 391 struct pai_userdata *userdata = cpump->save; 392 int i, outidx = 0; 393 394 for (i = 1; i <= paiext_cnt; i++) { 395 u64 val = paiext_getctr(cpump, i); 396 397 if (val) { 398 userdata[outidx].num = i; 399 userdata[outidx].value = val; 400 outidx++; 401 } 402 } 403 return outidx * sizeof(*userdata); 404 } 405 406 /* Write sample when one or more counters values are nonzero. 407 * 408 * Note: The function paiext_sched_task() and paiext_push_sample() are not 409 * invoked after function paiext_del() has been called because of function 410 * perf_sched_cb_dec(). 411 * The function paiext_sched_task() and paiext_push_sample() are only 412 * called when sampling is active. Function perf_sched_cb_inc() 413 * has been invoked to install function paiext_sched_task() as call back 414 * to run at context switch time (see paiext_add()). 415 * 416 * This causes function perf_event_context_sched_out() and 417 * perf_event_context_sched_in() to check whether the PMU has installed an 418 * sched_task() callback. That callback is not active after paiext_del() 419 * returns and has deleted the event on that CPU. 420 */ 421 static int paiext_push_sample(void) 422 { 423 struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); 424 struct paiext_map *cpump = mp->mapptr; 425 struct perf_event *event = cpump->event; 426 struct perf_sample_data data; 427 struct perf_raw_record raw; 428 struct pt_regs regs; 429 size_t rawsize; 430 int overflow; 431 432 rawsize = paiext_copy(cpump); 433 if (!rawsize) /* No incremented counters */ 434 return 0; 435 436 /* Setup perf sample */ 437 memset(®s, 0, sizeof(regs)); 438 memset(&raw, 0, sizeof(raw)); 439 memset(&data, 0, sizeof(data)); 440 perf_sample_data_init(&data, 0, event->hw.last_period); 441 if (event->attr.sample_type & PERF_SAMPLE_TID) { 442 data.tid_entry.pid = task_tgid_nr(current); 443 data.tid_entry.tid = task_pid_nr(current); 444 } 445 if (event->attr.sample_type & PERF_SAMPLE_TIME) 446 data.time = event->clock(); 447 if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) 448 data.id = event->id; 449 if (event->attr.sample_type & PERF_SAMPLE_CPU) 450 data.cpu_entry.cpu = smp_processor_id(); 451 if (event->attr.sample_type & PERF_SAMPLE_RAW) { 452 raw.frag.size = rawsize; 453 raw.frag.data = cpump->save; 454 perf_sample_save_raw_data(&data, &raw); 455 } 456 457 overflow = perf_event_overflow(event, &data, ®s); 458 perf_event_update_userpage(event); 459 /* Clear lowcore area after read */ 460 memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ); 461 return overflow; 462 } 463 464 /* Called on schedule-in and schedule-out. No access to event structure, 465 * but for sampling only event NNPA_ALL is allowed. 466 */ 467 static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) 468 { 469 /* We started with a clean page on event installation. So read out 470 * results on schedule_out and if page was dirty, clear values. 471 */ 472 if (!sched_in) 473 paiext_push_sample(); 474 } 475 476 /* Attribute definitions for pai extension1 interface. As with other CPU 477 * Measurement Facilities, there is one attribute per mapped counter. 478 * The number of mapped counters may vary per machine generation. Use 479 * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction 480 * to determine the number of mapped counters. The instructions returns 481 * a positive number, which is the highest number of supported counters. 482 * All counters less than this number are also supported, there are no 483 * holes. A returned number of zero means no support for mapped counters. 484 * 485 * The identification of the counter is a unique number. The chosen range 486 * is 0x1800 + offset in mapped kernel page. 487 * All CPU Measurement Facility counters identifiers must be unique and 488 * the numbers from 0 to 496 are already used for the CPU Measurement 489 * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography 490 * counters. 491 * Numbers 0xb0000, 0xbc000 and 0xbd000 are already 492 * used for the CPU Measurement Sampling facility. 493 */ 494 PMU_FORMAT_ATTR(event, "config:0-63"); 495 496 static struct attribute *paiext_format_attr[] = { 497 &format_attr_event.attr, 498 NULL, 499 }; 500 501 static struct attribute_group paiext_events_group = { 502 .name = "events", 503 .attrs = NULL, /* Filled in attr_event_init() */ 504 }; 505 506 static struct attribute_group paiext_format_group = { 507 .name = "format", 508 .attrs = paiext_format_attr, 509 }; 510 511 static const struct attribute_group *paiext_attr_groups[] = { 512 &paiext_events_group, 513 &paiext_format_group, 514 NULL, 515 }; 516 517 /* Performance monitoring unit for mapped counters */ 518 static struct pmu paiext = { 519 .task_ctx_nr = perf_invalid_context, 520 .event_init = paiext_event_init, 521 .add = paiext_add, 522 .del = paiext_del, 523 .start = paiext_start, 524 .stop = paiext_stop, 525 .read = paiext_read, 526 .sched_task = paiext_sched_task, 527 .attr_groups = paiext_attr_groups, 528 }; 529 530 /* List of symbolic PAI extension 1 NNPA counter names. */ 531 static const char * const paiext_ctrnames[] = { 532 [0] = "NNPA_ALL", 533 [1] = "NNPA_ADD", 534 [2] = "NNPA_SUB", 535 [3] = "NNPA_MUL", 536 [4] = "NNPA_DIV", 537 [5] = "NNPA_MIN", 538 [6] = "NNPA_MAX", 539 [7] = "NNPA_LOG", 540 [8] = "NNPA_EXP", 541 [9] = "NNPA_IBM_RESERVED_9", 542 [10] = "NNPA_RELU", 543 [11] = "NNPA_TANH", 544 [12] = "NNPA_SIGMOID", 545 [13] = "NNPA_SOFTMAX", 546 [14] = "NNPA_BATCHNORM", 547 [15] = "NNPA_MAXPOOL2D", 548 [16] = "NNPA_AVGPOOL2D", 549 [17] = "NNPA_LSTMACT", 550 [18] = "NNPA_GRUACT", 551 [19] = "NNPA_CONVOLUTION", 552 [20] = "NNPA_MATMUL_OP", 553 [21] = "NNPA_MATMUL_OP_BCAST23", 554 [22] = "NNPA_SMALLBATCH", 555 [23] = "NNPA_LARGEDIM", 556 [24] = "NNPA_SMALLTENSOR", 557 [25] = "NNPA_1MFRAME", 558 [26] = "NNPA_2GFRAME", 559 [27] = "NNPA_ACCESSEXCEPT", 560 }; 561 562 static void __init attr_event_free(struct attribute **attrs, int num) 563 { 564 struct perf_pmu_events_attr *pa; 565 struct device_attribute *dap; 566 int i; 567 568 for (i = 0; i < num; i++) { 569 dap = container_of(attrs[i], struct device_attribute, attr); 570 pa = container_of(dap, struct perf_pmu_events_attr, attr); 571 kfree(pa); 572 } 573 kfree(attrs); 574 } 575 576 static int __init attr_event_init_one(struct attribute **attrs, int num) 577 { 578 struct perf_pmu_events_attr *pa; 579 580 pa = kzalloc(sizeof(*pa), GFP_KERNEL); 581 if (!pa) 582 return -ENOMEM; 583 584 sysfs_attr_init(&pa->attr.attr); 585 pa->id = PAI_NNPA_BASE + num; 586 pa->attr.attr.name = paiext_ctrnames[num]; 587 pa->attr.attr.mode = 0444; 588 pa->attr.show = cpumf_events_sysfs_show; 589 pa->attr.store = NULL; 590 attrs[num] = &pa->attr.attr; 591 return 0; 592 } 593 594 /* Create PMU sysfs event attributes on the fly. */ 595 static int __init attr_event_init(void) 596 { 597 struct attribute **attrs; 598 int ret, i; 599 600 attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs), 601 GFP_KERNEL); 602 if (!attrs) 603 return -ENOMEM; 604 for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) { 605 ret = attr_event_init_one(attrs, i); 606 if (ret) { 607 attr_event_free(attrs, i - 1); 608 return ret; 609 } 610 } 611 attrs[i] = NULL; 612 paiext_events_group.attrs = attrs; 613 return 0; 614 } 615 616 static int __init paiext_init(void) 617 { 618 struct qpaci_info_block ib; 619 int rc = -ENOMEM; 620 621 if (!test_facility(197)) 622 return 0; 623 624 qpaci(&ib); 625 paiext_cnt = ib.num_nnpa; 626 if (paiext_cnt >= PAI_NNPA_MAXCTR) 627 paiext_cnt = PAI_NNPA_MAXCTR; 628 if (!paiext_cnt) 629 return 0; 630 631 rc = attr_event_init(); 632 if (rc) { 633 pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n"); 634 return rc; 635 } 636 637 /* Setup s390dbf facility */ 638 paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); 639 if (!paiext_dbg) { 640 pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n"); 641 rc = -ENOMEM; 642 goto out_init; 643 } 644 debug_register_view(paiext_dbg, &debug_sprintf_view); 645 646 rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1); 647 if (rc) { 648 pr_err("Registration of " KMSG_COMPONENT " PMU failed with " 649 "rc=%i\n", rc); 650 goto out_pmu; 651 } 652 653 return 0; 654 655 out_pmu: 656 debug_unregister_view(paiext_dbg, &debug_sprintf_view); 657 debug_unregister(paiext_dbg); 658 out_init: 659 attr_event_free(paiext_events_group.attrs, 660 ARRAY_SIZE(paiext_ctrnames) + 1); 661 return rc; 662 } 663 664 device_initcall(paiext_init); 665