1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2018 4 * Auxtrace support for s390 CPU-Measurement Sampling Facility 5 * 6 * Author(s): Thomas Richter <tmricht@linux.ibm.com> 7 * 8 * Auxiliary traces are collected during 'perf record' using rbd000 event. 9 * Several PERF_RECORD_XXX are generated during recording: 10 * 11 * PERF_RECORD_AUX: 12 * Records that new data landed in the AUX buffer part. 13 * PERF_RECORD_AUXTRACE: 14 * Defines auxtrace data. Followed by the actual data. The contents of 15 * the auxtrace data are dependent on the event and the CPU. 16 * This record is generated by perf record command. For details 17 * see Documentation/perf.data-file-format.txt. 18 * PERF_RECORD_AUXTRACE_INFO: 19 * Defines a table of contents for PERF_RECORD_AUXTRACE records. This 20 * record is generated during 'perf record' command. Each record contains 21 * up to 256 entries describing offset and size of the AUXTRACE data in the 22 * perf.data file. 23 * PERF_RECORD_AUXTRACE_ERROR: 24 * Indicates an error during AUXTRACE collection such as buffer overflow. 25 * PERF_RECORD_FINISHED_ROUND: 26 * Perf events are not necessarily in time stamp order, as they can be 27 * collected in parallel on different CPUs. If the events should be 28 * processed in time order they need to be sorted first. 29 * Perf report guarantees that there is no reordering over a 30 * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a 31 * time stamp lower than this record are processed (and displayed) before 32 * the succeeding perf record are processed. 33 * 34 * These records are evaluated during perf report command. 35 * 36 * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for 37 * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info() 38 * below. 39 * Auxiliary trace data is collected per CPU. To merge the data into the report 40 * an auxtrace_queue is created for each CPU. 
It is assumed that the auxtrace 41 * data is in ascending order. 42 * 43 * Each queue has a doubly linked list of auxtrace_buffers. This list contains 44 * the offset and size of a CPU's auxtrace data. During auxtrace processing 45 * the data portion is mmap()'ed. 46 * 47 * To sort the queues in chronological order, all queue access is controlled 48 * by the auxtrace_heap. This is basically a stack, each stack element has two 49 * entries, the queue number and a time stamp. However the stack is sorted by 50 * the time stamps. The highest time stamp is at the bottom, the lowest 51 * (nearest) time stamp is at the top. That sort order is maintained at all 52 * times! 53 * 54 * After the auxtrace infrastructure has been setup, the auxtrace queues are 55 * filled with data (offset/size pairs) and the auxtrace_heap is populated. 56 * 57 * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues. 58 * Each record is handled by s390_cpumsf_process_event(). The time stamp of 59 * the perf record is compared with the time stamp located on the auxtrace_heap 60 * top element. If that time stamp is lower than the time stamp from the 61 * record sample, the auxtrace queues will be processed. As auxtrace queues 62 * control many auxtrace_buffers and each buffer can be quite large, the 63 * auxtrace buffer might be processed only partially. In this case the 64 * position in the auxtrace_buffer of that queue is remembered and the time 65 * stamp of the last processed entry of the auxtrace_buffer replaces the 66 * current auxtrace_heap top. 67 * 68 * 3. Auxtrace_queues might run out of data and are fed by the 69 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). 70 * 71 * Event Generation 72 * Each sampling-data entry in the auxiliary trace data generates a perf sample. 73 * This sample is filled 74 * with data from the auxtrace such as PID/TID, instruction address, CPU state, 75 * etc. 
This sample is processed with perf_session__deliver_synth_event() to 76 * be included into the GUI. 77 * 78 * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining 79 * auxiliary trace entries until the time stamp of this record is reached 80 * auxtrace_heap top. This is triggered by ordered_event->deliver(). 81 * 82 * 83 * Perf event processing. 84 * Event processing of PERF_RECORD_XXX entries relies on time stamp entries. 85 * This is the function call sequence: 86 * 87 * __cmd_report() 88 * | 89 * perf_session__process_events() 90 * | 91 * __perf_session__process_events() 92 * | 93 * perf_session__process_event() 94 * | This function splits the PERF_RECORD_XXX records. 95 * | - Those generated by perf record command (type number equal or higher 96 * | than PERF_RECORD_USER_TYPE_START) are handled by 97 * | perf_session__process_user_event(see below) 98 * | - Those generated by the kernel are handled by 99 * | evlist__parse_sample_timestamp() 100 * | 101 * evlist__parse_sample_timestamp() 102 * | Extract time stamp from sample data. 103 * | 104 * perf_session__queue_event() 105 * | If timestamp is positive the sample is entered into an ordered_event 106 * | list, sort order is the timestamp. The event processing is deferred until 107 * | later (see perf_session__process_user_event()). 108 * | Other timestamps (0 or -1) are handled immediately by 109 * | perf_session__deliver_event(). These are events generated at start up 110 * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP* 111 * | records. They are needed to create a list of running processes and its 112 * | memory mappings and layout. They are needed at the beginning to enable 113 * | command perf report to create process trees and memory mappings. 114 * | 115 * perf_session__deliver_event() 116 * | Delivers a PERF_RECORD_XXX entry for handling. 
117 * | 118 * auxtrace__process_event() 119 * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with 120 * | time stamps from the auxiliary trace buffers. This enables 121 * | synchronization between auxiliary trace data and the events on the 122 * | perf.data file. 123 * | 124 * machine__deliver_event() 125 * | Handles the PERF_RECORD_XXX event. This depends on the record type. 126 * It might update the process tree, update a process memory map or enter 127 * a sample with IP and call back chain data into GUI data pool. 128 * 129 * 130 * Deferred processing determined by perf_session__process_user_event() is 131 * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These 132 * are generated during command perf record. 133 * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all 134 * PERF_RECORD_XXX entries stored in the ordered_event list. This list was 135 * built up while reading the perf.data file. 136 * Each event is now processed by calling perf_session__deliver_event(). 137 * This enables time synchronization between the data in the perf.data file and 138 * the data in the auxiliary trace buffers. 
139 */ 140 141 #include <endian.h> 142 #include <errno.h> 143 #include <byteswap.h> 144 #include <inttypes.h> 145 #include <linux/kernel.h> 146 #include <linux/types.h> 147 #include <linux/bitops.h> 148 #include <linux/log2.h> 149 #include <linux/zalloc.h> 150 151 #include <sys/stat.h> 152 #include <sys/types.h> 153 154 #include "color.h" 155 #include "evsel.h" 156 #include "evlist.h" 157 #include "machine.h" 158 #include "session.h" 159 #include "tool.h" 160 #include "debug.h" 161 #include "auxtrace.h" 162 #include "s390-cpumsf.h" 163 #include "s390-cpumsf-kernel.h" 164 #include "s390-cpumcf-kernel.h" 165 #include "config.h" 166 167 struct s390_cpumsf { 168 struct auxtrace auxtrace; 169 struct auxtrace_queues queues; 170 struct auxtrace_heap heap; 171 struct perf_session *session; 172 struct machine *machine; 173 u32 auxtrace_type; 174 u32 pmu_type; 175 u16 machine_type; 176 bool data_queued; 177 bool use_logfile; 178 char *logdir; 179 }; 180 181 struct s390_cpumsf_queue { 182 struct s390_cpumsf *sf; 183 unsigned int queue_nr; 184 struct auxtrace_buffer *buffer; 185 int cpu; 186 FILE *logfile; 187 FILE *logfile_ctr; 188 }; 189 190 /* Check if the raw data should be dumped to file. If this is the case and 191 * the file to dump to has not been opened for writing, do so. 192 * 193 * Return 0 on success and greater zero on error so processing continues. 194 */ 195 static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf, 196 struct perf_sample *sample) 197 { 198 struct s390_cpumsf_queue *sfq; 199 struct auxtrace_queue *q; 200 int rc = 0; 201 202 if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu) 203 return rc; 204 205 q = &sf->queues.queue_array[sample->cpu]; 206 sfq = q->priv; 207 if (!sfq) /* Queue not yet allocated */ 208 return rc; 209 210 if (!sfq->logfile_ctr) { 211 char *name; 212 213 rc = (sf->logdir) 214 ? 
asprintf(&name, "%s/aux.ctr.%02x", 215 sf->logdir, sample->cpu) 216 : asprintf(&name, "aux.ctr.%02x", sample->cpu); 217 if (rc > 0) 218 sfq->logfile_ctr = fopen(name, "w"); 219 if (sfq->logfile_ctr == NULL) { 220 pr_err("Failed to open counter set log file %s, " 221 "continue...\n", name); 222 rc = 1; 223 } 224 free(name); 225 } 226 227 if (sfq->logfile_ctr) { 228 /* See comment above for -4 */ 229 size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1, 230 sfq->logfile_ctr); 231 if (n != 1) { 232 pr_err("Failed to write counter set data\n"); 233 rc = 1; 234 } 235 } 236 return rc; 237 } 238 239 /* Display s390 CPU measurement facility basic-sampling data entry 240 * Data written on s390 in big endian byte order and contains bit 241 * fields across byte boundaries. 242 */ 243 static bool s390_cpumsf_basic_show(const char *color, size_t pos, 244 struct hws_basic_entry *basicp) 245 { 246 struct hws_basic_entry *basic = basicp; 247 #if __BYTE_ORDER == __LITTLE_ENDIAN 248 struct hws_basic_entry local; 249 unsigned long long word = be64toh(*(unsigned long long *)basicp); 250 251 memset(&local, 0, sizeof(local)); 252 local.def = be16toh(basicp->def); 253 local.prim_asn = word & 0xffff; 254 local.CL = word >> 30 & 0x3; 255 local.I = word >> 32 & 0x1; 256 local.AS = word >> 33 & 0x3; 257 local.P = word >> 35 & 0x1; 258 local.W = word >> 36 & 0x1; 259 local.T = word >> 37 & 0x1; 260 local.U = word >> 40 & 0xf; 261 local.ia = be64toh(basicp->ia); 262 local.gpp = be64toh(basicp->gpp); 263 local.hpp = be64toh(basicp->hpp); 264 basic = &local; 265 #endif 266 if (basic->def != 1) { 267 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); 268 return false; 269 } 270 color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x" 271 " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n" 272 "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n", 273 pos, basic->def, basic->U, 274 basic->T ? 'T' : ' ', 275 basic->W ? 'W' : ' ', 276 basic->P ? 'P' : ' ', 277 basic->I ? 
'I' : ' ', 278 basic->AS, basic->prim_asn, basic->ia, basic->CL, 279 basic->hpp, basic->gpp); 280 return true; 281 } 282 283 /* Display s390 CPU measurement facility diagnostic-sampling data entry. 284 * Data written on s390 in big endian byte order and contains bit 285 * fields across byte boundaries. 286 */ 287 static bool s390_cpumsf_diag_show(const char *color, size_t pos, 288 struct hws_diag_entry *diagp) 289 { 290 struct hws_diag_entry *diag = diagp; 291 #if __BYTE_ORDER == __LITTLE_ENDIAN 292 struct hws_diag_entry local; 293 unsigned long long word = be64toh(*(unsigned long long *)diagp); 294 295 local.def = be16toh(diagp->def); 296 local.I = word >> 32 & 0x1; 297 diag = &local; 298 #endif 299 if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { 300 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); 301 return false; 302 } 303 color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n", 304 pos, diag->def, diag->I ? 'I' : ' '); 305 return true; 306 } 307 308 /* Return TOD timestamp contained in an trailer entry */ 309 static unsigned long long trailer_timestamp(struct hws_trailer_entry *te, 310 int idx) 311 { 312 /* te->t set: TOD in STCKE format, bytes 8-15 313 * to->t not set: TOD in STCK format, bytes 0-7 314 */ 315 unsigned long long ts; 316 317 memcpy(&ts, &te->timestamp[idx], sizeof(ts)); 318 return be64toh(ts); 319 } 320 321 /* Display s390 CPU measurement facility trailer entry */ 322 static bool s390_cpumsf_trailer_show(const char *color, size_t pos, 323 struct hws_trailer_entry *te) 324 { 325 #if __BYTE_ORDER == __LITTLE_ENDIAN 326 struct hws_trailer_entry local; 327 const unsigned long long flags = be64toh(te->flags); 328 329 memset(&local, 0, sizeof(local)); 330 local.f = flags >> 63 & 0x1; 331 local.a = flags >> 62 & 0x1; 332 local.t = flags >> 61 & 0x1; 333 local.bsdes = be16toh((flags >> 16 & 0xffff)); 334 local.dsdes = be16toh((flags & 0xffff)); 335 memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp)); 336 local.overflow = 
be64toh(te->overflow); 337 local.clock_base = be64toh(te->progusage[0]) >> 63 & 1; 338 local.progusage2 = be64toh(te->progusage2); 339 te = &local; 340 #endif 341 if (te->bsdes != sizeof(struct hws_basic_entry)) { 342 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); 343 return false; 344 } 345 color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" 346 " dsdes:%d Overflow:%lld Time:%#llx\n" 347 "\t\tC:%d TOD:%#lx\n", 348 pos, 349 te->f ? 'F' : ' ', 350 te->a ? 'A' : ' ', 351 te->t ? 'T' : ' ', 352 te->bsdes, te->dsdes, te->overflow, 353 trailer_timestamp(te, te->clock_base), 354 te->clock_base, te->progusage2); 355 return true; 356 } 357 358 /* Test a sample data block. It must be 4KB or a multiple thereof in size and 359 * 4KB page aligned. Each sample data page has a trailer entry at the 360 * end which contains the sample entry data sizes. 361 * 362 * Return true if the sample data block passes the checks and set the 363 * basic set entry size and diagnostic set entry size. 364 * 365 * Return false on failure. 366 * 367 * Note: Old hardware does not set the basic or diagnostic entry sizes 368 * in the trailer entry. Use the type number instead. 
369 */ 370 static bool s390_cpumsf_validate(int machine_type, 371 unsigned char *buf, size_t len, 372 unsigned short *bsdes, 373 unsigned short *dsdes) 374 { 375 struct hws_basic_entry *basic = (struct hws_basic_entry *)buf; 376 struct hws_trailer_entry *te; 377 378 *dsdes = *bsdes = 0; 379 if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ 380 return false; 381 if (be16toh(basic->def) != 1) /* No basic set entry, must be first */ 382 return false; 383 /* Check for trailer entry at end of SDB */ 384 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 385 - sizeof(*te)); 386 *bsdes = be16toh(te->bsdes); 387 *dsdes = be16toh(te->dsdes); 388 if (!te->bsdes && !te->dsdes) { 389 /* Very old hardware, use CPUID */ 390 switch (machine_type) { 391 case 2097: 392 case 2098: 393 *dsdes = 64; 394 *bsdes = 32; 395 break; 396 case 2817: 397 case 2818: 398 *dsdes = 74; 399 *bsdes = 32; 400 break; 401 case 2827: 402 case 2828: 403 *dsdes = 85; 404 *bsdes = 32; 405 break; 406 case 2964: 407 case 2965: 408 *dsdes = 112; 409 *bsdes = 32; 410 break; 411 default: 412 /* Illegal trailer entry */ 413 return false; 414 } 415 } 416 return true; 417 } 418 419 /* Return true if there is room for another entry */ 420 static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos) 421 { 422 size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry); 423 424 if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz) 425 return false; 426 return true; 427 } 428 429 /* Dump an auxiliary buffer. These buffers are multiple of 430 * 4KB SDB pages. 431 */ 432 static void s390_cpumsf_dump(struct s390_cpumsf *sf, 433 unsigned char *buf, size_t len) 434 { 435 const char *color = PERF_COLOR_BLUE; 436 struct hws_basic_entry *basic; 437 struct hws_diag_entry *diag; 438 unsigned short bsdes, dsdes; 439 size_t pos = 0; 440 441 color_fprintf(stdout, color, 442 ". ... 
s390 AUX data: size %zu bytes\n", 443 len); 444 445 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, 446 &dsdes)) { 447 pr_err("Invalid AUX trace data block size:%zu" 448 " (type:%d bsdes:%hd dsdes:%hd)\n", 449 len, sf->machine_type, bsdes, dsdes); 450 return; 451 } 452 453 /* s390 kernel always returns 4KB blocks fully occupied, 454 * no partially filled SDBs. 455 */ 456 while (pos < len) { 457 /* Handle Basic entry */ 458 basic = (struct hws_basic_entry *)(buf + pos); 459 if (s390_cpumsf_basic_show(color, pos, basic)) 460 pos += bsdes; 461 else 462 return; 463 464 /* Handle Diagnostic entry */ 465 diag = (struct hws_diag_entry *)(buf + pos); 466 if (s390_cpumsf_diag_show(color, pos, diag)) 467 pos += dsdes; 468 else 469 return; 470 471 /* Check for trailer entry */ 472 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { 473 /* Show trailer entry */ 474 struct hws_trailer_entry te; 475 476 pos = (pos + S390_CPUMSF_PAGESZ) 477 & ~(S390_CPUMSF_PAGESZ - 1); 478 pos -= sizeof(te); 479 memcpy(&te, buf + pos, sizeof(te)); 480 /* Set descriptor sizes in case of old hardware 481 * where these values are not set. 
482 */ 483 te.bsdes = bsdes; 484 te.dsdes = dsdes; 485 if (s390_cpumsf_trailer_show(color, pos, &te)) 486 pos += sizeof(te); 487 else 488 return; 489 } 490 } 491 } 492 493 static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf, 494 size_t len) 495 { 496 printf(".\n"); 497 s390_cpumsf_dump(sf, buf, len); 498 } 499 500 #define S390_LPP_PID_MASK 0xffffffff 501 502 static bool s390_cpumsf_make_event(size_t pos, 503 struct hws_basic_entry *basic, 504 struct s390_cpumsf_queue *sfq) 505 { 506 struct perf_sample sample = { 507 .ip = basic->ia, 508 .pid = basic->hpp & S390_LPP_PID_MASK, 509 .tid = basic->hpp & S390_LPP_PID_MASK, 510 .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN, 511 .cpu = sfq->cpu, 512 .period = 1 513 }; 514 union perf_event event; 515 516 memset(&event, 0, sizeof(event)); 517 if (basic->CL == 1) /* Native LPAR mode */ 518 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER 519 : PERF_RECORD_MISC_KERNEL; 520 else if (basic->CL == 2) /* Guest kernel/user space */ 521 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER 522 : PERF_RECORD_MISC_GUEST_KERNEL; 523 else if (basic->gpp || basic->prim_asn != 0xffff) 524 /* Use heuristics on old hardware */ 525 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER 526 : PERF_RECORD_MISC_GUEST_KERNEL; 527 else 528 sample.cpumode = basic->P ? 
PERF_RECORD_MISC_USER 529 : PERF_RECORD_MISC_KERNEL; 530 531 event.sample.header.type = PERF_RECORD_SAMPLE; 532 event.sample.header.misc = sample.cpumode; 533 event.sample.header.size = sizeof(struct perf_event_header); 534 535 pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", 536 __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, 537 sample.tid, sample.cpumode, sample.cpu); 538 if (perf_session__deliver_synth_event(sfq->sf->session, &event, 539 &sample)) { 540 pr_err("s390 Auxiliary Trace: failed to deliver event\n"); 541 return false; 542 } 543 return true; 544 } 545 546 static unsigned long long get_trailer_time(const unsigned char *buf) 547 { 548 struct hws_trailer_entry *te; 549 unsigned long long aux_time, progusage2; 550 bool clock_base; 551 552 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 553 - sizeof(*te)); 554 555 #if __BYTE_ORDER == __LITTLE_ENDIAN 556 clock_base = be64toh(te->progusage[0]) >> 63 & 0x1; 557 progusage2 = be64toh(te->progusage[1]); 558 #else 559 clock_base = te->clock_base; 560 progusage2 = te->progusage2; 561 #endif 562 if (!clock_base) /* TOD_CLOCK_BASE value missing */ 563 return 0; 564 565 /* Correct calculation to convert time stamp in trailer entry to 566 * nano seconds (taken from arch/s390 function tod_to_ns()). 567 * TOD_CLOCK_BASE is stored in trailer entry member progusage2. 568 */ 569 aux_time = trailer_timestamp(te, clock_base) - progusage2; 570 aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); 571 return aux_time; 572 } 573 574 /* Process the data samples of a single queue. The first parameter is a 575 * pointer to the queue, the second parameter is the time stamp. This 576 * is the time stamp: 577 * - of the event that triggered this processing. 578 * - or the time stamp when the last processing of this queue stopped. 
579 * In this case it stopped at a 4KB page boundary and record the 580 * position on where to continue processing on the next invocation 581 * (see buffer->use_data and buffer->use_size). 582 * 583 * When this function returns the second parameter is updated to 584 * reflect the time stamp of the last processed auxiliary data entry 585 * (taken from the trailer entry of that page). The caller uses this 586 * returned time stamp to record the last processed entry in this 587 * queue. 588 * 589 * The function returns: 590 * 0: Processing successful. The second parameter returns the 591 * time stamp from the trailer entry until which position 592 * processing took place. Subsequent calls resume from this 593 * position. 594 * <0: An error occurred during processing. The second parameter 595 * returns the maximum time stamp. 596 * >0: Done on this queue. The second parameter returns the 597 * maximum time stamp. 598 */ 599 static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) 600 { 601 struct s390_cpumsf *sf = sfq->sf; 602 unsigned char *buf = sfq->buffer->use_data; 603 size_t len = sfq->buffer->use_size; 604 struct hws_basic_entry *basic; 605 unsigned short bsdes, dsdes; 606 size_t pos = 0; 607 int err = 1; 608 u64 aux_ts; 609 610 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, 611 &dsdes)) { 612 *ts = ~0ULL; 613 return -1; 614 } 615 616 /* Get trailer entry time stamp and check if entries in 617 * this auxiliary page are ready for processing. If the 618 * time stamp of the first entry is too high, whole buffer 619 * can be skipped. In this case return time stamp. 
620 */ 621 aux_ts = get_trailer_time(buf); 622 if (!aux_ts) { 623 pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n", 624 (s64)sfq->buffer->data_offset); 625 aux_ts = ~0ULL; 626 goto out; 627 } 628 if (aux_ts > *ts) { 629 *ts = aux_ts; 630 return 0; 631 } 632 633 while (pos < len) { 634 /* Handle Basic entry */ 635 basic = (struct hws_basic_entry *)(buf + pos); 636 if (s390_cpumsf_make_event(pos, basic, sfq)) 637 pos += bsdes; 638 else { 639 err = -EBADF; 640 goto out; 641 } 642 643 pos += dsdes; /* Skip diagnostic entry */ 644 645 /* Check for trailer entry */ 646 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { 647 pos = (pos + S390_CPUMSF_PAGESZ) 648 & ~(S390_CPUMSF_PAGESZ - 1); 649 /* Check existence of next page */ 650 if (pos >= len) 651 break; 652 aux_ts = get_trailer_time(buf + pos); 653 if (!aux_ts) { 654 aux_ts = ~0ULL; 655 goto out; 656 } 657 if (aux_ts > *ts) { 658 *ts = aux_ts; 659 sfq->buffer->use_data += pos; 660 sfq->buffer->use_size -= pos; 661 return 0; 662 } 663 } 664 } 665 out: 666 *ts = aux_ts; 667 sfq->buffer->use_size = 0; 668 sfq->buffer->use_data = NULL; 669 return err; /* Buffer completely scanned or error */ 670 } 671 672 /* Run the s390 auxiliary trace decoder. 673 * Select the queue buffer to operate on, the caller already selected 674 * the proper queue, depending on second parameter 'ts'. 675 * This is the time stamp until which the auxiliary entries should 676 * be processed. This value is updated by called functions and 677 * returned to the caller. 678 * 679 * Resume processing in the current buffer. If there is no buffer 680 * get a new buffer from the queue and setup start position for 681 * processing. 682 * When a buffer is completely processed remove it from the queue 683 * before returning. 684 * 685 * This function returns 686 * 1: When the queue is empty. Second parameter will be set to 687 * maximum time stamp. 688 * 0: Normal processing done. 689 * <0: Error during queue buffer setup. 
This causes the caller 690 * to stop processing completely. 691 */ 692 static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, 693 u64 *ts) 694 { 695 696 struct auxtrace_buffer *buffer; 697 struct auxtrace_queue *queue; 698 int err; 699 700 queue = &sfq->sf->queues.queue_array[sfq->queue_nr]; 701 702 /* Get buffer and last position in buffer to resume 703 * decoding the auxiliary entries. One buffer might be large 704 * and decoding might stop in between. This depends on the time 705 * stamp of the trailer entry in each page of the auxiliary 706 * data and the time stamp of the event triggering the decoding. 707 */ 708 if (sfq->buffer == NULL) { 709 sfq->buffer = buffer = auxtrace_buffer__next(queue, 710 sfq->buffer); 711 if (!buffer) { 712 *ts = ~0ULL; 713 return 1; /* Processing done on this queue */ 714 } 715 /* Start with a new buffer on this queue */ 716 if (buffer->data) { 717 buffer->use_size = buffer->size; 718 buffer->use_data = buffer->data; 719 } 720 if (sfq->logfile) { /* Write into log file */ 721 size_t rc = fwrite(buffer->data, buffer->size, 1, 722 sfq->logfile); 723 if (rc != 1) 724 pr_err("Failed to write auxiliary data\n"); 725 } 726 } else 727 buffer = sfq->buffer; 728 729 if (!buffer->data) { 730 int fd = perf_data__fd(sfq->sf->session->data); 731 732 buffer->data = auxtrace_buffer__get_data(buffer, fd); 733 if (!buffer->data) 734 return -ENOMEM; 735 buffer->use_size = buffer->size; 736 buffer->use_data = buffer->data; 737 738 if (sfq->logfile) { /* Write into log file */ 739 size_t rc = fwrite(buffer->data, buffer->size, 1, 740 sfq->logfile); 741 if (rc != 1) 742 pr_err("Failed to write auxiliary data\n"); 743 } 744 } 745 pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n", 746 __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset, 747 buffer->size, buffer->use_size); 748 err = s390_cpumsf_samples(sfq, ts); 749 750 /* If non-zero, there is either an error (err < 0) or the buffer is 751 * 
completely done (err > 0). The error is unrecoverable, usually 752 * some descriptors could not be read successfully, so continue with 753 * the next buffer. 754 * In both cases the parameter 'ts' has been updated. 755 */ 756 if (err) { 757 sfq->buffer = NULL; 758 list_del_init(&buffer->list); 759 auxtrace_buffer__free(buffer); 760 if (err > 0) /* Buffer done, no error */ 761 err = 0; 762 } 763 return err; 764 } 765 766 static struct s390_cpumsf_queue * 767 s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr) 768 { 769 struct s390_cpumsf_queue *sfq; 770 771 sfq = zalloc(sizeof(struct s390_cpumsf_queue)); 772 if (sfq == NULL) 773 return NULL; 774 775 sfq->sf = sf; 776 sfq->queue_nr = queue_nr; 777 sfq->cpu = -1; 778 if (sf->use_logfile) { 779 char *name; 780 int rc; 781 782 rc = (sf->logdir) 783 ? asprintf(&name, "%s/aux.smp.%02x", 784 sf->logdir, queue_nr) 785 : asprintf(&name, "aux.smp.%02x", queue_nr); 786 if (rc > 0) 787 sfq->logfile = fopen(name, "w"); 788 if (sfq->logfile == NULL) { 789 pr_err("Failed to open auxiliary log file %s," 790 "continue...\n", name); 791 sf->use_logfile = false; 792 } 793 free(name); 794 } 795 return sfq; 796 } 797 798 static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf, 799 struct auxtrace_queue *queue, 800 unsigned int queue_nr, u64 ts) 801 { 802 struct s390_cpumsf_queue *sfq = queue->priv; 803 804 if (list_empty(&queue->head)) 805 return 0; 806 807 if (sfq == NULL) { 808 sfq = s390_cpumsf_alloc_queue(sf, queue_nr); 809 if (!sfq) 810 return -ENOMEM; 811 queue->priv = sfq; 812 813 if (queue->cpu != -1) 814 sfq->cpu = queue->cpu; 815 } 816 return auxtrace_heap__add(&sf->heap, queue_nr, ts); 817 } 818 819 static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts) 820 { 821 unsigned int i; 822 int ret = 0; 823 824 for (i = 0; i < sf->queues.nr_queues; i++) { 825 ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i], 826 i, ts); 827 if (ret) 828 break; 829 } 830 return ret; 831 } 832 833 static 
int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts) 834 { 835 if (!sf->queues.new_data) 836 return 0; 837 838 sf->queues.new_data = false; 839 return s390_cpumsf_setup_queues(sf, ts); 840 } 841 842 static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) 843 { 844 unsigned int queue_nr; 845 u64 ts; 846 int ret; 847 848 while (1) { 849 struct auxtrace_queue *queue; 850 struct s390_cpumsf_queue *sfq; 851 852 if (!sf->heap.heap_cnt) 853 return 0; 854 855 if (sf->heap.heap_array[0].ordinal >= timestamp) 856 return 0; 857 858 queue_nr = sf->heap.heap_array[0].queue_nr; 859 queue = &sf->queues.queue_array[queue_nr]; 860 sfq = queue->priv; 861 862 auxtrace_heap__pop(&sf->heap); 863 if (sf->heap.heap_cnt) { 864 ts = sf->heap.heap_array[0].ordinal + 1; 865 if (ts > timestamp) 866 ts = timestamp; 867 } else { 868 ts = timestamp; 869 } 870 871 ret = s390_cpumsf_run_decoder(sfq, &ts); 872 if (ret < 0) { 873 auxtrace_heap__add(&sf->heap, queue_nr, ts); 874 return ret; 875 } 876 if (!ret) { 877 ret = auxtrace_heap__add(&sf->heap, queue_nr, ts); 878 if (ret < 0) 879 return ret; 880 } 881 } 882 return 0; 883 } 884 885 static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, 886 pid_t pid, pid_t tid, u64 ip, u64 timestamp) 887 { 888 char msg[MAX_AUXTRACE_ERROR_MSG]; 889 union perf_event event; 890 int err; 891 892 strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); 893 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 894 code, cpu, pid, tid, ip, msg, timestamp); 895 896 err = perf_session__deliver_synth_event(sf->session, &event, NULL); 897 if (err) 898 pr_err("s390 Auxiliary Trace: failed to deliver error event," 899 "error %d\n", err); 900 return err; 901 } 902 903 static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) 904 { 905 return s390_cpumsf_synth_error(sf, 1, sample->cpu, 906 sample->pid, sample->tid, 0, 907 sample->time); 908 } 909 910 static int 911 
s390_cpumsf_process_event(struct perf_session *session, 912 union perf_event *event, 913 struct perf_sample *sample, 914 struct perf_tool *tool) 915 { 916 struct s390_cpumsf *sf = container_of(session->auxtrace, 917 struct s390_cpumsf, 918 auxtrace); 919 u64 timestamp = sample->time; 920 struct evsel *ev_bc000; 921 922 int err = 0; 923 924 if (dump_trace) 925 return 0; 926 927 if (!tool->ordered_events) { 928 pr_err("s390 Auxiliary Trace requires ordered events\n"); 929 return -EINVAL; 930 } 931 932 if (event->header.type == PERF_RECORD_SAMPLE && 933 sample->raw_size) { 934 /* Handle event with raw data */ 935 ev_bc000 = evlist__event2evsel(session->evlist, event); 936 if (ev_bc000 && 937 ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) 938 err = s390_cpumcf_dumpctr(sf, sample); 939 return err; 940 } 941 942 if (event->header.type == PERF_RECORD_AUX && 943 event->aux.flags & PERF_AUX_FLAG_TRUNCATED) 944 return s390_cpumsf_lost(sf, sample); 945 946 if (timestamp) { 947 err = s390_cpumsf_update_queues(sf, timestamp); 948 if (!err) 949 err = s390_cpumsf_process_queues(sf, timestamp); 950 } 951 return err; 952 } 953 954 struct s390_cpumsf_synth { 955 struct perf_tool cpumsf_tool; 956 struct perf_session *session; 957 }; 958 959 static int 960 s390_cpumsf_process_auxtrace_event(struct perf_session *session, 961 union perf_event *event __maybe_unused, 962 struct perf_tool *tool __maybe_unused) 963 { 964 struct s390_cpumsf *sf = container_of(session->auxtrace, 965 struct s390_cpumsf, 966 auxtrace); 967 968 int fd = perf_data__fd(session->data); 969 struct auxtrace_buffer *buffer; 970 off_t data_offset; 971 int err; 972 973 if (sf->data_queued) 974 return 0; 975 976 if (perf_data__is_pipe(session->data)) { 977 data_offset = 0; 978 } else { 979 data_offset = lseek(fd, 0, SEEK_CUR); 980 if (data_offset == -1) 981 return -errno; 982 } 983 984 err = auxtrace_queues__add_event(&sf->queues, session, event, 985 data_offset, &buffer); 986 if (err) 987 return err; 988 989 /* 
Dump here after copying piped trace out of the pipe */ 990 if (dump_trace) { 991 if (auxtrace_buffer__get_data(buffer, fd)) { 992 s390_cpumsf_dump_event(sf, buffer->data, 993 buffer->size); 994 auxtrace_buffer__put_data(buffer); 995 } 996 } 997 return 0; 998 } 999 1000 static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused) 1001 { 1002 } 1003 1004 static int s390_cpumsf_flush(struct perf_session *session __maybe_unused, 1005 struct perf_tool *tool __maybe_unused) 1006 { 1007 return 0; 1008 } 1009 1010 static void s390_cpumsf_free_queues(struct perf_session *session) 1011 { 1012 struct s390_cpumsf *sf = container_of(session->auxtrace, 1013 struct s390_cpumsf, 1014 auxtrace); 1015 struct auxtrace_queues *queues = &sf->queues; 1016 unsigned int i; 1017 1018 for (i = 0; i < queues->nr_queues; i++) { 1019 struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) 1020 queues->queue_array[i].priv; 1021 1022 if (sfq != NULL) { 1023 if (sfq->logfile) { 1024 fclose(sfq->logfile); 1025 sfq->logfile = NULL; 1026 } 1027 if (sfq->logfile_ctr) { 1028 fclose(sfq->logfile_ctr); 1029 sfq->logfile_ctr = NULL; 1030 } 1031 } 1032 zfree(&queues->queue_array[i].priv); 1033 } 1034 auxtrace_queues__free(queues); 1035 } 1036 1037 static void s390_cpumsf_free(struct perf_session *session) 1038 { 1039 struct s390_cpumsf *sf = container_of(session->auxtrace, 1040 struct s390_cpumsf, 1041 auxtrace); 1042 1043 auxtrace_heap__free(&sf->heap); 1044 s390_cpumsf_free_queues(session); 1045 session->auxtrace = NULL; 1046 zfree(&sf->logdir); 1047 free(sf); 1048 } 1049 1050 static bool 1051 s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused, 1052 struct evsel *evsel) 1053 { 1054 return evsel->core.attr.type == PERF_TYPE_RAW && 1055 evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG; 1056 } 1057 1058 static int s390_cpumsf_get_type(const char *cpuid) 1059 { 1060 int ret, family = 0; 1061 1062 ret = sscanf(cpuid, "%*[^,],%u", &family); 1063 return (ret == 
1) ? family : 0; 1064 } 1065 1066 /* Check itrace options set on perf report command. 1067 * Return true, if none are set or all options specified can be 1068 * handled on s390 (currently only option 'd' for logging. 1069 * Return false otherwise. 1070 */ 1071 static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) 1072 { 1073 bool ison = false; 1074 1075 if (!itops || !itops->set) 1076 return true; 1077 ison = itops->inject || itops->instructions || itops->branches || 1078 itops->transactions || itops->ptwrites || 1079 itops->pwr_events || itops->errors || 1080 itops->dont_decode || itops->calls || itops->returns || 1081 itops->callchain || itops->thread_stack || 1082 itops->last_branch || itops->add_callchain || 1083 itops->add_last_branch; 1084 if (!ison) 1085 return true; 1086 pr_err("Unsupported --itrace options specified\n"); 1087 return false; 1088 } 1089 1090 /* Check for AUXTRACE dump directory if it is needed. 1091 * On failure print an error message but continue. 1092 * Return 0 on wrong keyword in config file and 1 otherwise. 
1093 */ 1094 static int s390_cpumsf__config(const char *var, const char *value, void *cb) 1095 { 1096 struct s390_cpumsf *sf = cb; 1097 struct stat stbuf; 1098 int rc; 1099 1100 if (strcmp(var, "auxtrace.dumpdir")) 1101 return 0; 1102 sf->logdir = strdup(value); 1103 if (sf->logdir == NULL) { 1104 pr_err("Failed to find auxtrace log directory %s," 1105 " continue with current directory...\n", value); 1106 return 1; 1107 } 1108 rc = stat(sf->logdir, &stbuf); 1109 if (rc == -1 || !S_ISDIR(stbuf.st_mode)) { 1110 pr_err("Missing auxtrace log directory %s," 1111 " continue with current directory...\n", value); 1112 zfree(&sf->logdir); 1113 } 1114 return 1; 1115 } 1116 1117 int s390_cpumsf_process_auxtrace_info(union perf_event *event, 1118 struct perf_session *session) 1119 { 1120 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 1121 struct s390_cpumsf *sf; 1122 int err; 1123 1124 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info)) 1125 return -EINVAL; 1126 1127 sf = zalloc(sizeof(struct s390_cpumsf)); 1128 if (sf == NULL) 1129 return -ENOMEM; 1130 1131 if (!check_auxtrace_itrace(session->itrace_synth_opts)) { 1132 err = -EINVAL; 1133 goto err_free; 1134 } 1135 sf->use_logfile = session->itrace_synth_opts->log; 1136 if (sf->use_logfile) 1137 perf_config(s390_cpumsf__config, sf); 1138 1139 err = auxtrace_queues__init(&sf->queues); 1140 if (err) 1141 goto err_free; 1142 1143 sf->session = session; 1144 sf->machine = &session->machines.host; /* No kvm support */ 1145 sf->auxtrace_type = auxtrace_info->type; 1146 sf->pmu_type = PERF_TYPE_RAW; 1147 sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid); 1148 1149 sf->auxtrace.process_event = s390_cpumsf_process_event; 1150 sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; 1151 sf->auxtrace.flush_events = s390_cpumsf_flush; 1152 sf->auxtrace.free_events = s390_cpumsf_free_events; 1153 sf->auxtrace.free = s390_cpumsf_free; 1154 
sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace; 1155 session->auxtrace = &sf->auxtrace; 1156 1157 if (dump_trace) 1158 return 0; 1159 1160 err = auxtrace_queues__process_index(&sf->queues, session); 1161 if (err) 1162 goto err_free_queues; 1163 1164 if (sf->queues.populated) 1165 sf->data_queued = true; 1166 1167 return 0; 1168 1169 err_free_queues: 1170 auxtrace_queues__free(&sf->queues); 1171 session->auxtrace = NULL; 1172 err_free: 1173 zfree(&sf->logdir); 1174 free(sf); 1175 return err; 1176 } 1177