1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2018 4 * Auxtrace support for s390 CPU-Measurement Sampling Facility 5 * 6 * Author(s): Thomas Richter <tmricht@linux.ibm.com> 7 * 8 * Auxiliary traces are collected during 'perf record' using rbd000 event. 9 * Several PERF_RECORD_XXX are generated during recording: 10 * 11 * PERF_RECORD_AUX: 12 * Records that new data landed in the AUX buffer part. 13 * PERF_RECORD_AUXTRACE: 14 * Defines auxtrace data. Followed by the actual data. The contents of 15 * the auxtrace data is dependent on the event and the CPU. 16 * This record is generated by perf record command. For details 17 * see Documentation/perf.data-file-format.txt. 18 * PERF_RECORD_AUXTRACE_INFO: 19 * Defines a table of contains for PERF_RECORD_AUXTRACE records. This 20 * record is generated during 'perf record' command. Each record contains 21 * up to 256 entries describing offset and size of the AUXTRACE data in the 22 * perf.data file. 23 * PERF_RECORD_AUXTRACE_ERROR: 24 * Indicates an error during AUXTRACE collection such as buffer overflow. 25 * PERF_RECORD_FINISHED_ROUND: 26 * Perf events are not necessarily in time stamp order, as they can be 27 * collected in parallel on different CPUs. If the events should be 28 * processed in time order they need to be sorted first. 29 * Perf report guarantees that there is no reordering over a 30 * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a 31 * time stamp lower than this record are processed (and displayed) before 32 * the succeeding perf record are processed. 33 * 34 * These records are evaluated during perf report command. 35 * 36 * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for 37 * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info() 38 * below. 39 * Auxiliary trace data is collected per CPU. To merge the data into the report 40 * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace 41 * data is in ascending order. 42 * 43 * Each queue has a double linked list of auxtrace_buffers. This list contains 44 * the offset and size of a CPU's auxtrace data. During auxtrace processing 45 * the data portion is mmap()'ed. 46 * 47 * To sort the queues in chronological order, all queue access is controlled 48 * by the auxtrace_heap. This is basicly a stack, each stack element has two 49 * entries, the queue number and a time stamp. However the stack is sorted by 50 * the time stamps. The highest time stamp is at the bottom the lowest 51 * (nearest) time stamp is at the top. That sort order is maintained at all 52 * times! 53 * 54 * After the auxtrace infrastructure has been setup, the auxtrace queues are 55 * filled with data (offset/size pairs) and the auxtrace_heap is populated. 56 * 57 * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues. 58 * Each record is handled by s390_cpumsf_process_event(). The time stamp of 59 * the perf record is compared with the time stamp located on the auxtrace_heap 60 * top element. If that time stamp is lower than the time stamp from the 61 * record sample, the auxtrace queues will be processed. As auxtrace queues 62 * control many auxtrace_buffers and each buffer can be quite large, the 63 * auxtrace buffer might be processed only partially. In this case the 64 * position in the auxtrace_buffer of that queue is remembered and the time 65 * stamp of the last processed entry of the auxtrace_buffer replaces the 66 * current auxtrace_heap top. 67 * 68 * 3. Auxtrace_queues might run of out data and are feeded by the 69 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). 70 * 71 * Event Generation 72 * Each sampling-data entry in the auxilary trace data generates a perf sample. 73 * This sample is filled 74 * with data from the auxtrace such as PID/TID, instruction address, CPU state, 75 * etc. This sample is processed with perf_session__deliver_synth_event() to 76 * be included into the GUI. 77 * 78 * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining 79 * auxiliary traces entries until the time stamp of this record is reached 80 * auxtrace_heap top. This is triggered by ordered_event->deliver(). 81 * 82 * 83 * Perf event processing. 84 * Event processing of PERF_RECORD_XXX entries relies on time stamp entries. 85 * This is the function call sequence: 86 * 87 * __cmd_report() 88 * | 89 * perf_session__process_events() 90 * | 91 * __perf_session__process_events() 92 * | 93 * perf_session__process_event() 94 * | This functions splits the PERF_RECORD_XXX records. 95 * | - Those generated by perf record command (type number equal or higher 96 * | than PERF_RECORD_USER_TYPE_START) are handled by 97 * | perf_session__process_user_event(see below) 98 * | - Those generated by the kernel are handled by 99 * | perf_evlist__parse_sample_timestamp() 100 * | 101 * perf_evlist__parse_sample_timestamp() 102 * | Extract time stamp from sample data. 103 * | 104 * perf_session__queue_event() 105 * | If timestamp is positive the sample is entered into an ordered_event 106 * | list, sort order is the timestamp. The event processing is deferred until 107 * | later (see perf_session__process_user_event()). 108 * | Other timestamps (0 or -1) are handled immediately by 109 * | perf_session__deliver_event(). These are events generated at start up 110 * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP* 111 * | records. They are needed to create a list of running processes and its 112 * | memory mappings and layout. They are needed at the beginning to enable 113 * | command perf report to create process trees and memory mappings. 114 * | 115 * perf_session__deliver_event() 116 * | Delivers a PERF_RECORD_XXX entry for handling. 117 * | 118 * auxtrace__process_event() 119 * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with 120 * | time stamps from the auxiliary trace buffers. This enables 121 * | synchronization between auxiliary trace data and the events on the 122 * | perf.data file. 123 * | 124 * machine__deliver_event() 125 * | Handles the PERF_RECORD_XXX event. This depends on the record type. 126 * It might update the process tree, update a process memory map or enter 127 * a sample with IP and call back chain data into GUI data pool. 128 * 129 * 130 * Deferred processing determined by perf_session__process_user_event() is 131 * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These 132 * are generated during command perf record. 133 * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all 134 * PERF_RECORD_XXX entries stored in the ordered_event list. This list was 135 * built up while reading the perf.data file. 136 * Each event is now processed by calling perf_session__deliver_event(). 137 * This enables time synchronization between the data in the perf.data file and 138 * the data in the auxiliary trace buffers. 139 */ 140 141 #include <endian.h> 142 #include <errno.h> 143 #include <byteswap.h> 144 #include <inttypes.h> 145 #include <linux/kernel.h> 146 #include <linux/types.h> 147 #include <linux/bitops.h> 148 #include <linux/log2.h> 149 #include <linux/zalloc.h> 150 151 #include <sys/stat.h> 152 #include <sys/types.h> 153 154 #include "cpumap.h" 155 #include "color.h" 156 #include "evsel.h" 157 #include "evlist.h" 158 #include "machine.h" 159 #include "session.h" 160 #include "tool.h" 161 #include "thread.h" 162 #include "debug.h" 163 #include "auxtrace.h" 164 #include "s390-cpumsf.h" 165 #include "s390-cpumsf-kernel.h" 166 #include "s390-cpumcf-kernel.h" 167 #include "config.h" 168 169 struct s390_cpumsf { 170 struct auxtrace auxtrace; 171 struct auxtrace_queues queues; 172 struct auxtrace_heap heap; 173 struct perf_session *session; 174 struct machine *machine; 175 u32 auxtrace_type; 176 u32 pmu_type; 177 u16 machine_type; 178 bool data_queued; 179 bool use_logfile; 180 char *logdir; 181 }; 182 183 struct s390_cpumsf_queue { 184 struct s390_cpumsf *sf; 185 unsigned int queue_nr; 186 struct auxtrace_buffer *buffer; 187 int cpu; 188 FILE *logfile; 189 FILE *logfile_ctr; 190 }; 191 192 /* Check if the raw data should be dumped to file. If this is the case and 193 * the file to dump to has not been opened for writing, do so. 194 * 195 * Return 0 on success and greater zero on error so processing continues. 196 */ 197 static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf, 198 struct perf_sample *sample) 199 { 200 struct s390_cpumsf_queue *sfq; 201 struct auxtrace_queue *q; 202 int rc = 0; 203 204 if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu) 205 return rc; 206 207 q = &sf->queues.queue_array[sample->cpu]; 208 sfq = q->priv; 209 if (!sfq) /* Queue not yet allocated */ 210 return rc; 211 212 if (!sfq->logfile_ctr) { 213 char *name; 214 215 rc = (sf->logdir) 216 ? asprintf(&name, "%s/aux.ctr.%02x", 217 sf->logdir, sample->cpu) 218 : asprintf(&name, "aux.ctr.%02x", sample->cpu); 219 if (rc > 0) 220 sfq->logfile_ctr = fopen(name, "w"); 221 if (sfq->logfile_ctr == NULL) { 222 pr_err("Failed to open counter set log file %s, " 223 "continue...\n", name); 224 rc = 1; 225 } 226 free(name); 227 } 228 229 if (sfq->logfile_ctr) { 230 /* See comment above for -4 */ 231 size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1, 232 sfq->logfile_ctr); 233 if (n != 1) { 234 pr_err("Failed to write counter set data\n"); 235 rc = 1; 236 } 237 } 238 return rc; 239 } 240 241 /* Display s390 CPU measurement facility basic-sampling data entry 242 * Data written on s390 in big endian byte order and contains bit 243 * fields across byte boundaries. 244 */ 245 static bool s390_cpumsf_basic_show(const char *color, size_t pos, 246 struct hws_basic_entry *basicp) 247 { 248 struct hws_basic_entry *basic = basicp; 249 #if __BYTE_ORDER == __LITTLE_ENDIAN 250 struct hws_basic_entry local; 251 unsigned long long word = be64toh(*(unsigned long long *)basicp); 252 253 memset(&local, 0, sizeof(local)); 254 local.def = be16toh(basicp->def); 255 local.prim_asn = word & 0xffff; 256 local.CL = word >> 30 & 0x3; 257 local.I = word >> 32 & 0x1; 258 local.AS = word >> 33 & 0x3; 259 local.P = word >> 35 & 0x1; 260 local.W = word >> 36 & 0x1; 261 local.T = word >> 37 & 0x1; 262 local.U = word >> 40 & 0xf; 263 local.ia = be64toh(basicp->ia); 264 local.gpp = be64toh(basicp->gpp); 265 local.hpp = be64toh(basicp->hpp); 266 basic = &local; 267 #endif 268 if (basic->def != 1) { 269 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); 270 return false; 271 } 272 color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x" 273 " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n" 274 "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n", 275 pos, basic->def, basic->U, 276 basic->T ? 'T' : ' ', 277 basic->W ? 'W' : ' ', 278 basic->P ? 'P' : ' ', 279 basic->I ? 'I' : ' ', 280 basic->AS, basic->prim_asn, basic->ia, basic->CL, 281 basic->hpp, basic->gpp); 282 return true; 283 } 284 285 /* Display s390 CPU measurement facility diagnostic-sampling data entry. 286 * Data written on s390 in big endian byte order and contains bit 287 * fields across byte boundaries. 288 */ 289 static bool s390_cpumsf_diag_show(const char *color, size_t pos, 290 struct hws_diag_entry *diagp) 291 { 292 struct hws_diag_entry *diag = diagp; 293 #if __BYTE_ORDER == __LITTLE_ENDIAN 294 struct hws_diag_entry local; 295 unsigned long long word = be64toh(*(unsigned long long *)diagp); 296 297 local.def = be16toh(diagp->def); 298 local.I = word >> 32 & 0x1; 299 diag = &local; 300 #endif 301 if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { 302 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); 303 return false; 304 } 305 color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n", 306 pos, diag->def, diag->I ? 'I' : ' '); 307 return true; 308 } 309 310 /* Return TOD timestamp contained in an trailer entry */ 311 static unsigned long long trailer_timestamp(struct hws_trailer_entry *te, 312 int idx) 313 { 314 /* te->t set: TOD in STCKE format, bytes 8-15 315 * to->t not set: TOD in STCK format, bytes 0-7 316 */ 317 unsigned long long ts; 318 319 memcpy(&ts, &te->timestamp[idx], sizeof(ts)); 320 return be64toh(ts); 321 } 322 323 /* Display s390 CPU measurement facility trailer entry */ 324 static bool s390_cpumsf_trailer_show(const char *color, size_t pos, 325 struct hws_trailer_entry *te) 326 { 327 #if __BYTE_ORDER == __LITTLE_ENDIAN 328 struct hws_trailer_entry local; 329 const unsigned long long flags = be64toh(te->flags); 330 331 memset(&local, 0, sizeof(local)); 332 local.f = flags >> 63 & 0x1; 333 local.a = flags >> 62 & 0x1; 334 local.t = flags >> 61 & 0x1; 335 local.bsdes = be16toh((flags >> 16 & 0xffff)); 336 local.dsdes = be16toh((flags & 0xffff)); 337 memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp)); 338 local.overflow = be64toh(te->overflow); 339 local.clock_base = be64toh(te->progusage[0]) >> 63 & 1; 340 local.progusage2 = be64toh(te->progusage2); 341 te = &local; 342 #endif 343 if (te->bsdes != sizeof(struct hws_basic_entry)) { 344 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); 345 return false; 346 } 347 color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" 348 " dsdes:%d Overflow:%lld Time:%#llx\n" 349 "\t\tC:%d TOD:%#lx\n", 350 pos, 351 te->f ? 'F' : ' ', 352 te->a ? 'A' : ' ', 353 te->t ? 'T' : ' ', 354 te->bsdes, te->dsdes, te->overflow, 355 trailer_timestamp(te, te->clock_base), 356 te->clock_base, te->progusage2); 357 return true; 358 } 359 360 /* Test a sample data block. It must be 4KB or a multiple thereof in size and 361 * 4KB page aligned. Each sample data page has a trailer entry at the 362 * end which contains the sample entry data sizes. 363 * 364 * Return true if the sample data block passes the checks and set the 365 * basic set entry size and diagnostic set entry size. 366 * 367 * Return false on failure. 368 * 369 * Note: Old hardware does not set the basic or diagnostic entry sizes 370 * in the trailer entry. Use the type number instead. 371 */ 372 static bool s390_cpumsf_validate(int machine_type, 373 unsigned char *buf, size_t len, 374 unsigned short *bsdes, 375 unsigned short *dsdes) 376 { 377 struct hws_basic_entry *basic = (struct hws_basic_entry *)buf; 378 struct hws_trailer_entry *te; 379 380 *dsdes = *bsdes = 0; 381 if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ 382 return false; 383 if (be16toh(basic->def) != 1) /* No basic set entry, must be first */ 384 return false; 385 /* Check for trailer entry at end of SDB */ 386 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 387 - sizeof(*te)); 388 *bsdes = be16toh(te->bsdes); 389 *dsdes = be16toh(te->dsdes); 390 if (!te->bsdes && !te->dsdes) { 391 /* Very old hardware, use CPUID */ 392 switch (machine_type) { 393 case 2097: 394 case 2098: 395 *dsdes = 64; 396 *bsdes = 32; 397 break; 398 case 2817: 399 case 2818: 400 *dsdes = 74; 401 *bsdes = 32; 402 break; 403 case 2827: 404 case 2828: 405 *dsdes = 85; 406 *bsdes = 32; 407 break; 408 case 2964: 409 case 2965: 410 *dsdes = 112; 411 *bsdes = 32; 412 break; 413 default: 414 /* Illegal trailer entry */ 415 return false; 416 } 417 } 418 return true; 419 } 420 421 /* Return true if there is room for another entry */ 422 static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos) 423 { 424 size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry); 425 426 if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz) 427 return false; 428 return true; 429 } 430 431 /* Dump an auxiliary buffer. These buffers are multiple of 432 * 4KB SDB pages. 433 */ 434 static void s390_cpumsf_dump(struct s390_cpumsf *sf, 435 unsigned char *buf, size_t len) 436 { 437 const char *color = PERF_COLOR_BLUE; 438 struct hws_basic_entry *basic; 439 struct hws_diag_entry *diag; 440 unsigned short bsdes, dsdes; 441 size_t pos = 0; 442 443 color_fprintf(stdout, color, 444 ". ... s390 AUX data: size %zu bytes\n", 445 len); 446 447 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, 448 &dsdes)) { 449 pr_err("Invalid AUX trace data block size:%zu" 450 " (type:%d bsdes:%hd dsdes:%hd)\n", 451 len, sf->machine_type, bsdes, dsdes); 452 return; 453 } 454 455 /* s390 kernel always returns 4KB blocks fully occupied, 456 * no partially filled SDBs. 457 */ 458 while (pos < len) { 459 /* Handle Basic entry */ 460 basic = (struct hws_basic_entry *)(buf + pos); 461 if (s390_cpumsf_basic_show(color, pos, basic)) 462 pos += bsdes; 463 else 464 return; 465 466 /* Handle Diagnostic entry */ 467 diag = (struct hws_diag_entry *)(buf + pos); 468 if (s390_cpumsf_diag_show(color, pos, diag)) 469 pos += dsdes; 470 else 471 return; 472 473 /* Check for trailer entry */ 474 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { 475 /* Show trailer entry */ 476 struct hws_trailer_entry te; 477 478 pos = (pos + S390_CPUMSF_PAGESZ) 479 & ~(S390_CPUMSF_PAGESZ - 1); 480 pos -= sizeof(te); 481 memcpy(&te, buf + pos, sizeof(te)); 482 /* Set descriptor sizes in case of old hardware 483 * where these values are not set. 484 */ 485 te.bsdes = bsdes; 486 te.dsdes = dsdes; 487 if (s390_cpumsf_trailer_show(color, pos, &te)) 488 pos += sizeof(te); 489 else 490 return; 491 } 492 } 493 } 494 495 static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf, 496 size_t len) 497 { 498 printf(".\n"); 499 s390_cpumsf_dump(sf, buf, len); 500 } 501 502 #define S390_LPP_PID_MASK 0xffffffff 503 504 static bool s390_cpumsf_make_event(size_t pos, 505 struct hws_basic_entry *basic, 506 struct s390_cpumsf_queue *sfq) 507 { 508 struct perf_sample sample = { 509 .ip = basic->ia, 510 .pid = basic->hpp & S390_LPP_PID_MASK, 511 .tid = basic->hpp & S390_LPP_PID_MASK, 512 .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN, 513 .cpu = sfq->cpu, 514 .period = 1 515 }; 516 union perf_event event; 517 518 memset(&event, 0, sizeof(event)); 519 if (basic->CL == 1) /* Native LPAR mode */ 520 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER 521 : PERF_RECORD_MISC_KERNEL; 522 else if (basic->CL == 2) /* Guest kernel/user space */ 523 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER 524 : PERF_RECORD_MISC_GUEST_KERNEL; 525 else if (basic->gpp || basic->prim_asn != 0xffff) 526 /* Use heuristics on old hardware */ 527 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER 528 : PERF_RECORD_MISC_GUEST_KERNEL; 529 else 530 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER 531 : PERF_RECORD_MISC_KERNEL; 532 533 event.sample.header.type = PERF_RECORD_SAMPLE; 534 event.sample.header.misc = sample.cpumode; 535 event.sample.header.size = sizeof(struct perf_event_header); 536 537 pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", 538 __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, 539 sample.tid, sample.cpumode, sample.cpu); 540 if (perf_session__deliver_synth_event(sfq->sf->session, &event, 541 &sample)) { 542 pr_err("s390 Auxiliary Trace: failed to deliver event\n"); 543 return false; 544 } 545 return true; 546 } 547 548 static unsigned long long get_trailer_time(const unsigned char *buf) 549 { 550 struct hws_trailer_entry *te; 551 unsigned long long aux_time, progusage2; 552 bool clock_base; 553 554 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 555 - sizeof(*te)); 556 557 #if __BYTE_ORDER == __LITTLE_ENDIAN 558 clock_base = be64toh(te->progusage[0]) >> 63 & 0x1; 559 progusage2 = be64toh(te->progusage[1]); 560 #else 561 clock_base = te->clock_base; 562 progusage2 = te->progusage2; 563 #endif 564 if (!clock_base) /* TOD_CLOCK_BASE value missing */ 565 return 0; 566 567 /* Correct calculation to convert time stamp in trailer entry to 568 * nano seconds (taken from arch/s390 function tod_to_ns()). 569 * TOD_CLOCK_BASE is stored in trailer entry member progusage2. 570 */ 571 aux_time = trailer_timestamp(te, clock_base) - progusage2; 572 aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); 573 return aux_time; 574 } 575 576 /* Process the data samples of a single queue. The first parameter is a 577 * pointer to the queue, the second parameter is the time stamp. This 578 * is the time stamp: 579 * - of the event that triggered this processing. 580 * - or the time stamp when the last proccesing of this queue stopped. 581 * In this case it stopped at a 4KB page boundary and record the 582 * position on where to continue processing on the next invocation 583 * (see buffer->use_data and buffer->use_size). 584 * 585 * When this function returns the second parameter is updated to 586 * reflect the time stamp of the last processed auxiliary data entry 587 * (taken from the trailer entry of that page). The caller uses this 588 * returned time stamp to record the last processed entry in this 589 * queue. 590 * 591 * The function returns: 592 * 0: Processing successful. The second parameter returns the 593 * time stamp from the trailer entry until which position 594 * processing took place. Subsequent calls resume from this 595 * position. 596 * <0: An error occurred during processing. The second parameter 597 * returns the maximum time stamp. 598 * >0: Done on this queue. The second parameter returns the 599 * maximum time stamp. 600 */ 601 static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) 602 { 603 struct s390_cpumsf *sf = sfq->sf; 604 unsigned char *buf = sfq->buffer->use_data; 605 size_t len = sfq->buffer->use_size; 606 struct hws_basic_entry *basic; 607 unsigned short bsdes, dsdes; 608 size_t pos = 0; 609 int err = 1; 610 u64 aux_ts; 611 612 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, 613 &dsdes)) { 614 *ts = ~0ULL; 615 return -1; 616 } 617 618 /* Get trailer entry time stamp and check if entries in 619 * this auxiliary page are ready for processing. If the 620 * time stamp of the first entry is too high, whole buffer 621 * can be skipped. In this case return time stamp. 622 */ 623 aux_ts = get_trailer_time(buf); 624 if (!aux_ts) { 625 pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n", 626 (s64)sfq->buffer->data_offset); 627 aux_ts = ~0ULL; 628 goto out; 629 } 630 if (aux_ts > *ts) { 631 *ts = aux_ts; 632 return 0; 633 } 634 635 while (pos < len) { 636 /* Handle Basic entry */ 637 basic = (struct hws_basic_entry *)(buf + pos); 638 if (s390_cpumsf_make_event(pos, basic, sfq)) 639 pos += bsdes; 640 else { 641 err = -EBADF; 642 goto out; 643 } 644 645 pos += dsdes; /* Skip diagnositic entry */ 646 647 /* Check for trailer entry */ 648 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { 649 pos = (pos + S390_CPUMSF_PAGESZ) 650 & ~(S390_CPUMSF_PAGESZ - 1); 651 /* Check existence of next page */ 652 if (pos >= len) 653 break; 654 aux_ts = get_trailer_time(buf + pos); 655 if (!aux_ts) { 656 aux_ts = ~0ULL; 657 goto out; 658 } 659 if (aux_ts > *ts) { 660 *ts = aux_ts; 661 sfq->buffer->use_data += pos; 662 sfq->buffer->use_size -= pos; 663 return 0; 664 } 665 } 666 } 667 out: 668 *ts = aux_ts; 669 sfq->buffer->use_size = 0; 670 sfq->buffer->use_data = NULL; 671 return err; /* Buffer completely scanned or error */ 672 } 673 674 /* Run the s390 auxiliary trace decoder. 675 * Select the queue buffer to operate on, the caller already selected 676 * the proper queue, depending on second parameter 'ts'. 677 * This is the time stamp until which the auxiliary entries should 678 * be processed. This value is updated by called functions and 679 * returned to the caller. 680 * 681 * Resume processing in the current buffer. If there is no buffer 682 * get a new buffer from the queue and setup start position for 683 * processing. 684 * When a buffer is completely processed remove it from the queue 685 * before returning. 686 * 687 * This function returns 688 * 1: When the queue is empty. Second parameter will be set to 689 * maximum time stamp. 690 * 0: Normal processing done. 691 * <0: Error during queue buffer setup. This causes the caller 692 * to stop processing completely. 693 */ 694 static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, 695 u64 *ts) 696 { 697 698 struct auxtrace_buffer *buffer; 699 struct auxtrace_queue *queue; 700 int err; 701 702 queue = &sfq->sf->queues.queue_array[sfq->queue_nr]; 703 704 /* Get buffer and last position in buffer to resume 705 * decoding the auxiliary entries. One buffer might be large 706 * and decoding might stop in between. This depends on the time 707 * stamp of the trailer entry in each page of the auxiliary 708 * data and the time stamp of the event triggering the decoding. 709 */ 710 if (sfq->buffer == NULL) { 711 sfq->buffer = buffer = auxtrace_buffer__next(queue, 712 sfq->buffer); 713 if (!buffer) { 714 *ts = ~0ULL; 715 return 1; /* Processing done on this queue */ 716 } 717 /* Start with a new buffer on this queue */ 718 if (buffer->data) { 719 buffer->use_size = buffer->size; 720 buffer->use_data = buffer->data; 721 } 722 if (sfq->logfile) { /* Write into log file */ 723 size_t rc = fwrite(buffer->data, buffer->size, 1, 724 sfq->logfile); 725 if (rc != 1) 726 pr_err("Failed to write auxiliary data\n"); 727 } 728 } else 729 buffer = sfq->buffer; 730 731 if (!buffer->data) { 732 int fd = perf_data__fd(sfq->sf->session->data); 733 734 buffer->data = auxtrace_buffer__get_data(buffer, fd); 735 if (!buffer->data) 736 return -ENOMEM; 737 buffer->use_size = buffer->size; 738 buffer->use_data = buffer->data; 739 740 if (sfq->logfile) { /* Write into log file */ 741 size_t rc = fwrite(buffer->data, buffer->size, 1, 742 sfq->logfile); 743 if (rc != 1) 744 pr_err("Failed to write auxiliary data\n"); 745 } 746 } 747 pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n", 748 __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset, 749 buffer->size, buffer->use_size); 750 err = s390_cpumsf_samples(sfq, ts); 751 752 /* If non-zero, there is either an error (err < 0) or the buffer is 753 * completely done (err > 0). The error is unrecoverable, usually 754 * some descriptors could not be read successfully, so continue with 755 * the next buffer. 756 * In both cases the parameter 'ts' has been updated. 757 */ 758 if (err) { 759 sfq->buffer = NULL; 760 list_del_init(&buffer->list); 761 auxtrace_buffer__free(buffer); 762 if (err > 0) /* Buffer done, no error */ 763 err = 0; 764 } 765 return err; 766 } 767 768 static struct s390_cpumsf_queue * 769 s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr) 770 { 771 struct s390_cpumsf_queue *sfq; 772 773 sfq = zalloc(sizeof(struct s390_cpumsf_queue)); 774 if (sfq == NULL) 775 return NULL; 776 777 sfq->sf = sf; 778 sfq->queue_nr = queue_nr; 779 sfq->cpu = -1; 780 if (sf->use_logfile) { 781 char *name; 782 int rc; 783 784 rc = (sf->logdir) 785 ? asprintf(&name, "%s/aux.smp.%02x", 786 sf->logdir, queue_nr) 787 : asprintf(&name, "aux.smp.%02x", queue_nr); 788 if (rc > 0) 789 sfq->logfile = fopen(name, "w"); 790 if (sfq->logfile == NULL) { 791 pr_err("Failed to open auxiliary log file %s," 792 "continue...\n", name); 793 sf->use_logfile = false; 794 } 795 free(name); 796 } 797 return sfq; 798 } 799 800 static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf, 801 struct auxtrace_queue *queue, 802 unsigned int queue_nr, u64 ts) 803 { 804 struct s390_cpumsf_queue *sfq = queue->priv; 805 806 if (list_empty(&queue->head)) 807 return 0; 808 809 if (sfq == NULL) { 810 sfq = s390_cpumsf_alloc_queue(sf, queue_nr); 811 if (!sfq) 812 return -ENOMEM; 813 queue->priv = sfq; 814 815 if (queue->cpu != -1) 816 sfq->cpu = queue->cpu; 817 } 818 return auxtrace_heap__add(&sf->heap, queue_nr, ts); 819 } 820 821 static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts) 822 { 823 unsigned int i; 824 int ret = 0; 825 826 for (i = 0; i < sf->queues.nr_queues; i++) { 827 ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i], 828 i, ts); 829 if (ret) 830 break; 831 } 832 return ret; 833 } 834 835 static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts) 836 { 837 if (!sf->queues.new_data) 838 return 0; 839 840 sf->queues.new_data = false; 841 return s390_cpumsf_setup_queues(sf, ts); 842 } 843 844 static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) 845 { 846 unsigned int queue_nr; 847 u64 ts; 848 int ret; 849 850 while (1) { 851 struct auxtrace_queue *queue; 852 struct s390_cpumsf_queue *sfq; 853 854 if (!sf->heap.heap_cnt) 855 return 0; 856 857 if (sf->heap.heap_array[0].ordinal >= timestamp) 858 return 0; 859 860 queue_nr = sf->heap.heap_array[0].queue_nr; 861 queue = &sf->queues.queue_array[queue_nr]; 862 sfq = queue->priv; 863 864 auxtrace_heap__pop(&sf->heap); 865 if (sf->heap.heap_cnt) { 866 ts = sf->heap.heap_array[0].ordinal + 1; 867 if (ts > timestamp) 868 ts = timestamp; 869 } else { 870 ts = timestamp; 871 } 872 873 ret = s390_cpumsf_run_decoder(sfq, &ts); 874 if (ret < 0) { 875 auxtrace_heap__add(&sf->heap, queue_nr, ts); 876 return ret; 877 } 878 if (!ret) { 879 ret = auxtrace_heap__add(&sf->heap, queue_nr, ts); 880 if (ret < 0) 881 return ret; 882 } 883 } 884 return 0; 885 } 886 887 static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, 888 pid_t pid, pid_t tid, u64 ip, u64 timestamp) 889 { 890 char msg[MAX_AUXTRACE_ERROR_MSG]; 891 union perf_event event; 892 int err; 893 894 strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); 895 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 896 code, cpu, pid, tid, ip, msg, timestamp); 897 898 err = perf_session__deliver_synth_event(sf->session, &event, NULL); 899 if (err) 900 pr_err("s390 Auxiliary Trace: failed to deliver error event," 901 "error %d\n", err); 902 return err; 903 } 904 905 static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) 906 { 907 return s390_cpumsf_synth_error(sf, 1, sample->cpu, 908 sample->pid, sample->tid, 0, 909 sample->time); 910 } 911 912 static int 913 s390_cpumsf_process_event(struct perf_session *session, 914 union perf_event *event, 915 struct perf_sample *sample, 916 struct perf_tool *tool) 917 { 918 struct s390_cpumsf *sf = container_of(session->auxtrace, 919 struct s390_cpumsf, 920 auxtrace); 921 u64 timestamp = sample->time; 922 struct evsel *ev_bc000; 923 924 int err = 0; 925 926 if (dump_trace) 927 return 0; 928 929 if (!tool->ordered_events) { 930 pr_err("s390 Auxiliary Trace requires ordered events\n"); 931 return -EINVAL; 932 } 933 934 if (event->header.type == PERF_RECORD_SAMPLE && 935 sample->raw_size) { 936 /* Handle event with raw data */ 937 ev_bc000 = perf_evlist__event2evsel(session->evlist, event); 938 if (ev_bc000 && 939 ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) 940 err = s390_cpumcf_dumpctr(sf, sample); 941 return err; 942 } 943 944 if (event->header.type == PERF_RECORD_AUX && 945 event->aux.flags & PERF_AUX_FLAG_TRUNCATED) 946 return s390_cpumsf_lost(sf, sample); 947 948 if (timestamp) { 949 err = s390_cpumsf_update_queues(sf, timestamp); 950 if (!err) 951 err = s390_cpumsf_process_queues(sf, timestamp); 952 } 953 return err; 954 } 955 956 struct s390_cpumsf_synth { 957 struct perf_tool cpumsf_tool; 958 struct perf_session *session; 959 }; 960 961 static int 962 s390_cpumsf_process_auxtrace_event(struct perf_session *session, 963 union perf_event *event __maybe_unused, 964 struct perf_tool *tool __maybe_unused) 965 { 966 struct s390_cpumsf *sf = container_of(session->auxtrace, 967 struct s390_cpumsf, 968 auxtrace); 969 970 int fd = perf_data__fd(session->data); 971 struct auxtrace_buffer *buffer; 972 off_t data_offset; 973 int err; 974 975 if (sf->data_queued) 976 return 0; 977 978 if (perf_data__is_pipe(session->data)) { 979 data_offset = 0; 980 } else { 981 data_offset = lseek(fd, 0, SEEK_CUR); 982 if (data_offset == -1) 983 return -errno; 984 } 985 986 err = auxtrace_queues__add_event(&sf->queues, session, event, 987 data_offset, &buffer); 988 if (err) 989 return err; 990 991 /* Dump here after copying piped trace out of the pipe */ 992 if (dump_trace) { 993 if (auxtrace_buffer__get_data(buffer, fd)) { 994 s390_cpumsf_dump_event(sf, buffer->data, 995 buffer->size); 996 auxtrace_buffer__put_data(buffer); 997 } 998 } 999 return 0; 1000 } 1001 1002 static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused) 1003 { 1004 } 1005 1006 static int s390_cpumsf_flush(struct perf_session *session __maybe_unused, 1007 struct perf_tool *tool __maybe_unused) 1008 { 1009 return 0; 1010 } 1011 1012 static void s390_cpumsf_free_queues(struct perf_session *session) 1013 { 1014 struct s390_cpumsf *sf = container_of(session->auxtrace, 1015 struct s390_cpumsf, 1016 auxtrace); 1017 struct auxtrace_queues *queues = &sf->queues; 1018 unsigned int i; 1019 1020 for (i = 0; i < queues->nr_queues; i++) { 1021 struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) 1022 queues->queue_array[i].priv; 1023 1024 if (sfq != NULL) { 1025 if (sfq->logfile) { 1026 fclose(sfq->logfile); 1027 sfq->logfile = NULL; 1028 } 1029 if (sfq->logfile_ctr) { 1030 fclose(sfq->logfile_ctr); 1031 sfq->logfile_ctr = NULL; 1032 } 1033 } 1034 zfree(&queues->queue_array[i].priv); 1035 } 1036 auxtrace_queues__free(queues); 1037 } 1038 1039 static void s390_cpumsf_free(struct perf_session *session) 1040 { 1041 struct s390_cpumsf *sf = container_of(session->auxtrace, 1042 struct s390_cpumsf, 1043 auxtrace); 1044 1045 auxtrace_heap__free(&sf->heap); 1046 s390_cpumsf_free_queues(session); 1047 session->auxtrace = NULL; 1048 zfree(&sf->logdir); 1049 free(sf); 1050 } 1051 1052 static int s390_cpumsf_get_type(const char *cpuid) 1053 { 1054 int ret, family = 0; 1055 1056 ret = sscanf(cpuid, "%*[^,],%u", &family); 1057 return (ret == 1) ? family : 0; 1058 } 1059 1060 /* Check itrace options set on perf report command. 1061 * Return true, if none are set or all options specified can be 1062 * handled on s390 (currently only option 'd' for logging. 1063 * Return false otherwise. 1064 */ 1065 static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) 1066 { 1067 bool ison = false; 1068 1069 if (!itops || !itops->set) 1070 return true; 1071 ison = itops->inject || itops->instructions || itops->branches || 1072 itops->transactions || itops->ptwrites || 1073 itops->pwr_events || itops->errors || 1074 itops->dont_decode || itops->calls || itops->returns || 1075 itops->callchain || itops->thread_stack || 1076 itops->last_branch; 1077 if (!ison) 1078 return true; 1079 pr_err("Unsupported --itrace options specified\n"); 1080 return false; 1081 } 1082 1083 /* Check for AUXTRACE dump directory if it is needed. 1084 * On failure print an error message but continue. 1085 * Return 0 on wrong keyword in config file and 1 otherwise. 1086 */ 1087 static int s390_cpumsf__config(const char *var, const char *value, void *cb) 1088 { 1089 struct s390_cpumsf *sf = cb; 1090 struct stat stbuf; 1091 int rc; 1092 1093 if (strcmp(var, "auxtrace.dumpdir")) 1094 return 0; 1095 sf->logdir = strdup(value); 1096 if (sf->logdir == NULL) { 1097 pr_err("Failed to find auxtrace log directory %s," 1098 " continue with current directory...\n", value); 1099 return 1; 1100 } 1101 rc = stat(sf->logdir, &stbuf); 1102 if (rc == -1 || !S_ISDIR(stbuf.st_mode)) { 1103 pr_err("Missing auxtrace log directory %s," 1104 " continue with current directory...\n", value); 1105 zfree(&sf->logdir); 1106 } 1107 return 1; 1108 } 1109 1110 int s390_cpumsf_process_auxtrace_info(union perf_event *event, 1111 struct perf_session *session) 1112 { 1113 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 1114 struct s390_cpumsf *sf; 1115 int err; 1116 1117 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info)) 1118 return -EINVAL; 1119 1120 sf = zalloc(sizeof(struct s390_cpumsf)); 1121 if (sf == NULL) 1122 return -ENOMEM; 1123 1124 if (!check_auxtrace_itrace(session->itrace_synth_opts)) { 1125 err = -EINVAL; 1126 goto err_free; 1127 } 1128 sf->use_logfile = session->itrace_synth_opts->log; 1129 if (sf->use_logfile) 1130 perf_config(s390_cpumsf__config, sf); 1131 1132 err = auxtrace_queues__init(&sf->queues); 1133 if (err) 1134 goto err_free; 1135 1136 sf->session = session; 1137 sf->machine = &session->machines.host; /* No kvm support */ 1138 sf->auxtrace_type = auxtrace_info->type; 1139 sf->pmu_type = PERF_TYPE_RAW; 1140 sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid); 1141 1142 sf->auxtrace.process_event = s390_cpumsf_process_event; 1143 sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; 1144 sf->auxtrace.flush_events = s390_cpumsf_flush; 1145 sf->auxtrace.free_events = s390_cpumsf_free_events; 1146 sf->auxtrace.free = s390_cpumsf_free; 1147 session->auxtrace = &sf->auxtrace; 1148 1149 if (dump_trace) 1150 return 0; 1151 1152 err = auxtrace_queues__process_index(&sf->queues, session); 1153 if (err) 1154 goto err_free_queues; 1155 1156 if (sf->queues.populated) 1157 sf->data_queued = true; 1158 1159 return 0; 1160 1161 err_free_queues: 1162 auxtrace_queues__free(&sf->queues); 1163 session->auxtrace = NULL; 1164 err_free: 1165 zfree(&sf->logdir); 1166 free(sf); 1167 return err; 1168 } 1169