1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2018 4 * Auxtrace support for s390 CPU-Measurement Sampling Facility 5 * 6 * Author(s): Thomas Richter <tmricht@linux.ibm.com> 7 * 8 * Auxiliary traces are collected during 'perf record' using rbd000 event. 9 * Several PERF_RECORD_XXX are generated during recording: 10 * 11 * PERF_RECORD_AUX: 12 * Records that new data landed in the AUX buffer part. 13 * PERF_RECORD_AUXTRACE: 14 * Defines auxtrace data. Followed by the actual data. The contents of 15 * the auxtrace data is dependent on the event and the CPU. 16 * This record is generated by perf record command. For details 17 * see Documentation/perf.data-file-format.txt. 18 * PERF_RECORD_AUXTRACE_INFO: 19 * Defines a table of contains for PERF_RECORD_AUXTRACE records. This 20 * record is generated during 'perf record' command. Each record contains up 21 * to 256 entries describing offset and size of the AUXTRACE data in the 22 * perf.data file. 23 * PERF_RECORD_AUXTRACE_ERROR: 24 * Indicates an error during AUXTRACE collection such as buffer overflow. 25 * PERF_RECORD_FINISHED_ROUND: 26 * Perf events are not necessarily in time stamp order, as they can be 27 * collected in parallel on different CPUs. If the events should be 28 * processed in time order they need to be sorted first. 29 * Perf report guarantees that there is no reordering over a 30 * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a 31 * time stamp lower than this record are processed (and displayed) before 32 * the succeeding perf record are processed. 33 * 34 * These records are evaluated during perf report command. 35 * 36 * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for 37 * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info() 38 * below. 39 * Auxiliary trace data is collected per CPU. To merge the data into the report 40 * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace 41 * data is in ascending order. 42 * 43 * Each queue has a double linked list of auxtrace_buffers. This list contains 44 * the offset and size of a CPU's auxtrace data. During auxtrace processing 45 * the data portion is mmap()'ed. 46 * 47 * To sort the queues in chronological order, all queue access is controlled 48 * by the auxtrace_heap. This is basicly a stack, each stack element has two 49 * entries, the queue number and a time stamp. However the stack is sorted by 50 * the time stamps. The highest time stamp is at the bottom the lowest 51 * (nearest) time stamp is at the top. That sort order is maintained at all 52 * times! 53 * 54 * After the auxtrace infrastructure has been setup, the auxtrace queues are 55 * filled with data (offset/size pairs) and the auxtrace_heap is populated. 56 * 57 * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues. 58 * Each record is handled by s390_cpumsf_process_event(). The time stamp of 59 * the perf record is compared with the time stamp located on the auxtrace_heap 60 * top element. If that time stamp is lower than the time stamp from the 61 * record sample, the auxtrace queues will be processed. As auxtrace queues 62 * control many auxtrace_buffers and each buffer can be quite large, the 63 * auxtrace buffer might be processed only partially. In this case the 64 * position in the auxtrace_buffer of that queue is remembered and the time 65 * stamp of the last processed entry of the auxtrace_buffer replaces the 66 * current auxtrace_heap top. 67 * 68 * 3. Auxtrace_queues might run of out data and are feeded by the 69 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). 70 * 71 * Event Generation 72 * Each sampling-data entry in the auxilary trace data generates a perf sample. 73 * This sample is filled 74 * with data from the auxtrace such as PID/TID, instruction address, CPU state, 75 * etc. This sample is processed with perf_session__deliver_synth_event() to 76 * be included into the GUI. 77 * 78 * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining 79 * auxiliary traces entries until the time stamp of this record is reached 80 * auxtrace_heap top. This is triggered by ordered_event->deliver(). 81 * 82 * 83 * Perf event processing. 84 * Event processing of PERF_RECORD_XXX entries relies on time stamp entries. 85 * This is the function call sequence: 86 * 87 * __cmd_report() 88 * | 89 * perf_session__process_events() 90 * | 91 * __perf_session__process_events() 92 * | 93 * perf_session__process_event() 94 * | This functions splits the PERF_RECORD_XXX records. 95 * | - Those generated by perf record command (type number equal or higher 96 * | than PERF_RECORD_USER_TYPE_START) are handled by 97 * | perf_session__process_user_event(see below) 98 * | - Those generated by the kernel are handled by 99 * | perf_evlist__parse_sample_timestamp() 100 * | 101 * perf_evlist__parse_sample_timestamp() 102 * | Extract time stamp from sample data. 103 * | 104 * perf_session__queue_event() 105 * | If timestamp is positive the sample is entered into an ordered_event 106 * | list, sort order is the timestamp. The event processing is deferred until 107 * | later (see perf_session__process_user_event()). 108 * | Other timestamps (0 or -1) are handled immediately by 109 * | perf_session__deliver_event(). These are events generated at start up 110 * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP* 111 * | records. They are needed to create a list of running processes and its 112 * | memory mappings and layout. They are needed at the beginning to enable 113 * | command perf report to create process trees and memory mappings. 114 * | 115 * perf_session__deliver_event() 116 * | Delivers a PERF_RECORD_XXX entry for handling. 117 * | 118 * auxtrace__process_event() 119 * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with 120 * | time stamps from the auxiliary trace buffers. This enables 121 * | synchronization between auxiliary trace data and the events on the 122 * | perf.data file. 123 * | 124 * machine__deliver_event() 125 * | Handles the PERF_RECORD_XXX event. This depends on the record type. 126 * It might update the process tree, update a process memory map or enter 127 * a sample with IP and call back chain data into GUI data pool. 128 * 129 * 130 * Deferred processing determined by perf_session__process_user_event() is 131 * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These 132 * are generated during command perf record. 133 * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all 134 * PERF_RECORD_XXX entries stored in the ordered_event list. This list was 135 * built up while reading the perf.data file. 136 * Each event is now processed by calling perf_session__deliver_event(). 137 * This enables time synchronization between the data in the perf.data file and 138 * the data in the auxiliary trace buffers. 139 */ 140 141 #include <endian.h> 142 #include <errno.h> 143 #include <byteswap.h> 144 #include <inttypes.h> 145 #include <linux/kernel.h> 146 #include <linux/types.h> 147 #include <linux/bitops.h> 148 #include <linux/log2.h> 149 150 #include <sys/stat.h> 151 #include <sys/types.h> 152 153 #include "cpumap.h" 154 #include "color.h" 155 #include "evsel.h" 156 #include "evlist.h" 157 #include "machine.h" 158 #include "session.h" 159 #include "util.h" 160 #include "thread.h" 161 #include "debug.h" 162 #include "auxtrace.h" 163 #include "s390-cpumsf.h" 164 #include "s390-cpumsf-kernel.h" 165 #include "config.h" 166 167 struct s390_cpumsf { 168 struct auxtrace auxtrace; 169 struct auxtrace_queues queues; 170 struct auxtrace_heap heap; 171 struct perf_session *session; 172 struct machine *machine; 173 u32 auxtrace_type; 174 u32 pmu_type; 175 u16 machine_type; 176 bool data_queued; 177 bool use_logfile; 178 char *logdir; 179 }; 180 181 struct s390_cpumsf_queue { 182 struct s390_cpumsf *sf; 183 unsigned int queue_nr; 184 struct auxtrace_buffer *buffer; 185 int cpu; 186 FILE *logfile; 187 }; 188 189 /* Display s390 CPU measurement facility basic-sampling data entry */ 190 static bool s390_cpumsf_basic_show(const char *color, size_t pos, 191 struct hws_basic_entry *basic) 192 { 193 if (basic->def != 1) { 194 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); 195 return false; 196 } 197 color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x" 198 " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n" 199 "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n", 200 pos, basic->def, basic->U, 201 basic->T ? 'T' : ' ', 202 basic->W ? 'W' : ' ', 203 basic->P ? 'P' : ' ', 204 basic->I ? 'I' : ' ', 205 basic->AS, basic->prim_asn, basic->ia, basic->CL, 206 basic->hpp, basic->gpp); 207 return true; 208 } 209 210 /* Display s390 CPU measurement facility diagnostic-sampling data entry */ 211 static bool s390_cpumsf_diag_show(const char *color, size_t pos, 212 struct hws_diag_entry *diag) 213 { 214 if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { 215 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); 216 return false; 217 } 218 color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n", 219 pos, diag->def, diag->I ? 'I' : ' '); 220 return true; 221 } 222 223 /* Return TOD timestamp contained in an trailer entry */ 224 static unsigned long long trailer_timestamp(struct hws_trailer_entry *te) 225 { 226 /* te->t set: TOD in STCKE format, bytes 8-15 227 * to->t not set: TOD in STCK format, bytes 0-7 228 */ 229 unsigned long long ts; 230 231 memcpy(&ts, &te->timestamp[te->t], sizeof(ts)); 232 return ts; 233 } 234 235 /* Display s390 CPU measurement facility trailer entry */ 236 static bool s390_cpumsf_trailer_show(const char *color, size_t pos, 237 struct hws_trailer_entry *te) 238 { 239 if (te->bsdes != sizeof(struct hws_basic_entry)) { 240 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); 241 return false; 242 } 243 color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" 244 " dsdes:%d Overflow:%lld Time:%#llx\n" 245 "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n", 246 pos, 247 te->f ? 'F' : ' ', 248 te->a ? 'A' : ' ', 249 te->t ? 'T' : ' ', 250 te->bsdes, te->dsdes, te->overflow, 251 trailer_timestamp(te), te->clock_base, te->progusage2, 252 te->progusage[0], te->progusage[1]); 253 return true; 254 } 255 256 /* Test a sample data block. It must be 4KB or a multiple thereof in size and 257 * 4KB page aligned. Each sample data page has a trailer entry at the 258 * end which contains the sample entry data sizes. 259 * 260 * Return true if the sample data block passes the checks and set the 261 * basic set entry size and diagnostic set entry size. 262 * 263 * Return false on failure. 264 * 265 * Note: Old hardware does not set the basic or diagnostic entry sizes 266 * in the trailer entry. Use the type number instead. 267 */ 268 static bool s390_cpumsf_validate(int machine_type, 269 unsigned char *buf, size_t len, 270 unsigned short *bsdes, 271 unsigned short *dsdes) 272 { 273 struct hws_basic_entry *basic = (struct hws_basic_entry *)buf; 274 struct hws_trailer_entry *te; 275 276 *dsdes = *bsdes = 0; 277 if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ 278 return false; 279 if (basic->def != 1) /* No basic set entry, must be first */ 280 return false; 281 /* Check for trailer entry at end of SDB */ 282 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 283 - sizeof(*te)); 284 *bsdes = te->bsdes; 285 *dsdes = te->dsdes; 286 if (!te->bsdes && !te->dsdes) { 287 /* Very old hardware, use CPUID */ 288 switch (machine_type) { 289 case 2097: 290 case 2098: 291 *dsdes = 64; 292 *bsdes = 32; 293 break; 294 case 2817: 295 case 2818: 296 *dsdes = 74; 297 *bsdes = 32; 298 break; 299 case 2827: 300 case 2828: 301 *dsdes = 85; 302 *bsdes = 32; 303 break; 304 default: 305 /* Illegal trailer entry */ 306 return false; 307 } 308 } 309 return true; 310 } 311 312 /* Return true if there is room for another entry */ 313 static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos) 314 { 315 size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry); 316 317 if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz) 318 return false; 319 return true; 320 } 321 322 /* Dump an auxiliary buffer. These buffers are multiple of 323 * 4KB SDB pages. 324 */ 325 static void s390_cpumsf_dump(struct s390_cpumsf *sf, 326 unsigned char *buf, size_t len) 327 { 328 const char *color = PERF_COLOR_BLUE; 329 struct hws_basic_entry *basic; 330 struct hws_diag_entry *diag; 331 unsigned short bsdes, dsdes; 332 size_t pos = 0; 333 334 color_fprintf(stdout, color, 335 ". ... s390 AUX data: size %zu bytes\n", 336 len); 337 338 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, 339 &dsdes)) { 340 pr_err("Invalid AUX trace data block size:%zu" 341 " (type:%d bsdes:%hd dsdes:%hd)\n", 342 len, sf->machine_type, bsdes, dsdes); 343 return; 344 } 345 346 /* s390 kernel always returns 4KB blocks fully occupied, 347 * no partially filled SDBs. 348 */ 349 while (pos < len) { 350 /* Handle Basic entry */ 351 basic = (struct hws_basic_entry *)(buf + pos); 352 if (s390_cpumsf_basic_show(color, pos, basic)) 353 pos += bsdes; 354 else 355 return; 356 357 /* Handle Diagnostic entry */ 358 diag = (struct hws_diag_entry *)(buf + pos); 359 if (s390_cpumsf_diag_show(color, pos, diag)) 360 pos += dsdes; 361 else 362 return; 363 364 /* Check for trailer entry */ 365 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { 366 /* Show trailer entry */ 367 struct hws_trailer_entry te; 368 369 pos = (pos + S390_CPUMSF_PAGESZ) 370 & ~(S390_CPUMSF_PAGESZ - 1); 371 pos -= sizeof(te); 372 memcpy(&te, buf + pos, sizeof(te)); 373 /* Set descriptor sizes in case of old hardware 374 * where these values are not set. 375 */ 376 te.bsdes = bsdes; 377 te.dsdes = dsdes; 378 if (s390_cpumsf_trailer_show(color, pos, &te)) 379 pos += sizeof(te); 380 else 381 return; 382 } 383 } 384 } 385 386 static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf, 387 size_t len) 388 { 389 printf(".\n"); 390 s390_cpumsf_dump(sf, buf, len); 391 } 392 393 #define S390_LPP_PID_MASK 0xffffffff 394 395 static bool s390_cpumsf_make_event(size_t pos, 396 struct hws_basic_entry *basic, 397 struct s390_cpumsf_queue *sfq) 398 { 399 struct perf_sample sample = { 400 .ip = basic->ia, 401 .pid = basic->hpp & S390_LPP_PID_MASK, 402 .tid = basic->hpp & S390_LPP_PID_MASK, 403 .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN, 404 .cpu = sfq->cpu, 405 .period = 1 406 }; 407 union perf_event event; 408 409 memset(&event, 0, sizeof(event)); 410 if (basic->CL == 1) /* Native LPAR mode */ 411 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER 412 : PERF_RECORD_MISC_KERNEL; 413 else if (basic->CL == 2) /* Guest kernel/user space */ 414 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER 415 : PERF_RECORD_MISC_GUEST_KERNEL; 416 else if (basic->gpp || basic->prim_asn != 0xffff) 417 /* Use heuristics on old hardware */ 418 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER 419 : PERF_RECORD_MISC_GUEST_KERNEL; 420 else 421 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER 422 : PERF_RECORD_MISC_KERNEL; 423 424 event.sample.header.type = PERF_RECORD_SAMPLE; 425 event.sample.header.misc = sample.cpumode; 426 event.sample.header.size = sizeof(struct perf_event_header); 427 428 pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", 429 __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, 430 sample.tid, sample.cpumode, sample.cpu); 431 if (perf_session__deliver_synth_event(sfq->sf->session, &event, 432 &sample)) { 433 pr_err("s390 Auxiliary Trace: failed to deliver event\n"); 434 return false; 435 } 436 return true; 437 } 438 439 static unsigned long long get_trailer_time(const unsigned char *buf) 440 { 441 struct hws_trailer_entry *te; 442 unsigned long long aux_time; 443 444 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 445 - sizeof(*te)); 446 447 if (!te->clock_base) /* TOD_CLOCK_BASE value missing */ 448 return 0; 449 450 /* Correct calculation to convert time stamp in trailer entry to 451 * nano seconds (taken from arch/s390 function tod_to_ns()). 452 * TOD_CLOCK_BASE is stored in trailer entry member progusage2. 453 */ 454 aux_time = trailer_timestamp(te) - te->progusage2; 455 aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); 456 return aux_time; 457 } 458 459 /* Process the data samples of a single queue. The first parameter is a 460 * pointer to the queue, the second parameter is the time stamp. This 461 * is the time stamp: 462 * - of the event that triggered this processing. 463 * - or the time stamp when the last proccesing of this queue stopped. 464 * In this case it stopped at a 4KB page boundary and record the 465 * position on where to continue processing on the next invocation 466 * (see buffer->use_data and buffer->use_size). 467 * 468 * When this function returns the second parameter is updated to 469 * reflect the time stamp of the last processed auxiliary data entry 470 * (taken from the trailer entry of that page). The caller uses this 471 * returned time stamp to record the last processed entry in this 472 * queue. 473 * 474 * The function returns: 475 * 0: Processing successful. The second parameter returns the 476 * time stamp from the trailer entry until which position 477 * processing took place. Subsequent calls resume from this 478 * position. 479 * <0: An error occurred during processing. The second parameter 480 * returns the maximum time stamp. 481 * >0: Done on this queue. The second parameter returns the 482 * maximum time stamp. 483 */ 484 static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) 485 { 486 struct s390_cpumsf *sf = sfq->sf; 487 unsigned char *buf = sfq->buffer->use_data; 488 size_t len = sfq->buffer->use_size; 489 struct hws_basic_entry *basic; 490 unsigned short bsdes, dsdes; 491 size_t pos = 0; 492 int err = 1; 493 u64 aux_ts; 494 495 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, 496 &dsdes)) { 497 *ts = ~0ULL; 498 return -1; 499 } 500 501 /* Get trailer entry time stamp and check if entries in 502 * this auxiliary page are ready for processing. If the 503 * time stamp of the first entry is too high, whole buffer 504 * can be skipped. In this case return time stamp. 505 */ 506 aux_ts = get_trailer_time(buf); 507 if (!aux_ts) { 508 pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n", 509 (s64)sfq->buffer->data_offset); 510 aux_ts = ~0ULL; 511 goto out; 512 } 513 if (aux_ts > *ts) { 514 *ts = aux_ts; 515 return 0; 516 } 517 518 while (pos < len) { 519 /* Handle Basic entry */ 520 basic = (struct hws_basic_entry *)(buf + pos); 521 if (s390_cpumsf_make_event(pos, basic, sfq)) 522 pos += bsdes; 523 else { 524 err = -EBADF; 525 goto out; 526 } 527 528 pos += dsdes; /* Skip diagnositic entry */ 529 530 /* Check for trailer entry */ 531 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { 532 pos = (pos + S390_CPUMSF_PAGESZ) 533 & ~(S390_CPUMSF_PAGESZ - 1); 534 /* Check existence of next page */ 535 if (pos >= len) 536 break; 537 aux_ts = get_trailer_time(buf + pos); 538 if (!aux_ts) { 539 aux_ts = ~0ULL; 540 goto out; 541 } 542 if (aux_ts > *ts) { 543 *ts = aux_ts; 544 sfq->buffer->use_data += pos; 545 sfq->buffer->use_size -= pos; 546 return 0; 547 } 548 } 549 } 550 out: 551 *ts = aux_ts; 552 sfq->buffer->use_size = 0; 553 sfq->buffer->use_data = NULL; 554 return err; /* Buffer completely scanned or error */ 555 } 556 557 /* Run the s390 auxiliary trace decoder. 558 * Select the queue buffer to operate on, the caller already selected 559 * the proper queue, depending on second parameter 'ts'. 560 * This is the time stamp until which the auxiliary entries should 561 * be processed. This value is updated by called functions and 562 * returned to the caller. 563 * 564 * Resume processing in the current buffer. If there is no buffer 565 * get a new buffer from the queue and setup start position for 566 * processing. 567 * When a buffer is completely processed remove it from the queue 568 * before returning. 569 * 570 * This function returns 571 * 1: When the queue is empty. Second parameter will be set to 572 * maximum time stamp. 573 * 0: Normal processing done. 574 * <0: Error during queue buffer setup. This causes the caller 575 * to stop processing completely. 576 */ 577 static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, 578 u64 *ts) 579 { 580 581 struct auxtrace_buffer *buffer; 582 struct auxtrace_queue *queue; 583 int err; 584 585 queue = &sfq->sf->queues.queue_array[sfq->queue_nr]; 586 587 /* Get buffer and last position in buffer to resume 588 * decoding the auxiliary entries. One buffer might be large 589 * and decoding might stop in between. This depends on the time 590 * stamp of the trailer entry in each page of the auxiliary 591 * data and the time stamp of the event triggering the decoding. 592 */ 593 if (sfq->buffer == NULL) { 594 sfq->buffer = buffer = auxtrace_buffer__next(queue, 595 sfq->buffer); 596 if (!buffer) { 597 *ts = ~0ULL; 598 return 1; /* Processing done on this queue */ 599 } 600 /* Start with a new buffer on this queue */ 601 if (buffer->data) { 602 buffer->use_size = buffer->size; 603 buffer->use_data = buffer->data; 604 } 605 if (sfq->logfile) { /* Write into log file */ 606 size_t rc = fwrite(buffer->data, buffer->size, 1, 607 sfq->logfile); 608 if (rc != 1) 609 pr_err("Failed to write auxiliary data\n"); 610 } 611 } else 612 buffer = sfq->buffer; 613 614 if (!buffer->data) { 615 int fd = perf_data__fd(sfq->sf->session->data); 616 617 buffer->data = auxtrace_buffer__get_data(buffer, fd); 618 if (!buffer->data) 619 return -ENOMEM; 620 buffer->use_size = buffer->size; 621 buffer->use_data = buffer->data; 622 623 if (sfq->logfile) { /* Write into log file */ 624 size_t rc = fwrite(buffer->data, buffer->size, 1, 625 sfq->logfile); 626 if (rc != 1) 627 pr_err("Failed to write auxiliary data\n"); 628 } 629 } 630 pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n", 631 __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset, 632 buffer->size, buffer->use_size); 633 err = s390_cpumsf_samples(sfq, ts); 634 635 /* If non-zero, there is either an error (err < 0) or the buffer is 636 * completely done (err > 0). The error is unrecoverable, usually 637 * some descriptors could not be read successfully, so continue with 638 * the next buffer. 639 * In both cases the parameter 'ts' has been updated. 640 */ 641 if (err) { 642 sfq->buffer = NULL; 643 list_del(&buffer->list); 644 auxtrace_buffer__free(buffer); 645 if (err > 0) /* Buffer done, no error */ 646 err = 0; 647 } 648 return err; 649 } 650 651 static struct s390_cpumsf_queue * 652 s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr) 653 { 654 struct s390_cpumsf_queue *sfq; 655 656 sfq = zalloc(sizeof(struct s390_cpumsf_queue)); 657 if (sfq == NULL) 658 return NULL; 659 660 sfq->sf = sf; 661 sfq->queue_nr = queue_nr; 662 sfq->cpu = -1; 663 if (sf->use_logfile) { 664 char *name; 665 int rc; 666 667 rc = (sf->logdir) 668 ? asprintf(&name, "%s/aux.smp.%02x", 669 sf->logdir, queue_nr) 670 : asprintf(&name, "aux.smp.%02x", queue_nr); 671 if (rc > 0) 672 sfq->logfile = fopen(name, "w"); 673 if (sfq->logfile == NULL) { 674 pr_err("Failed to open auxiliary log file %s," 675 "continue...\n", name); 676 sf->use_logfile = false; 677 } 678 free(name); 679 } 680 return sfq; 681 } 682 683 static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf, 684 struct auxtrace_queue *queue, 685 unsigned int queue_nr, u64 ts) 686 { 687 struct s390_cpumsf_queue *sfq = queue->priv; 688 689 if (list_empty(&queue->head)) 690 return 0; 691 692 if (sfq == NULL) { 693 sfq = s390_cpumsf_alloc_queue(sf, queue_nr); 694 if (!sfq) 695 return -ENOMEM; 696 queue->priv = sfq; 697 698 if (queue->cpu != -1) 699 sfq->cpu = queue->cpu; 700 } 701 return auxtrace_heap__add(&sf->heap, queue_nr, ts); 702 } 703 704 static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts) 705 { 706 unsigned int i; 707 int ret = 0; 708 709 for (i = 0; i < sf->queues.nr_queues; i++) { 710 ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i], 711 i, ts); 712 if (ret) 713 break; 714 } 715 return ret; 716 } 717 718 static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts) 719 { 720 if (!sf->queues.new_data) 721 return 0; 722 723 sf->queues.new_data = false; 724 return s390_cpumsf_setup_queues(sf, ts); 725 } 726 727 static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) 728 { 729 unsigned int queue_nr; 730 u64 ts; 731 int ret; 732 733 while (1) { 734 struct auxtrace_queue *queue; 735 struct s390_cpumsf_queue *sfq; 736 737 if (!sf->heap.heap_cnt) 738 return 0; 739 740 if (sf->heap.heap_array[0].ordinal >= timestamp) 741 return 0; 742 743 queue_nr = sf->heap.heap_array[0].queue_nr; 744 queue = &sf->queues.queue_array[queue_nr]; 745 sfq = queue->priv; 746 747 auxtrace_heap__pop(&sf->heap); 748 if (sf->heap.heap_cnt) { 749 ts = sf->heap.heap_array[0].ordinal + 1; 750 if (ts > timestamp) 751 ts = timestamp; 752 } else { 753 ts = timestamp; 754 } 755 756 ret = s390_cpumsf_run_decoder(sfq, &ts); 757 if (ret < 0) { 758 auxtrace_heap__add(&sf->heap, queue_nr, ts); 759 return ret; 760 } 761 if (!ret) { 762 ret = auxtrace_heap__add(&sf->heap, queue_nr, ts); 763 if (ret < 0) 764 return ret; 765 } 766 } 767 return 0; 768 } 769 770 static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, 771 pid_t pid, pid_t tid, u64 ip) 772 { 773 char msg[MAX_AUXTRACE_ERROR_MSG]; 774 union perf_event event; 775 int err; 776 777 strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); 778 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 779 code, cpu, pid, tid, ip, msg); 780 781 err = perf_session__deliver_synth_event(sf->session, &event, NULL); 782 if (err) 783 pr_err("s390 Auxiliary Trace: failed to deliver error event," 784 "error %d\n", err); 785 return err; 786 } 787 788 static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) 789 { 790 return s390_cpumsf_synth_error(sf, 1, sample->cpu, 791 sample->pid, sample->tid, 0); 792 } 793 794 static int 795 s390_cpumsf_process_event(struct perf_session *session __maybe_unused, 796 union perf_event *event, 797 struct perf_sample *sample, 798 struct perf_tool *tool) 799 { 800 struct s390_cpumsf *sf = container_of(session->auxtrace, 801 struct s390_cpumsf, 802 auxtrace); 803 u64 timestamp = sample->time; 804 int err = 0; 805 806 if (dump_trace) 807 return 0; 808 809 if (!tool->ordered_events) { 810 pr_err("s390 Auxiliary Trace requires ordered events\n"); 811 return -EINVAL; 812 } 813 814 if (event->header.type == PERF_RECORD_AUX && 815 event->aux.flags & PERF_AUX_FLAG_TRUNCATED) 816 return s390_cpumsf_lost(sf, sample); 817 818 if (timestamp) { 819 err = s390_cpumsf_update_queues(sf, timestamp); 820 if (!err) 821 err = s390_cpumsf_process_queues(sf, timestamp); 822 } 823 return err; 824 } 825 826 struct s390_cpumsf_synth { 827 struct perf_tool cpumsf_tool; 828 struct perf_session *session; 829 }; 830 831 static int 832 s390_cpumsf_process_auxtrace_event(struct perf_session *session, 833 union perf_event *event __maybe_unused, 834 struct perf_tool *tool __maybe_unused) 835 { 836 struct s390_cpumsf *sf = container_of(session->auxtrace, 837 struct s390_cpumsf, 838 auxtrace); 839 840 int fd = perf_data__fd(session->data); 841 struct auxtrace_buffer *buffer; 842 off_t data_offset; 843 int err; 844 845 if (sf->data_queued) 846 return 0; 847 848 if (perf_data__is_pipe(session->data)) { 849 data_offset = 0; 850 } else { 851 data_offset = lseek(fd, 0, SEEK_CUR); 852 if (data_offset == -1) 853 return -errno; 854 } 855 856 err = auxtrace_queues__add_event(&sf->queues, session, event, 857 data_offset, &buffer); 858 if (err) 859 return err; 860 861 /* Dump here after copying piped trace out of the pipe */ 862 if (dump_trace) { 863 if (auxtrace_buffer__get_data(buffer, fd)) { 864 s390_cpumsf_dump_event(sf, buffer->data, 865 buffer->size); 866 auxtrace_buffer__put_data(buffer); 867 } 868 } 869 return 0; 870 } 871 872 static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused) 873 { 874 } 875 876 static int s390_cpumsf_flush(struct perf_session *session __maybe_unused, 877 struct perf_tool *tool __maybe_unused) 878 { 879 return 0; 880 } 881 882 static void s390_cpumsf_free_queues(struct perf_session *session) 883 { 884 struct s390_cpumsf *sf = container_of(session->auxtrace, 885 struct s390_cpumsf, 886 auxtrace); 887 struct auxtrace_queues *queues = &sf->queues; 888 unsigned int i; 889 890 for (i = 0; i < queues->nr_queues; i++) { 891 struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) 892 queues->queue_array[i].priv; 893 894 if (sfq != NULL && sfq->logfile) { 895 fclose(sfq->logfile); 896 sfq->logfile = NULL; 897 } 898 zfree(&queues->queue_array[i].priv); 899 } 900 auxtrace_queues__free(queues); 901 } 902 903 static void s390_cpumsf_free(struct perf_session *session) 904 { 905 struct s390_cpumsf *sf = container_of(session->auxtrace, 906 struct s390_cpumsf, 907 auxtrace); 908 909 auxtrace_heap__free(&sf->heap); 910 s390_cpumsf_free_queues(session); 911 session->auxtrace = NULL; 912 free(sf->logdir); 913 free(sf); 914 } 915 916 static int s390_cpumsf_get_type(const char *cpuid) 917 { 918 int ret, family = 0; 919 920 ret = sscanf(cpuid, "%*[^,],%u", &family); 921 return (ret == 1) ? family : 0; 922 } 923 924 /* Check itrace options set on perf report command. 925 * Return true, if none are set or all options specified can be 926 * handled on s390 (currently only option 'd' for logging. 927 * Return false otherwise. 928 */ 929 static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) 930 { 931 bool ison = false; 932 933 if (!itops || !itops->set) 934 return true; 935 ison = itops->inject || itops->instructions || itops->branches || 936 itops->transactions || itops->ptwrites || 937 itops->pwr_events || itops->errors || 938 itops->dont_decode || itops->calls || itops->returns || 939 itops->callchain || itops->thread_stack || 940 itops->last_branch; 941 if (!ison) 942 return true; 943 pr_err("Unsupported --itrace options specified\n"); 944 return false; 945 } 946 947 /* Check for AUXTRACE dump directory if it is needed. 948 * On failure print an error message but continue. 949 * Return 0 on wrong keyword in config file and 1 otherwise. 950 */ 951 static int s390_cpumsf__config(const char *var, const char *value, void *cb) 952 { 953 struct s390_cpumsf *sf = cb; 954 struct stat stbuf; 955 int rc; 956 957 if (strcmp(var, "auxtrace.dumpdir")) 958 return 0; 959 sf->logdir = strdup(value); 960 if (sf->logdir == NULL) { 961 pr_err("Failed to find auxtrace log directory %s," 962 " continue with current directory...\n", value); 963 return 1; 964 } 965 rc = stat(sf->logdir, &stbuf); 966 if (rc == -1 || !S_ISDIR(stbuf.st_mode)) { 967 pr_err("Missing auxtrace log directory %s," 968 " continue with current directory...\n", value); 969 free(sf->logdir); 970 sf->logdir = NULL; 971 } 972 return 1; 973 } 974 975 int s390_cpumsf_process_auxtrace_info(union perf_event *event, 976 struct perf_session *session) 977 { 978 struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; 979 struct s390_cpumsf *sf; 980 int err; 981 982 if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event)) 983 return -EINVAL; 984 985 sf = zalloc(sizeof(struct s390_cpumsf)); 986 if (sf == NULL) 987 return -ENOMEM; 988 989 if (!check_auxtrace_itrace(session->itrace_synth_opts)) { 990 err = -EINVAL; 991 goto err_free; 992 } 993 sf->use_logfile = session->itrace_synth_opts->log; 994 if (sf->use_logfile) 995 perf_config(s390_cpumsf__config, sf); 996 997 err = auxtrace_queues__init(&sf->queues); 998 if (err) 999 goto err_free; 1000 1001 sf->session = session; 1002 sf->machine = &session->machines.host; /* No kvm support */ 1003 sf->auxtrace_type = auxtrace_info->type; 1004 sf->pmu_type = PERF_TYPE_RAW; 1005 sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid); 1006 1007 sf->auxtrace.process_event = s390_cpumsf_process_event; 1008 sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; 1009 sf->auxtrace.flush_events = s390_cpumsf_flush; 1010 sf->auxtrace.free_events = s390_cpumsf_free_events; 1011 sf->auxtrace.free = s390_cpumsf_free; 1012 session->auxtrace = &sf->auxtrace; 1013 1014 if (dump_trace) 1015 return 0; 1016 1017 err = auxtrace_queues__process_index(&sf->queues, session); 1018 if (err) 1019 goto err_free_queues; 1020 1021 if (sf->queues.populated) 1022 sf->data_queued = true; 1023 1024 return 0; 1025 1026 err_free_queues: 1027 auxtrace_queues__free(&sf->queues); 1028 session->auxtrace = NULL; 1029 err_free: 1030 free(sf->logdir); 1031 free(sf); 1032 return err; 1033 } 1034