// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2018
 * Auxtrace support for s390 CPU-Measurement Sampling Facility
 *
 * Author(s): Thomas Richter <tmricht@linux.ibm.com>
 *
 * Auxiliary traces are collected during 'perf record' using the rbd000 event.
 * Several PERF_RECORD_XXX records are generated during recording:
 *
 * PERF_RECORD_AUX:
 *	Records that new data landed in the AUX buffer part.
 * PERF_RECORD_AUXTRACE:
 *	Defines auxtrace data. Followed by the actual data. The contents of
 *	the auxtrace data are dependent on the event and the CPU.
 *	This record is generated by the perf record command. For details
 *	see Documentation/perf.data-file-format.txt.
 * PERF_RECORD_AUXTRACE_INFO:
 *	Defines a table of contents for PERF_RECORD_AUXTRACE records. This
 *	record is generated during the 'perf record' command. Each record
 *	contains up to 256 entries describing offset and size of the AUXTRACE
 *	data in the perf.data file.
 * PERF_RECORD_AUXTRACE_ERROR:
 *	Indicates an error during AUXTRACE collection such as buffer overflow.
 * PERF_RECORD_FINISHED_ROUND:
 *	Perf events are not necessarily in time stamp order, as they can be
 *	collected in parallel on different CPUs. If the events should be
 *	processed in time order they need to be sorted first.
 *	Perf report guarantees that there is no reordering over a
 *	PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
 *	time stamp lower than this record are processed (and displayed) before
 *	the succeeding perf records are processed.
 *
 * These records are evaluated during the perf report command.
 *
 * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
 * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
 * below.
 * Auxiliary trace data is collected per CPU. To merge the data into the report
 * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
 * data is in ascending order.
 *
 * Each queue has a doubly linked list of auxtrace_buffers. This list contains
 * the offset and size of a CPU's auxtrace data. During auxtrace processing
 * the data portion is mmap()'ed.
 *
 * To sort the queues in chronological order, all queue access is controlled
 * by the auxtrace_heap. This is basically a stack; each stack element has two
 * entries, the queue number and a time stamp. However, the stack is sorted by
 * the time stamps. The highest time stamp is at the bottom, the lowest
 * (nearest) time stamp is at the top. That sort order is maintained at all
 * times!
 *
 * After the auxtrace infrastructure has been set up, the auxtrace queues are
 * filled with data (offset/size pairs) and the auxtrace_heap is populated.
 *
 * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
 * Each record is handled by s390_cpumsf_process_event(). The time stamp of
 * the perf record is compared with the time stamp located on the auxtrace_heap
 * top element. If that time stamp is lower than the time stamp from the
 * record sample, the auxtrace queues will be processed. As auxtrace queues
 * control many auxtrace_buffers and each buffer can be quite large, the
 * auxtrace buffer might be processed only partially. In this case the
 * position in the auxtrace_buffer of that queue is remembered and the time
 * stamp of the last processed entry of the auxtrace_buffer replaces the
 * current auxtrace_heap top.
 *
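 *    As an illustrative example (values made up): assume the heap top refers
 *    to queue 1 with time stamp 1000 and the next perf record carries time
 *    stamp 2500. Queue 1 is then decoded page by page until the trailer time
 *    stamp of the current 4KB page exceeds 2500 or the buffer runs out of
 *    data; the position reached and that trailer time stamp become the new
 *    heap entry for queue 1.
 *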
 * 3. Auxtrace_queues might run out of data and are fed by the
 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
 *
 * Event Generation
 * Each sampling-data entry in the auxiliary trace data generates a perf sample.
 * This sample is filled with data from the auxtrace such as PID/TID,
 * instruction address, CPU state, etc. This sample is processed with
 * perf_session__deliver_synth_event() to be included into the GUI.
 *
 * 4. The PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
 * auxiliary trace entries until the time stamp of this record is reached by
 * the auxtrace_heap top element. This is triggered by ordered_event->deliver().
 *
 *
 * Perf event processing.
 * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
 * This is the function call sequence:
 *
 * __cmd_report()
 * |
 * perf_session__process_events()
 * |
 * __perf_session__process_events()
 * |
 * perf_session__process_event()
 * |	This function splits the PERF_RECORD_XXX records.
 * |	- Those generated by the perf record command (type number equal or
 * |	  higher than PERF_RECORD_USER_TYPE_START) are handled by
 * |	  perf_session__process_user_event() (see below).
 * |	- Those generated by the kernel are handled by
 * |	  perf_evlist__parse_sample_timestamp().
 * |
 * perf_evlist__parse_sample_timestamp()
 * |	Extract the time stamp from the sample data.
 * |
 * perf_session__queue_event()
 * |	If the timestamp is positive the sample is entered into an
 * |	ordered_event list, sort order is the timestamp. Event processing is
 * |	deferred until later (see perf_session__process_user_event()).
 * |	Other timestamps (0 or -1) are handled immediately by
 * |	perf_session__deliver_event(). These are events generated at start up
 * |	of the perf record command. They create PERF_RECORD_COMM and
 * |	PERF_RECORD_MMAP* records. They are needed to create a list of running
 * |	processes and their memory mappings and layout. They are needed at the
 * |	beginning to enable the perf report command to create process trees
 * |	and memory mappings.
 * |
 * perf_session__deliver_event()
 * |	Delivers a PERF_RECORD_XXX entry for handling.
 * |
 * auxtrace__process_event()
 * |	The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
 * |	time stamps from the auxiliary trace buffers. This enables
 * |	synchronization between auxiliary trace data and the events in the
 * |	perf.data file.
 * |
 * machine__deliver_event()
 * |	Handles the PERF_RECORD_XXX event. This depends on the record type.
 *	It might update the process tree, update a process memory map or enter
 *	a sample with IP and call chain data into the GUI data pool.
 *
 *
 * Deferred processing determined by perf_session__process_user_event() is
 * finally triggered when a PERF_RECORD_FINISHED_ROUND is encountered. These
 * records are generated during the perf record command.
 * The timestamp of the PERF_RECORD_FINISHED_ROUND event is taken to process
 * all PERF_RECORD_XXX entries stored in the ordered_event list. This list was
 * built up while reading the perf.data file.
 * Each event is now processed by calling perf_session__deliver_event().
 * This enables time synchronization between the data in the perf.data file and
 * the data in the auxiliary trace buffers.
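 *
 * For orientation, a rough sketch of one 4KB sample-data-block (SDB) page as
 * this decoder walks it (the entry sizes bsdes and dsdes are machine
 * dependent, see s390_cpumsf_validate()):
 *
 *	+0x000	basic-sampling entry        (bsdes bytes)
 *		diagnostic-sampling entry   (dsdes bytes)
 *		basic-sampling entry
 *		diagnostic-sampling entry
 *		...
 *	+0x1000 - sizeof(struct hws_trailer_entry)
 *		trailer entry (bsdes, dsdes, overflow count, TOD time stamp)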
 */

#include <endian.h>
#include <errno.h>
#include <byteswap.h>
#include <inttypes.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>

#include "cpumap.h"
#include "color.h"
#include "evsel.h"
#include "evlist.h"
#include "machine.h"
#include "session.h"
#include "util.h"
#include "thread.h"
#include "debug.h"
#include "auxtrace.h"
#include "s390-cpumsf.h"
#include "s390-cpumsf-kernel.h"

struct s390_cpumsf {
	struct auxtrace		auxtrace;
	struct auxtrace_queues	queues;
	struct auxtrace_heap	heap;
	struct perf_session	*session;
	struct machine		*machine;
	u32			auxtrace_type;
	u32			pmu_type;
	u16			machine_type;
	bool			data_queued;
};

struct s390_cpumsf_queue {
	struct s390_cpumsf	*sf;
	unsigned int		queue_nr;
	struct auxtrace_buffer	*buffer;
	int			cpu;
};

/* Display s390 CPU measurement facility basic-sampling data entry */
static bool s390_cpumsf_basic_show(const char *color, size_t pos,
				   struct hws_basic_entry *basic)
{
	if (basic->def != 1) {
		pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
		return false;
	}
	color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x"
		      " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
		      "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
		      pos, basic->def, basic->U,
		      basic->T ? 'T' : ' ',
		      basic->W ? 'W' : ' ',
		      basic->P ? 'P' : ' ',
		      basic->I ? 'I' : ' ',
		      basic->AS, basic->prim_asn, basic->ia, basic->CL,
		      basic->hpp, basic->gpp);
	return true;
}

/* Display s390 CPU measurement facility diagnostic-sampling data entry */
static bool s390_cpumsf_diag_show(const char *color, size_t pos,
				  struct hws_diag_entry *diag)
{
	if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
		pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
		return false;
	}
	color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n",
		      pos, diag->def, diag->I ? 'I' : ' ');
	return true;
}

/* Return the TOD timestamp contained in a trailer entry */
static unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
{
	/* te->t set: TOD in STCKE format, bytes 8-15
	 * te->t not set: TOD in STCK format, bytes 0-7
	 */
	unsigned long long ts;

	memcpy(&ts, &te->timestamp[te->t], sizeof(ts));
	return ts;
}

/* Display s390 CPU measurement facility trailer entry */
static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
				     struct hws_trailer_entry *te)
{
	if (te->bsdes != sizeof(struct hws_basic_entry)) {
		pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
		return false;
	}
	color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
		      " dsdes:%d Overflow:%lld Time:%#llx\n"
		      "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n",
		      pos,
		      te->f ? 'F' : ' ',
		      te->a ? 'A' : ' ',
		      te->t ? 'T' : ' ',
		      te->bsdes, te->dsdes, te->overflow,
		      trailer_timestamp(te), te->clock_base, te->progusage2,
		      te->progusage[0], te->progusage[1]);
	return true;
}

/* Test a sample data block. It must be 4KB or a multiple thereof in size and
 * 4KB page aligned. Each sample data page has a trailer entry at the
 * end which contains the sample entry data sizes.
 *
 * Return true if the sample data block passes the checks and set the
 * basic set entry size and diagnostic set entry size.
 *
 * Return false on failure.
 *
 * Note: Old hardware does not set the basic or diagnostic entry sizes
 * in the trailer entry. Use the machine type number instead.
 */
static bool s390_cpumsf_validate(int machine_type,
				 unsigned char *buf, size_t len,
				 unsigned short *bsdes,
				 unsigned short *dsdes)
{
	struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
	struct hws_trailer_entry *te;

	*dsdes = *bsdes = 0;
	if (len & (S390_CPUMSF_PAGESZ - 1))	/* Illegal size */
		return false;
	if (basic->def != 1)	/* No basic set entry, must be first */
		return false;
	/* Check for trailer entry at end of SDB */
	te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
					      - sizeof(*te));
	*bsdes = te->bsdes;
	*dsdes = te->dsdes;
	if (!te->bsdes && !te->dsdes) {
		/* Very old hardware, use CPUID */
		switch (machine_type) {
		case 2097:
		case 2098:
			*dsdes = 64;
			*bsdes = 32;
			break;
		case 2817:
		case 2818:
			*dsdes = 74;
			*bsdes = 32;
			break;
		case 2827:
		case 2828:
			*dsdes = 85;
			*bsdes = 32;
			break;
		default:
			/* Illegal trailer entry */
			return false;
		}
	}
	return true;
}

/* Return true if there is room for another entry before the trailer */
static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
{
	size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);

	if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
		return false;
	return true;
}

/* Dump an auxiliary buffer. These buffers are multiples of
 * 4KB SDB pages.
 */
static void s390_cpumsf_dump(struct s390_cpumsf *sf,
			     unsigned char *buf, size_t len)
{
	const char *color = PERF_COLOR_BLUE;
	struct hws_basic_entry *basic;
	struct hws_diag_entry *diag;
	unsigned short bsdes, dsdes;
	size_t pos = 0;

	color_fprintf(stdout, color,
		      ". ... s390 AUX data: size %zu bytes\n",
		      len);

	if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
				  &dsdes)) {
		pr_err("Invalid AUX trace data block size:%zu"
		       " (type:%d bsdes:%hd dsdes:%hd)\n",
		       len, sf->machine_type, bsdes, dsdes);
		return;
	}

	/* The s390 kernel always returns 4KB blocks fully occupied,
	 * no partially filled SDBs.
	 */
	while (pos < len) {
		/* Handle Basic entry */
		basic = (struct hws_basic_entry *)(buf + pos);
		if (s390_cpumsf_basic_show(color, pos, basic))
			pos += bsdes;
		else
			return;

		/* Handle Diagnostic entry */
		diag = (struct hws_diag_entry *)(buf + pos);
		if (s390_cpumsf_diag_show(color, pos, diag))
			pos += dsdes;
		else
			return;

		/* Check for trailer entry */
		if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
			/* Show trailer entry */
			struct hws_trailer_entry te;

			pos = (pos + S390_CPUMSF_PAGESZ)
			      & ~(S390_CPUMSF_PAGESZ - 1);
			pos -= sizeof(te);
			memcpy(&te, buf + pos, sizeof(te));
			/* Set descriptor sizes in case of old hardware
			 * where these values are not set.
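			 * Only this local copy is modified; the data in the
			 * AUX buffer itself stays untouched.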
			 */
			te.bsdes = bsdes;
			te.dsdes = dsdes;
			if (s390_cpumsf_trailer_show(color, pos, &te))
				pos += sizeof(te);
			else
				return;
		}
	}
}

static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
				   size_t len)
{
	printf(".\n");
	s390_cpumsf_dump(sf, buf, len);
}

#define	S390_LPP_PID_MASK	0xffffffff

static bool s390_cpumsf_make_event(size_t pos,
				   struct hws_basic_entry *basic,
				   struct s390_cpumsf_queue *sfq)
{
	struct perf_sample sample = {
		.ip = basic->ia,
		.pid = basic->hpp & S390_LPP_PID_MASK,
		.tid = basic->hpp & S390_LPP_PID_MASK,
		.cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
		.cpu = sfq->cpu,
		.period = 1
	};
	union perf_event event;

	memset(&event, 0, sizeof(event));
	if (basic->CL == 1)	/* Native LPAR mode */
		sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
					  : PERF_RECORD_MISC_KERNEL;
	else if (basic->CL == 2)	/* Guest kernel/user space */
		sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
					  : PERF_RECORD_MISC_GUEST_KERNEL;
	else if (basic->gpp || basic->prim_asn != 0xffff)
		/* Use heuristics on old hardware */
		sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
					  : PERF_RECORD_MISC_GUEST_KERNEL;
	else
		sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
					  : PERF_RECORD_MISC_KERNEL;

	event.sample.header.type = PERF_RECORD_SAMPLE;
	event.sample.header.misc = sample.cpumode;
	event.sample.header.size = sizeof(struct perf_event_header);

	pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
		  __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
		  sample.tid, sample.cpumode, sample.cpu);
	if (perf_session__deliver_synth_event(sfq->sf->session, &event,
					      &sample)) {
		pr_err("s390 Auxiliary Trace: failed to deliver event\n");
		return false;
	}
	return true;
}

static unsigned long long get_trailer_time(const unsigned char *buf)
{
	struct hws_trailer_entry *te;
	unsigned long long aux_time;

	te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
					      - sizeof(*te));

	if (!te->clock_base)	/* TOD_CLOCK_BASE value missing */
		return 0;

	/* Correct calculation to convert time stamp in trailer entry to
	 * nanoseconds (taken from arch/s390 function tod_to_ns()).
	 * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
	 */
	aux_time = trailer_timestamp(te) - te->progusage2;
	aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
	return aux_time;
}

/* Process the data samples of a single queue. The first parameter is a
 * pointer to the queue, the second parameter is the time stamp. This
 * is the time stamp:
 * - of the event that triggered this processing.
 * - or the time stamp when the last processing of this queue stopped.
 *   In this case it stopped at a 4KB page boundary and recorded the
 *   position where to continue processing on the next invocation
 *   (see buffer->use_data and buffer->use_size).
 *
 * When this function returns the second parameter is updated to
 * reflect the time stamp of the last processed auxiliary data entry
 * (taken from the trailer entry of that page). The caller uses this
 * returned time stamp to record the last processed entry in this
 * queue.
 *
 * The function returns:
 *  0:	Processing successful. The second parameter returns the
 *	time stamp from the trailer entry until which position
 *	processing took place. Subsequent calls resume from this
 *	position.
 * <0:	An error occurred during processing. The second parameter
 *	returns the maximum time stamp.
 * >0:	Done on this queue. The second parameter returns the
 *	maximum time stamp.
 */
static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
{
	struct s390_cpumsf *sf = sfq->sf;
	unsigned char *buf = sfq->buffer->use_data;
	size_t len = sfq->buffer->use_size;
	struct hws_basic_entry *basic;
	unsigned short bsdes, dsdes;
	size_t pos = 0;
	int err = 1;
	u64 aux_ts;

	if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
				  &dsdes)) {
		*ts = ~0ULL;
		return -1;
	}

	/* Get the trailer entry time stamp and check if the entries in
	 * this auxiliary page are ready for processing. If the
	 * time stamp of the first entry is too high, the whole buffer
	 * can be skipped. In this case return the time stamp.
	 */
	aux_ts = get_trailer_time(buf);
	if (!aux_ts) {
		pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
		       sfq->buffer->data_offset);
		aux_ts = ~0ULL;
		goto out;
	}
	if (aux_ts > *ts) {
		*ts = aux_ts;
		return 0;
	}

	while (pos < len) {
		/* Handle Basic entry */
		basic = (struct hws_basic_entry *)(buf + pos);
		if (s390_cpumsf_make_event(pos, basic, sfq))
			pos += bsdes;
		else {
			err = -EBADF;
			goto out;
		}

		pos += dsdes;	/* Skip diagnostic entry */

		/* Check for trailer entry */
		if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
			pos = (pos + S390_CPUMSF_PAGESZ)
			      & ~(S390_CPUMSF_PAGESZ - 1);
			/* Check existence of next page */
			if (pos >= len)
				break;
			aux_ts = get_trailer_time(buf + pos);
			if (!aux_ts) {
				aux_ts = ~0ULL;
				goto out;
			}
			if (aux_ts > *ts) {
				*ts = aux_ts;
				sfq->buffer->use_data += pos;
				sfq->buffer->use_size -= pos;
				return 0;
			}
		}
	}
out:
	*ts = aux_ts;
	sfq->buffer->use_size = 0;
	sfq->buffer->use_data = NULL;
	return err;	/* Buffer completely scanned or error */
}

/* Run the s390 auxiliary trace decoder.
 * Select the queue buffer to operate on; the caller already selected
 * the proper queue, depending on the second parameter 'ts'.
 * This is the time stamp until which the auxiliary entries should
 * be processed. This value is updated by called functions and
 * returned to the caller.
 *
 * Resume processing in the current buffer. If there is no buffer
 * get a new buffer from the queue and set up the start position for
 * processing.
 * When a buffer is completely processed remove it from the queue
 * before returning.
 *
 * This function returns
 *  1:	When the queue is empty. The second parameter will be set to
 *	the maximum time stamp.
 *  0:	Normal processing done.
 * <0:	Error during queue buffer setup. This causes the caller
 *	to stop processing completely.
 */
static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
				   u64 *ts)
{
	struct auxtrace_buffer *buffer;
	struct auxtrace_queue *queue;
	int err;

	queue = &sfq->sf->queues.queue_array[sfq->queue_nr];

	/* Get the buffer and the last position in the buffer to resume
	 * decoding the auxiliary entries. One buffer might be large
	 * and decoding might stop in between. This depends on the time
	 * stamp of the trailer entry in each page of the auxiliary
	 * data and the time stamp of the event triggering the decoding.
	 */
	if (sfq->buffer == NULL) {
		sfq->buffer = buffer = auxtrace_buffer__next(queue,
							     sfq->buffer);
		if (!buffer) {
			*ts = ~0ULL;
			return 1;	/* Processing done on this queue */
		}
		/* Start with a new buffer on this queue */
		if (buffer->data) {
			buffer->use_size = buffer->size;
			buffer->use_data = buffer->data;
		}
	} else
		buffer = sfq->buffer;

	if (!buffer->data) {
		int fd = perf_data__fd(sfq->sf->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
		buffer->use_size = buffer->size;
		buffer->use_data = buffer->data;
	}
	pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
		  __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
		  buffer->size, buffer->use_size);
	err = s390_cpumsf_samples(sfq, ts);

	/* If non-zero, there is either an error (err < 0) or the buffer is
	 * completely done (err > 0). The error is unrecoverable, usually
	 * some descriptors could not be read successfully, so continue with
	 * the next buffer.
	 * In both cases the parameter 'ts' has been updated.
	 */
	if (err) {
		sfq->buffer = NULL;
		list_del(&buffer->list);
		auxtrace_buffer__free(buffer);
		if (err > 0)	/* Buffer done, no error */
			err = 0;
	}
	return err;
}

static struct s390_cpumsf_queue *
s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
{
	struct s390_cpumsf_queue *sfq;

	sfq = zalloc(sizeof(struct s390_cpumsf_queue));
	if (sfq == NULL)
		return NULL;

	sfq->sf = sf;
	sfq->queue_nr = queue_nr;
	sfq->cpu = -1;
	return sfq;
}

static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
				   struct auxtrace_queue *queue,
				   unsigned int queue_nr, u64 ts)
{
	struct s390_cpumsf_queue *sfq = queue->priv;

	if (list_empty(&queue->head))
		return 0;

	if (sfq == NULL) {
		sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
		if (!sfq)
			return -ENOMEM;
		queue->priv = sfq;

		if (queue->cpu != -1)
			sfq->cpu = queue->cpu;
	}
	return auxtrace_heap__add(&sf->heap, queue_nr, ts);
}

static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
{
	unsigned int i;
	int ret = 0;

	for (i = 0; i < sf->queues.nr_queues; i++) {
		ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
					      i, ts);
		if (ret)
			break;
	}
	return ret;
}

static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
{
	if (!sf->queues.new_data)
		return 0;

	sf->queues.new_data = false;
	return s390_cpumsf_setup_queues(sf, ts);
}

static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct s390_cpumsf_queue *sfq;

		if (!sf->heap.heap_cnt)
			return 0;

		if (sf->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = sf->heap.heap_array[0].queue_nr;
		queue = &sf->queues.queue_array[queue_nr];
		sfq = queue->priv;

		auxtrace_heap__pop(&sf->heap);
		if (sf->heap.heap_cnt) {
			ts = sf->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		ret = s390_cpumsf_run_decoder(sfq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&sf->heap, queue_nr, ts);
			return ret;
		}
		if (!ret) {
			ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		}
	}
	return 0;
}

static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
				   pid_t pid, pid_t tid, u64 ip)
{
	char msg[MAX_AUXTRACE_ERROR_MSG];
	union perf_event event;
	int err;

	strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     code, cpu, pid, tid, ip, msg);

	err = perf_session__deliver_synth_event(sf->session, &event, NULL);
	if (err)
		pr_err("s390 Auxiliary Trace: failed to deliver error event,"
		       " error %d\n", err);
	return err;
}

static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
{
	return s390_cpumsf_synth_error(sf, 1, sample->cpu,
				       sample->pid, sample->tid, 0);
}

static int
s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
			  union perf_event *event,
			  struct perf_sample *sample,
			  struct perf_tool *tool)
{
	struct s390_cpumsf *sf = container_of(session->auxtrace,
					      struct s390_cpumsf,
					      auxtrace);
	u64 timestamp = sample->time;
	int err = 0;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("s390 Auxiliary Trace requires ordered events\n");
		return -EINVAL;
	}

	if (event->header.type == PERF_RECORD_AUX &&
	    event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
		return s390_cpumsf_lost(sf, sample);

	if (timestamp) {
		err = s390_cpumsf_update_queues(sf, timestamp);
		if (!err)
			err = s390_cpumsf_process_queues(sf, timestamp);
	}
	return err;
}

struct s390_cpumsf_synth {
	struct perf_tool cpumsf_tool;
	struct perf_session *session;
};

static int
s390_cpumsf_process_auxtrace_event(struct perf_session *session,
				   union perf_event *event __maybe_unused,
				   struct perf_tool *tool __maybe_unused)
{
	struct s390_cpumsf *sf = container_of(session->auxtrace,
					      struct s390_cpumsf,
					      auxtrace);
	int fd = perf_data__fd(session->data);
	struct auxtrace_buffer *buffer;
	off_t data_offset;
	int err;

	if (sf->data_queued)
		return 0;

	if (perf_data__is_pipe(session->data)) {
		data_offset = 0;
	} else {
		data_offset = lseek(fd, 0, SEEK_CUR);
		if (data_offset == -1)
			return -errno;
	}

	err = auxtrace_queues__add_event(&sf->queues, session, event,
					 data_offset, &buffer);
	if (err)
		return err;

	/* Dump here after copying piped trace out of the pipe */
	if (dump_trace) {
		if (auxtrace_buffer__get_data(buffer, fd)) {
			s390_cpumsf_dump_event(sf, buffer->data,
					       buffer->size);
			auxtrace_buffer__put_data(buffer);
		}
	}
	return 0;
}

static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
{
}

static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
			     struct perf_tool *tool __maybe_unused)
{
	return 0;
}

static void s390_cpumsf_free_queues(struct perf_session *session)
{
	struct s390_cpumsf *sf = container_of(session->auxtrace,
					      struct s390_cpumsf,
					      auxtrace);
	struct auxtrace_queues *queues = &sf->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++)
		zfree(&queues->queue_array[i].priv);
	auxtrace_queues__free(queues);
}

static void s390_cpumsf_free(struct perf_session *session)
{
	struct s390_cpumsf *sf = container_of(session->auxtrace,
					      struct s390_cpumsf,
					      auxtrace);

	auxtrace_heap__free(&sf->heap);
	s390_cpumsf_free_queues(session);
	session->auxtrace = NULL;
	free(sf);
}

static int s390_cpumsf_get_type(const char *cpuid)
{
	int ret, family = 0;

	ret = sscanf(cpuid, "%*[^,],%u", &family);
	return (ret == 1) ? family : 0;
}

/* Check the itrace options set on the perf report command.
 * Return true if none are set or all options specified can be
 * handled on s390.
 * Return false otherwise.
 */
static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
{
	if (!itops || !itops->set)
		return true;
	pr_err("No --itrace options supported\n");
	return false;
}

int s390_cpumsf_process_auxtrace_info(union perf_event *event,
				      struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	struct s390_cpumsf *sf;
	int err;

	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event))
		return -EINVAL;

	sf = zalloc(sizeof(struct s390_cpumsf));
	if (sf == NULL)
		return -ENOMEM;

	if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
		err = -EINVAL;
		goto err_free;
	}

	err = auxtrace_queues__init(&sf->queues);
	if (err)
		goto err_free;

	sf->session = session;
	sf->machine = &session->machines.host; /* No kvm support */
	sf->auxtrace_type = auxtrace_info->type;
	sf->pmu_type = PERF_TYPE_RAW;
	sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);

	sf->auxtrace.process_event = s390_cpumsf_process_event;
	sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
	sf->auxtrace.flush_events = s390_cpumsf_flush;
	sf->auxtrace.free_events = s390_cpumsf_free_events;
	sf->auxtrace.free = s390_cpumsf_free;
	session->auxtrace = &sf->auxtrace;

	if (dump_trace)
		return 0;

	err = auxtrace_queues__process_index(&sf->queues, session);
	if (err)
		goto err_free_queues;

	if (sf->queues.populated)
		sf->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&sf->queues);
	session->auxtrace = NULL;
err_free:
	free(sf);
	return err;
}