/*
 * Simple trace backend
 *
 * Copyright IBM, Corp. 2010
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#ifndef _WIN32
#include <signal.h>
#include <pthread.h>
#endif
#include "qemu-timer.h"
#include "trace.h"
#include "trace/control.h"

/** Trace file header event ID */
#define HEADER_EVENT_ID (~(uint64_t)0) /* avoids conflicting with TraceEventIDs */

/** Trace file magic number */
#define HEADER_MAGIC 0xf2b177cb0aa429b4ULL

/** Trace file version number, bump if format changes */
#define HEADER_VERSION 2

/** Records were dropped event ID */
#define DROPPED_EVENT_ID (~(uint64_t)0 - 1)

/** Trace record is valid */
#define TRACE_RECORD_VALID ((uint64_t)1 << 63)

/*
 * Trace records are written out by a dedicated thread.  The thread waits for
 * records to become available, writes them out, and then waits again.
 */
static GStaticMutex trace_lock = G_STATIC_MUTEX_INIT;
static GCond *trace_available_cond;
static GCond *trace_empty_cond;
static bool trace_available;
static bool trace_writeout_enabled;

enum {
    TRACE_BUF_LEN = 4096 * 64,
    TRACE_BUF_FLUSH_THRESHOLD = TRACE_BUF_LEN / 4,
};

uint8_t trace_buf[TRACE_BUF_LEN];
/* trace_idx and writeout_idx are free-running byte counters; positions in
 * trace_buf are taken modulo TRACE_BUF_LEN.
 */
static unsigned int trace_idx;
static unsigned int writeout_idx;
static uint64_t dropped_events;
static FILE *trace_fp;
static char *trace_file_name = NULL;

/** Trace buffer entry */
typedef struct {
    uint64_t event;    /* TraceEventID */
    uint64_t timestamp_ns;
    uint32_t length;   /* in bytes */
    uint32_t reserved; /* unused */
    uint8_t arguments[];
} TraceRecord;

typedef struct {
    uint64_t header_event_id; /* HEADER_EVENT_ID */
    uint64_t header_magic;    /* HEADER_MAGIC    */
    uint64_t header_version;  /* HEADER_VERSION  */
} TraceRecordHeader;


static void read_from_buffer(unsigned int idx, void *dataptr, size_t size);
static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size);

static void clear_buffer_range(unsigned int idx, size_t len)
{
    uint32_t num = 0;
    while (num < len) {
        if (idx >= TRACE_BUF_LEN) {
            idx = idx % TRACE_BUF_LEN;
        }
        trace_buf[idx++] = 0;
        num++;
    }
}

/**
 * Read a trace record from the trace buffer
 *
 * @idx       Trace buffer index
 * @recordptr Trace record to fill
 *
 * Returns false if the record is not valid.
 */
static bool get_trace_record(unsigned int idx, TraceRecord **recordptr)
{
    uint64_t event_flag = 0;
    TraceRecord record;
    /* read the event flag to see if it's a valid record */
    read_from_buffer(idx, &record, sizeof(event_flag));

    if (!(record.event & TRACE_RECORD_VALID)) {
        return false;
    }

    smp_rmb(); /* read memory barrier before accessing record */
    /* read the record header to know record length */
    read_from_buffer(idx, &record, sizeof(TraceRecord));
    *recordptr = malloc(record.length); /* don't use g_malloc, can deadlock when traced */
    /* make a copy of record to avoid being overwritten */
    read_from_buffer(idx, *recordptr, record.length);
    smp_rmb(); /* memory barrier before clearing valid flag */
    (*recordptr)->event &= ~TRACE_RECORD_VALID;
    /* clear the trace buffer range for the consumed record; otherwise any
     * stale byte with its MSB set could be mistaken for a valid event ID when
     * the writer wraps around to this range of the buffer again.
     */
    clear_buffer_range(idx, record.length);
    return true;
}

/**
 * Kick writeout thread
 *
 * @wait    Whether to wait for writeout thread to complete
 */
static void flush_trace_file(bool wait)
{
    g_static_mutex_lock(&trace_lock);
    trace_available = true;
    g_cond_signal(trace_available_cond);

    if (wait) {
        g_cond_wait(trace_empty_cond, g_static_mutex_get_mutex(&trace_lock));
    }

    g_static_mutex_unlock(&trace_lock);
}

static void wait_for_trace_records_available(void)
{
    g_static_mutex_lock(&trace_lock);
    while (!(trace_available && trace_writeout_enabled)) {
        g_cond_signal(trace_empty_cond);
        g_cond_wait(trace_available_cond,
                    g_static_mutex_get_mutex(&trace_lock));
    }
    trace_available = false;
    g_static_mutex_unlock(&trace_lock);
}

static gpointer writeout_thread(gpointer opaque)
{
    TraceRecord *recordptr;
    union {
        TraceRecord rec;
        uint8_t bytes[sizeof(TraceRecord) + sizeof(uint64_t)];
    } dropped;
    unsigned int idx = 0;
    uint64_t dropped_count;
    size_t unused __attribute__ ((unused));

    for (;;) {
        wait_for_trace_records_available();

        if (dropped_events) {
            dropped.rec.event = DROPPED_EVENT_ID;
            dropped.rec.timestamp_ns = get_clock();
            dropped.rec.length = sizeof(TraceRecord) + sizeof(dropped_events);
            dropped.rec.reserved = 0;
            while (1) {
                dropped_count = dropped_events;
                if (g_atomic_int_compare_and_exchange((gint *)&dropped_events,
                                                      dropped_count, 0)) {
                    break;
                }
            }
            memcpy(dropped.rec.arguments, &dropped_count, sizeof(uint64_t));
            unused = fwrite(&dropped.rec, dropped.rec.length, 1, trace_fp);
        }

        while (get_trace_record(idx, &recordptr)) {
            unused = fwrite(recordptr, recordptr->length, 1, trace_fp);
            writeout_idx += recordptr->length;
            free(recordptr); /* don't use g_free, can deadlock when traced */
            idx = writeout_idx % TRACE_BUF_LEN;
        }

        fflush(trace_fp);
    }
    return NULL;
}

void trace_record_write_u64(TraceBufferRecord *rec, uint64_t val)
{
    rec->rec_off = write_to_buffer(rec->rec_off, &val, sizeof(uint64_t));
}

void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen)
{
    /* Write string length first */
    rec->rec_off = write_to_buffer(rec->rec_off, &slen, sizeof(slen));
    /* Write actual string now */
    rec->rec_off = write_to_buffer(rec->rec_off, (void *)s, slen);
}
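
/*
 * Illustrative sketch (not part of this file): generated tracepoint code is
 * expected to drive the writer-side API in this file roughly as follows.
 * MY_EVENT_ID and arg0 are hypothetical names used only for the example.
 *
 *     static void trace_my_event(uint64_t arg0)
 *     {
 *         TraceBufferRecord rec;
 *
 *         if (trace_record_start(&rec, MY_EVENT_ID, sizeof(arg0)) != 0) {
 *             return; // buffer full, the event was counted as dropped
 *         }
 *         trace_record_write_u64(&rec, arg0);
 *         trace_record_finish(&rec); // marks the record valid for writeout
 *     }
 */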

int trace_record_start(TraceBufferRecord *rec, TraceEventID event, size_t datasize)
{
    unsigned int idx, rec_off, old_idx, new_idx;
    uint32_t rec_len = sizeof(TraceRecord) + datasize;
    uint64_t timestamp_ns = get_clock();

    while (1) {
        old_idx = trace_idx;
        smp_rmb();
        new_idx = old_idx + rec_len;

        if (new_idx - writeout_idx > TRACE_BUF_LEN) {
            /* Trace buffer full, event dropped! */
            g_atomic_int_inc((gint *)&dropped_events);
            return -ENOSPC;
        }

        if (g_atomic_int_compare_and_exchange((gint *)&trace_idx,
                                              old_idx, new_idx)) {
            break;
        }
    }

    idx = old_idx % TRACE_BUF_LEN;
    /* To check later if threshold crossed */
    rec->next_tbuf_idx = new_idx % TRACE_BUF_LEN;

    rec_off = idx;
    rec_off = write_to_buffer(rec_off, (uint8_t *)&event, sizeof(event));
    rec_off = write_to_buffer(rec_off, (uint8_t *)&timestamp_ns, sizeof(timestamp_ns));
    rec_off = write_to_buffer(rec_off, (uint8_t *)&rec_len, sizeof(rec_len));

    rec->tbuf_idx = idx;
    rec->rec_off = (idx + sizeof(TraceRecord)) % TRACE_BUF_LEN;
    return 0;
}

static void read_from_buffer(unsigned int idx, void *dataptr, size_t size)
{
    uint8_t *data_ptr = dataptr;
    uint32_t x = 0;
    while (x < size) {
        if (idx >= TRACE_BUF_LEN) {
            idx = idx % TRACE_BUF_LEN;
        }
        data_ptr[x++] = trace_buf[idx++];
    }
}

static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size)
{
    uint8_t *data_ptr = dataptr;
    uint32_t x = 0;
    while (x < size) {
        if (idx >= TRACE_BUF_LEN) {
            idx = idx % TRACE_BUF_LEN;
        }
        trace_buf[idx++] = data_ptr[x++];
    }
    return idx; /* most callers want to know where to write next */
}

void trace_record_finish(TraceBufferRecord *rec)
{
    uint8_t temp_rec[sizeof(TraceRecord)];
    TraceRecord *record = (TraceRecord *) temp_rec;
    read_from_buffer(rec->tbuf_idx, temp_rec, sizeof(TraceRecord));
    smp_wmb(); /* write barrier before marking as valid */
    record->event |= TRACE_RECORD_VALID;
    write_to_buffer(rec->tbuf_idx, temp_rec, sizeof(TraceRecord));

    if ((trace_idx - writeout_idx) > TRACE_BUF_FLUSH_THRESHOLD) {
        flush_trace_file(false);
    }
}

void st_set_trace_file_enabled(bool enable)
{
    if (enable == !!trace_fp) {
        return; /* no change */
    }

    /* Halt trace writeout */
    flush_trace_file(true);
    trace_writeout_enabled = false;
    flush_trace_file(true);

    if (enable) {
        static const TraceRecordHeader header = {
            .header_event_id = HEADER_EVENT_ID,
            .header_magic = HEADER_MAGIC,
            /* Older log readers will check for version at next location */
            .header_version = HEADER_VERSION,
        };

        trace_fp = fopen(trace_file_name, "wb");
        if (!trace_fp) {
            return;
        }

        if (fwrite(&header, sizeof header, 1, trace_fp) != 1) {
            fclose(trace_fp);
            trace_fp = NULL;
            return;
        }

        /* Resume trace writeout */
        trace_writeout_enabled = true;
        flush_trace_file(false);
    } else {
        fclose(trace_fp);
        trace_fp = NULL;
    }
}

/**
 * Set the name of a trace file
 *
 * @file    The trace file name or NULL for the default name-<pid> set at
 *          config time
 */
bool st_set_trace_file(const char *file)
{
    st_set_trace_file_enabled(false);

    free(trace_file_name);

    if (!file) {
        if (asprintf(&trace_file_name, CONFIG_TRACE_FILE, getpid()) < 0) {
            trace_file_name = NULL;
            return false;
        }
    } else {
        if (asprintf(&trace_file_name, "%s", file) < 0) {
            trace_file_name = NULL;
            return false;
        }
    }

    st_set_trace_file_enabled(true);
    return true;
}

void st_print_trace_file_status(FILE *stream, int (*stream_printf)(FILE *stream, const char *fmt, ...))
{
    stream_printf(stream, "Trace file \"%s\" %s.\n",
                  trace_file_name, trace_fp ? "on" : "off");
}

void st_flush_trace_buffer(void)
{
    flush_trace_file(true);
}

void trace_print_events(FILE *stream, fprintf_function stream_printf)
{
    unsigned int i;

    for (i = 0; i < NR_TRACE_EVENTS; i++) {
        stream_printf(stream, "%s [Event ID %u] : state %u\n",
                      trace_list[i].tp_name, i, trace_list[i].state);
    }
}

bool trace_event_set_state(const char *name, bool state)
{
    unsigned int i;
    unsigned int len;
    bool wildcard = false;
    bool matched = false;

    len = strlen(name);
    if (len > 0 && name[len - 1] == '*') {
        wildcard = true;
        len -= 1;
    }
    for (i = 0; i < NR_TRACE_EVENTS; i++) {
        if (wildcard) {
            if (!strncmp(trace_list[i].tp_name, name, len)) {
                trace_list[i].state = state;
                matched = true;
            }
            continue;
        }
        if (!strcmp(trace_list[i].tp_name, name)) {
            trace_list[i].state = state;
            return true;
        }
    }
    return matched;
}

/* Helper function to create a thread with signals blocked.  Use glib's
 * portable threads since QEMU abstractions cannot be used due to reentrancy in
 * the tracer.  Also note the signal masking on POSIX hosts so that the thread
 * does not steal signals when the rest of the program wants them blocked.
 */
static GThread *trace_thread_create(GThreadFunc fn)
{
    GThread *thread;
#ifndef _WIN32
    sigset_t set, oldset;

    sigfillset(&set);
    pthread_sigmask(SIG_SETMASK, &set, &oldset);
#endif
    thread = g_thread_create(fn, NULL, FALSE, NULL);
#ifndef _WIN32
    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
#endif

    return thread;
}

bool trace_backend_init(const char *events, const char *file)
{
    GThread *thread;

    if (!g_thread_supported()) {
#if !GLIB_CHECK_VERSION(2, 31, 0)
        g_thread_init(NULL);
#else
        fprintf(stderr, "glib threading failed to initialize.\n");
        exit(1);
#endif
    }

    trace_available_cond = g_cond_new();
    trace_empty_cond = g_cond_new();

    thread = trace_thread_create(writeout_thread);
    if (!thread) {
        fprintf(stderr, "warning: unable to initialize simple trace backend\n");
        return false;
    }

    atexit(st_flush_trace_buffer);
    trace_backend_init_events(events);
    st_set_trace_file(file);
    return true;
}
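
/*
 * Illustrative sketch (not part of this file): how the control-side functions
 * above fit together.  The file name is a hypothetical example.
 *
 *     st_set_trace_file("qemu-trace.log");         // set output file, enable writeout
 *     st_print_trace_file_status(stderr, fprintf); // report current trace file state
 *     st_flush_trace_buffer();                     // push pending records to disk
 *     st_set_trace_file_enabled(false);            // stop writeout, close the file
 */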