1 /* 2 * Simple trace backend 3 * 4 * Copyright IBM, Corp. 2010 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2. See 7 * the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include <stdlib.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <time.h> 15 #ifndef _WIN32 16 #include <signal.h> 17 #include <pthread.h> 18 #endif 19 #include "qemu/timer.h" 20 #include "trace.h" 21 #include "trace/control.h" 22 23 /** Trace file header event ID */ 24 #define HEADER_EVENT_ID (~(uint64_t)0) /* avoids conflicting with TraceEventIDs */ 25 26 /** Trace file magic number */ 27 #define HEADER_MAGIC 0xf2b177cb0aa429b4ULL 28 29 /** Trace file version number, bump if format changes */ 30 #define HEADER_VERSION 2 31 32 /** Records were dropped event ID */ 33 #define DROPPED_EVENT_ID (~(uint64_t)0 - 1) 34 35 /** Trace record is valid */ 36 #define TRACE_RECORD_VALID ((uint64_t)1 << 63) 37 38 /* 39 * Trace records are written out by a dedicated thread. The thread waits for 40 * records to become available, writes them out, and then waits again. 41 */ 42 static GStaticMutex trace_lock = G_STATIC_MUTEX_INIT; 43 static GCond *trace_available_cond; 44 static GCond *trace_empty_cond; 45 static bool trace_available; 46 static bool trace_writeout_enabled; 47 48 enum { 49 TRACE_BUF_LEN = 4096 * 64, 50 TRACE_BUF_FLUSH_THRESHOLD = TRACE_BUF_LEN / 4, 51 }; 52 53 uint8_t trace_buf[TRACE_BUF_LEN]; 54 static unsigned int trace_idx; 55 static unsigned int writeout_idx; 56 static int dropped_events; 57 static FILE *trace_fp; 58 static char *trace_file_name; 59 60 /* * Trace buffer entry */ 61 typedef struct { 62 uint64_t event; /* TraceEventID */ 63 uint64_t timestamp_ns; 64 uint32_t length; /* in bytes */ 65 uint32_t reserved; /* unused */ 66 uint64_t arguments[]; 67 } TraceRecord; 68 69 typedef struct { 70 uint64_t header_event_id; /* HEADER_EVENT_ID */ 71 uint64_t header_magic; /* HEADER_MAGIC */ 72 uint64_t header_version; /* HEADER_VERSION */ 73 } TraceLogHeader; 74 75 76 static void read_from_buffer(unsigned int idx, void *dataptr, size_t size); 77 static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size); 78 79 static void clear_buffer_range(unsigned int idx, size_t len) 80 { 81 uint32_t num = 0; 82 while (num < len) { 83 if (idx >= TRACE_BUF_LEN) { 84 idx = idx % TRACE_BUF_LEN; 85 } 86 trace_buf[idx++] = 0; 87 num++; 88 } 89 } 90 /** 91 * Read a trace record from the trace buffer 92 * 93 * @idx Trace buffer index 94 * @record Trace record to fill 95 * 96 * Returns false if the record is not valid. 97 */ 98 static bool get_trace_record(unsigned int idx, TraceRecord **recordptr) 99 { 100 uint64_t event_flag = 0; 101 TraceRecord record; 102 /* read the event flag to see if its a valid record */ 103 read_from_buffer(idx, &record, sizeof(event_flag)); 104 105 if (!(record.event & TRACE_RECORD_VALID)) { 106 return false; 107 } 108 109 smp_rmb(); /* read memory barrier before accessing record */ 110 /* read the record header to know record length */ 111 read_from_buffer(idx, &record, sizeof(TraceRecord)); 112 *recordptr = malloc(record.length); /* dont use g_malloc, can deadlock when traced */ 113 /* make a copy of record to avoid being overwritten */ 114 read_from_buffer(idx, *recordptr, record.length); 115 smp_rmb(); /* memory barrier before clearing valid flag */ 116 (*recordptr)->event &= ~TRACE_RECORD_VALID; 117 /* clear the trace buffer range for consumed record otherwise any byte 118 * with its MSB set may be considered as a valid event id when the writer 119 * thread crosses this range of buffer again. 120 */ 121 clear_buffer_range(idx, record.length); 122 return true; 123 } 124 125 /** 126 * Kick writeout thread 127 * 128 * @wait Whether to wait for writeout thread to complete 129 */ 130 static void flush_trace_file(bool wait) 131 { 132 g_static_mutex_lock(&trace_lock); 133 trace_available = true; 134 g_cond_signal(trace_available_cond); 135 136 if (wait) { 137 g_cond_wait(trace_empty_cond, g_static_mutex_get_mutex(&trace_lock)); 138 } 139 140 g_static_mutex_unlock(&trace_lock); 141 } 142 143 static void wait_for_trace_records_available(void) 144 { 145 g_static_mutex_lock(&trace_lock); 146 while (!(trace_available && trace_writeout_enabled)) { 147 g_cond_signal(trace_empty_cond); 148 g_cond_wait(trace_available_cond, 149 g_static_mutex_get_mutex(&trace_lock)); 150 } 151 trace_available = false; 152 g_static_mutex_unlock(&trace_lock); 153 } 154 155 static gpointer writeout_thread(gpointer opaque) 156 { 157 TraceRecord *recordptr; 158 union { 159 TraceRecord rec; 160 uint8_t bytes[sizeof(TraceRecord) + sizeof(uint64_t)]; 161 } dropped; 162 unsigned int idx = 0; 163 int dropped_count; 164 size_t unused __attribute__ ((unused)); 165 166 for (;;) { 167 wait_for_trace_records_available(); 168 169 if (g_atomic_int_get(&dropped_events)) { 170 dropped.rec.event = DROPPED_EVENT_ID, 171 dropped.rec.timestamp_ns = get_clock(); 172 dropped.rec.length = sizeof(TraceRecord) + sizeof(uint64_t), 173 dropped.rec.reserved = 0; 174 do { 175 dropped_count = g_atomic_int_get(&dropped_events); 176 } while (!g_atomic_int_compare_and_exchange(&dropped_events, 177 dropped_count, 0)); 178 dropped.rec.arguments[0] = dropped_count; 179 unused = fwrite(&dropped.rec, dropped.rec.length, 1, trace_fp); 180 } 181 182 while (get_trace_record(idx, &recordptr)) { 183 unused = fwrite(recordptr, recordptr->length, 1, trace_fp); 184 writeout_idx += recordptr->length; 185 free(recordptr); /* dont use g_free, can deadlock when traced */ 186 idx = writeout_idx % TRACE_BUF_LEN; 187 } 188 189 fflush(trace_fp); 190 } 191 return NULL; 192 } 193 194 void trace_record_write_u64(TraceBufferRecord *rec, uint64_t val) 195 { 196 rec->rec_off = write_to_buffer(rec->rec_off, &val, sizeof(uint64_t)); 197 } 198 199 void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen) 200 { 201 /* Write string length first */ 202 rec->rec_off = write_to_buffer(rec->rec_off, &slen, sizeof(slen)); 203 /* Write actual string now */ 204 rec->rec_off = write_to_buffer(rec->rec_off, (void*)s, slen); 205 } 206 207 int trace_record_start(TraceBufferRecord *rec, TraceEventID event, size_t datasize) 208 { 209 unsigned int idx, rec_off, old_idx, new_idx; 210 uint32_t rec_len = sizeof(TraceRecord) + datasize; 211 uint64_t timestamp_ns = get_clock(); 212 213 do { 214 old_idx = g_atomic_int_get(&trace_idx); 215 smp_rmb(); 216 new_idx = old_idx + rec_len; 217 218 if (new_idx - writeout_idx > TRACE_BUF_LEN) { 219 /* Trace Buffer Full, Event dropped ! */ 220 g_atomic_int_inc(&dropped_events); 221 return -ENOSPC; 222 } 223 } while (!g_atomic_int_compare_and_exchange(&trace_idx, old_idx, new_idx)); 224 225 idx = old_idx % TRACE_BUF_LEN; 226 227 rec_off = idx; 228 rec_off = write_to_buffer(rec_off, &event, sizeof(event)); 229 rec_off = write_to_buffer(rec_off, ×tamp_ns, sizeof(timestamp_ns)); 230 rec_off = write_to_buffer(rec_off, &rec_len, sizeof(rec_len)); 231 232 rec->tbuf_idx = idx; 233 rec->rec_off = (idx + sizeof(TraceRecord)) % TRACE_BUF_LEN; 234 return 0; 235 } 236 237 static void read_from_buffer(unsigned int idx, void *dataptr, size_t size) 238 { 239 uint8_t *data_ptr = dataptr; 240 uint32_t x = 0; 241 while (x < size) { 242 if (idx >= TRACE_BUF_LEN) { 243 idx = idx % TRACE_BUF_LEN; 244 } 245 data_ptr[x++] = trace_buf[idx++]; 246 } 247 } 248 249 static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size) 250 { 251 uint8_t *data_ptr = dataptr; 252 uint32_t x = 0; 253 while (x < size) { 254 if (idx >= TRACE_BUF_LEN) { 255 idx = idx % TRACE_BUF_LEN; 256 } 257 trace_buf[idx++] = data_ptr[x++]; 258 } 259 return idx; /* most callers wants to know where to write next */ 260 } 261 262 void trace_record_finish(TraceBufferRecord *rec) 263 { 264 TraceRecord record; 265 read_from_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord)); 266 smp_wmb(); /* write barrier before marking as valid */ 267 record.event |= TRACE_RECORD_VALID; 268 write_to_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord)); 269 270 if ((g_atomic_int_get(&trace_idx) - writeout_idx) 271 > TRACE_BUF_FLUSH_THRESHOLD) { 272 flush_trace_file(false); 273 } 274 } 275 276 void st_set_trace_file_enabled(bool enable) 277 { 278 if (enable == !!trace_fp) { 279 return; /* no change */ 280 } 281 282 /* Halt trace writeout */ 283 flush_trace_file(true); 284 trace_writeout_enabled = false; 285 flush_trace_file(true); 286 287 if (enable) { 288 static const TraceLogHeader header = { 289 .header_event_id = HEADER_EVENT_ID, 290 .header_magic = HEADER_MAGIC, 291 /* Older log readers will check for version at next location */ 292 .header_version = HEADER_VERSION, 293 }; 294 295 trace_fp = fopen(trace_file_name, "wb"); 296 if (!trace_fp) { 297 return; 298 } 299 300 if (fwrite(&header, sizeof header, 1, trace_fp) != 1) { 301 fclose(trace_fp); 302 trace_fp = NULL; 303 return; 304 } 305 306 /* Resume trace writeout */ 307 trace_writeout_enabled = true; 308 flush_trace_file(false); 309 } else { 310 fclose(trace_fp); 311 trace_fp = NULL; 312 } 313 } 314 315 /** 316 * Set the name of a trace file 317 * 318 * @file The trace file name or NULL for the default name-<pid> set at 319 * config time 320 */ 321 bool st_set_trace_file(const char *file) 322 { 323 st_set_trace_file_enabled(false); 324 325 g_free(trace_file_name); 326 327 if (!file) { 328 trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, getpid()); 329 } else { 330 trace_file_name = g_strdup_printf("%s", file); 331 } 332 333 st_set_trace_file_enabled(true); 334 return true; 335 } 336 337 void st_print_trace_file_status(FILE *stream, int (*stream_printf)(FILE *stream, const char *fmt, ...)) 338 { 339 stream_printf(stream, "Trace file \"%s\" %s.\n", 340 trace_file_name, trace_fp ? "on" : "off"); 341 } 342 343 void st_flush_trace_buffer(void) 344 { 345 flush_trace_file(true); 346 } 347 348 void trace_print_events(FILE *stream, fprintf_function stream_printf) 349 { 350 unsigned int i; 351 352 for (i = 0; i < NR_TRACE_EVENTS; i++) { 353 stream_printf(stream, "%s [Event ID %u] : state %u\n", 354 trace_list[i].tp_name, i, trace_list[i].state); 355 } 356 } 357 358 bool trace_event_set_state(const char *name, bool state) 359 { 360 unsigned int i; 361 unsigned int len; 362 bool wildcard = false; 363 bool matched = false; 364 365 len = strlen(name); 366 if (len > 0 && name[len - 1] == '*') { 367 wildcard = true; 368 len -= 1; 369 } 370 for (i = 0; i < NR_TRACE_EVENTS; i++) { 371 if (wildcard) { 372 if (!strncmp(trace_list[i].tp_name, name, len)) { 373 trace_list[i].state = state; 374 matched = true; 375 } 376 continue; 377 } 378 if (!strcmp(trace_list[i].tp_name, name)) { 379 trace_list[i].state = state; 380 return true; 381 } 382 } 383 return matched; 384 } 385 386 /* Helper function to create a thread with signals blocked. Use glib's 387 * portable threads since QEMU abstractions cannot be used due to reentrancy in 388 * the tracer. Also note the signal masking on POSIX hosts so that the thread 389 * does not steal signals when the rest of the program wants them blocked. 390 */ 391 static GThread *trace_thread_create(GThreadFunc fn) 392 { 393 GThread *thread; 394 #ifndef _WIN32 395 sigset_t set, oldset; 396 397 sigfillset(&set); 398 pthread_sigmask(SIG_SETMASK, &set, &oldset); 399 #endif 400 thread = g_thread_create(fn, NULL, FALSE, NULL); 401 #ifndef _WIN32 402 pthread_sigmask(SIG_SETMASK, &oldset, NULL); 403 #endif 404 405 return thread; 406 } 407 408 bool trace_backend_init(const char *events, const char *file) 409 { 410 GThread *thread; 411 412 if (!g_thread_supported()) { 413 #if !GLIB_CHECK_VERSION(2, 31, 0) 414 g_thread_init(NULL); 415 #else 416 fprintf(stderr, "glib threading failed to initialize.\n"); 417 exit(1); 418 #endif 419 } 420 421 trace_available_cond = g_cond_new(); 422 trace_empty_cond = g_cond_new(); 423 424 thread = trace_thread_create(writeout_thread); 425 if (!thread) { 426 fprintf(stderr, "warning: unable to initialize simple trace backend\n"); 427 return false; 428 } 429 430 atexit(st_flush_trace_buffer); 431 trace_backend_init_events(events); 432 st_set_trace_file(file); 433 return true; 434 } 435