1 /* 2 * Simple trace backend 3 * 4 * Copyright IBM, Corp. 2010 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2. See 7 * the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include <stdlib.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <time.h> 15 #ifndef _WIN32 16 #include <signal.h> 17 #include <pthread.h> 18 #endif 19 #include "qemu/timer.h" 20 #include "trace.h" 21 #include "trace/control.h" 22 23 /** Trace file header event ID */ 24 #define HEADER_EVENT_ID (~(uint64_t)0) /* avoids conflicting with TraceEventIDs */ 25 26 /** Trace file magic number */ 27 #define HEADER_MAGIC 0xf2b177cb0aa429b4ULL 28 29 /** Trace file version number, bump if format changes */ 30 #define HEADER_VERSION 2 31 32 /** Records were dropped event ID */ 33 #define DROPPED_EVENT_ID (~(uint64_t)0 - 1) 34 35 /** Trace record is valid */ 36 #define TRACE_RECORD_VALID ((uint64_t)1 << 63) 37 38 /* 39 * Trace records are written out by a dedicated thread. The thread waits for 40 * records to become available, writes them out, and then waits again. 41 */ 42 static GStaticMutex trace_lock = G_STATIC_MUTEX_INIT; 43 44 /* g_cond_new() was deprecated in glib 2.31 but we still need to support it */ 45 #if GLIB_CHECK_VERSION(2, 31, 0) 46 static GCond the_trace_available_cond; 47 static GCond the_trace_empty_cond; 48 static GCond *trace_available_cond = &the_trace_available_cond; 49 static GCond *trace_empty_cond = &the_trace_empty_cond; 50 #else 51 static GCond *trace_available_cond; 52 static GCond *trace_empty_cond; 53 #endif 54 55 static bool trace_available; 56 static bool trace_writeout_enabled; 57 58 enum { 59 TRACE_BUF_LEN = 4096 * 64, 60 TRACE_BUF_FLUSH_THRESHOLD = TRACE_BUF_LEN / 4, 61 }; 62 63 uint8_t trace_buf[TRACE_BUF_LEN]; 64 static volatile gint trace_idx; 65 static unsigned int writeout_idx; 66 static volatile gint dropped_events; 67 static FILE *trace_fp; 68 static char *trace_file_name; 69 70 /* * Trace buffer entry */ 71 typedef struct { 72 uint64_t event; /* TraceEventID */ 73 uint64_t timestamp_ns; 74 uint32_t length; /* in bytes */ 75 uint32_t reserved; /* unused */ 76 uint64_t arguments[]; 77 } TraceRecord; 78 79 typedef struct { 80 uint64_t header_event_id; /* HEADER_EVENT_ID */ 81 uint64_t header_magic; /* HEADER_MAGIC */ 82 uint64_t header_version; /* HEADER_VERSION */ 83 } TraceLogHeader; 84 85 86 static void read_from_buffer(unsigned int idx, void *dataptr, size_t size); 87 static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size); 88 89 static void clear_buffer_range(unsigned int idx, size_t len) 90 { 91 uint32_t num = 0; 92 while (num < len) { 93 if (idx >= TRACE_BUF_LEN) { 94 idx = idx % TRACE_BUF_LEN; 95 } 96 trace_buf[idx++] = 0; 97 num++; 98 } 99 } 100 /** 101 * Read a trace record from the trace buffer 102 * 103 * @idx Trace buffer index 104 * @record Trace record to fill 105 * 106 * Returns false if the record is not valid. 107 */ 108 static bool get_trace_record(unsigned int idx, TraceRecord **recordptr) 109 { 110 uint64_t event_flag = 0; 111 TraceRecord record; 112 /* read the event flag to see if its a valid record */ 113 read_from_buffer(idx, &record, sizeof(event_flag)); 114 115 if (!(record.event & TRACE_RECORD_VALID)) { 116 return false; 117 } 118 119 smp_rmb(); /* read memory barrier before accessing record */ 120 /* read the record header to know record length */ 121 read_from_buffer(idx, &record, sizeof(TraceRecord)); 122 *recordptr = malloc(record.length); /* dont use g_malloc, can deadlock when traced */ 123 /* make a copy of record to avoid being overwritten */ 124 read_from_buffer(idx, *recordptr, record.length); 125 smp_rmb(); /* memory barrier before clearing valid flag */ 126 (*recordptr)->event &= ~TRACE_RECORD_VALID; 127 /* clear the trace buffer range for consumed record otherwise any byte 128 * with its MSB set may be considered as a valid event id when the writer 129 * thread crosses this range of buffer again. 130 */ 131 clear_buffer_range(idx, record.length); 132 return true; 133 } 134 135 /** 136 * Kick writeout thread 137 * 138 * @wait Whether to wait for writeout thread to complete 139 */ 140 static void flush_trace_file(bool wait) 141 { 142 g_static_mutex_lock(&trace_lock); 143 trace_available = true; 144 g_cond_signal(trace_available_cond); 145 146 if (wait) { 147 g_cond_wait(trace_empty_cond, g_static_mutex_get_mutex(&trace_lock)); 148 } 149 150 g_static_mutex_unlock(&trace_lock); 151 } 152 153 static void wait_for_trace_records_available(void) 154 { 155 g_static_mutex_lock(&trace_lock); 156 while (!(trace_available && trace_writeout_enabled)) { 157 g_cond_signal(trace_empty_cond); 158 g_cond_wait(trace_available_cond, 159 g_static_mutex_get_mutex(&trace_lock)); 160 } 161 trace_available = false; 162 g_static_mutex_unlock(&trace_lock); 163 } 164 165 static gpointer writeout_thread(gpointer opaque) 166 { 167 TraceRecord *recordptr; 168 union { 169 TraceRecord rec; 170 uint8_t bytes[sizeof(TraceRecord) + sizeof(uint64_t)]; 171 } dropped; 172 unsigned int idx = 0; 173 int dropped_count; 174 size_t unused __attribute__ ((unused)); 175 176 for (;;) { 177 wait_for_trace_records_available(); 178 179 if (g_atomic_int_get(&dropped_events)) { 180 dropped.rec.event = DROPPED_EVENT_ID, 181 dropped.rec.timestamp_ns = get_clock(); 182 dropped.rec.length = sizeof(TraceRecord) + sizeof(uint64_t), 183 dropped.rec.reserved = 0; 184 do { 185 dropped_count = g_atomic_int_get(&dropped_events); 186 } while (!g_atomic_int_compare_and_exchange(&dropped_events, 187 dropped_count, 0)); 188 dropped.rec.arguments[0] = dropped_count; 189 unused = fwrite(&dropped.rec, dropped.rec.length, 1, trace_fp); 190 } 191 192 while (get_trace_record(idx, &recordptr)) { 193 unused = fwrite(recordptr, recordptr->length, 1, trace_fp); 194 writeout_idx += recordptr->length; 195 free(recordptr); /* dont use g_free, can deadlock when traced */ 196 idx = writeout_idx % TRACE_BUF_LEN; 197 } 198 199 fflush(trace_fp); 200 } 201 return NULL; 202 } 203 204 void trace_record_write_u64(TraceBufferRecord *rec, uint64_t val) 205 { 206 rec->rec_off = write_to_buffer(rec->rec_off, &val, sizeof(uint64_t)); 207 } 208 209 void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen) 210 { 211 /* Write string length first */ 212 rec->rec_off = write_to_buffer(rec->rec_off, &slen, sizeof(slen)); 213 /* Write actual string now */ 214 rec->rec_off = write_to_buffer(rec->rec_off, (void*)s, slen); 215 } 216 217 int trace_record_start(TraceBufferRecord *rec, TraceEventID event, size_t datasize) 218 { 219 unsigned int idx, rec_off, old_idx, new_idx; 220 uint32_t rec_len = sizeof(TraceRecord) + datasize; 221 uint64_t event_u64 = event; 222 uint64_t timestamp_ns = get_clock(); 223 224 do { 225 old_idx = g_atomic_int_get(&trace_idx); 226 smp_rmb(); 227 new_idx = old_idx + rec_len; 228 229 if (new_idx - writeout_idx > TRACE_BUF_LEN) { 230 /* Trace Buffer Full, Event dropped ! */ 231 g_atomic_int_inc(&dropped_events); 232 return -ENOSPC; 233 } 234 } while (!g_atomic_int_compare_and_exchange(&trace_idx, old_idx, new_idx)); 235 236 idx = old_idx % TRACE_BUF_LEN; 237 238 rec_off = idx; 239 rec_off = write_to_buffer(rec_off, &event_u64, sizeof(event_u64)); 240 rec_off = write_to_buffer(rec_off, ×tamp_ns, sizeof(timestamp_ns)); 241 rec_off = write_to_buffer(rec_off, &rec_len, sizeof(rec_len)); 242 243 rec->tbuf_idx = idx; 244 rec->rec_off = (idx + sizeof(TraceRecord)) % TRACE_BUF_LEN; 245 return 0; 246 } 247 248 static void read_from_buffer(unsigned int idx, void *dataptr, size_t size) 249 { 250 uint8_t *data_ptr = dataptr; 251 uint32_t x = 0; 252 while (x < size) { 253 if (idx >= TRACE_BUF_LEN) { 254 idx = idx % TRACE_BUF_LEN; 255 } 256 data_ptr[x++] = trace_buf[idx++]; 257 } 258 } 259 260 static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size) 261 { 262 uint8_t *data_ptr = dataptr; 263 uint32_t x = 0; 264 while (x < size) { 265 if (idx >= TRACE_BUF_LEN) { 266 idx = idx % TRACE_BUF_LEN; 267 } 268 trace_buf[idx++] = data_ptr[x++]; 269 } 270 return idx; /* most callers wants to know where to write next */ 271 } 272 273 void trace_record_finish(TraceBufferRecord *rec) 274 { 275 TraceRecord record; 276 read_from_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord)); 277 smp_wmb(); /* write barrier before marking as valid */ 278 record.event |= TRACE_RECORD_VALID; 279 write_to_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord)); 280 281 if (((unsigned int)g_atomic_int_get(&trace_idx) - writeout_idx) 282 > TRACE_BUF_FLUSH_THRESHOLD) { 283 flush_trace_file(false); 284 } 285 } 286 287 void st_set_trace_file_enabled(bool enable) 288 { 289 if (enable == !!trace_fp) { 290 return; /* no change */ 291 } 292 293 /* Halt trace writeout */ 294 flush_trace_file(true); 295 trace_writeout_enabled = false; 296 flush_trace_file(true); 297 298 if (enable) { 299 static const TraceLogHeader header = { 300 .header_event_id = HEADER_EVENT_ID, 301 .header_magic = HEADER_MAGIC, 302 /* Older log readers will check for version at next location */ 303 .header_version = HEADER_VERSION, 304 }; 305 306 trace_fp = fopen(trace_file_name, "wb"); 307 if (!trace_fp) { 308 return; 309 } 310 311 if (fwrite(&header, sizeof header, 1, trace_fp) != 1) { 312 fclose(trace_fp); 313 trace_fp = NULL; 314 return; 315 } 316 317 /* Resume trace writeout */ 318 trace_writeout_enabled = true; 319 flush_trace_file(false); 320 } else { 321 fclose(trace_fp); 322 trace_fp = NULL; 323 } 324 } 325 326 /** 327 * Set the name of a trace file 328 * 329 * @file The trace file name or NULL for the default name-<pid> set at 330 * config time 331 */ 332 bool st_set_trace_file(const char *file) 333 { 334 st_set_trace_file_enabled(false); 335 336 g_free(trace_file_name); 337 338 if (!file) { 339 trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, getpid()); 340 } else { 341 trace_file_name = g_strdup_printf("%s", file); 342 } 343 344 st_set_trace_file_enabled(true); 345 return true; 346 } 347 348 void st_print_trace_file_status(FILE *stream, int (*stream_printf)(FILE *stream, const char *fmt, ...)) 349 { 350 stream_printf(stream, "Trace file \"%s\" %s.\n", 351 trace_file_name, trace_fp ? "on" : "off"); 352 } 353 354 void st_flush_trace_buffer(void) 355 { 356 flush_trace_file(true); 357 } 358 359 void trace_print_events(FILE *stream, fprintf_function stream_printf) 360 { 361 unsigned int i; 362 363 for (i = 0; i < trace_event_count(); i++) { 364 TraceEvent *ev = trace_event_id(i); 365 stream_printf(stream, "%s [Event ID %u] : state %u\n", 366 trace_event_get_name(ev), i, trace_event_get_state_dynamic(ev)); 367 } 368 } 369 370 void trace_event_set_state_dynamic_backend(TraceEvent *ev, bool state) 371 { 372 ev->dstate = state; 373 } 374 375 /* Helper function to create a thread with signals blocked. Use glib's 376 * portable threads since QEMU abstractions cannot be used due to reentrancy in 377 * the tracer. Also note the signal masking on POSIX hosts so that the thread 378 * does not steal signals when the rest of the program wants them blocked. 379 */ 380 static GThread *trace_thread_create(GThreadFunc fn) 381 { 382 GThread *thread; 383 #ifndef _WIN32 384 sigset_t set, oldset; 385 386 sigfillset(&set); 387 pthread_sigmask(SIG_SETMASK, &set, &oldset); 388 #endif 389 390 #if GLIB_CHECK_VERSION(2, 31, 0) 391 thread = g_thread_new("trace-thread", fn, NULL); 392 #else 393 thread = g_thread_create(fn, NULL, FALSE, NULL); 394 #endif 395 396 #ifndef _WIN32 397 pthread_sigmask(SIG_SETMASK, &oldset, NULL); 398 #endif 399 400 return thread; 401 } 402 403 bool trace_backend_init(const char *events, const char *file) 404 { 405 GThread *thread; 406 407 if (!g_thread_supported()) { 408 #if !GLIB_CHECK_VERSION(2, 31, 0) 409 g_thread_init(NULL); 410 #else 411 fprintf(stderr, "glib threading failed to initialize.\n"); 412 exit(1); 413 #endif 414 } 415 416 #if !GLIB_CHECK_VERSION(2, 31, 0) 417 trace_available_cond = g_cond_new(); 418 trace_empty_cond = g_cond_new(); 419 #endif 420 421 thread = trace_thread_create(writeout_thread); 422 if (!thread) { 423 fprintf(stderr, "warning: unable to initialize simple trace backend\n"); 424 return false; 425 } 426 427 atexit(st_flush_trace_buffer); 428 trace_backend_init_events(events); 429 st_set_trace_file(file); 430 return true; 431 } 432