1 /* 2 * Simple trace backend 3 * 4 * Copyright IBM, Corp. 2010 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2. See 7 * the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include <stdlib.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <time.h> 15 #ifndef _WIN32 16 #include <signal.h> 17 #include <pthread.h> 18 #endif 19 #include "qemu/timer.h" 20 #include "trace.h" 21 #include "trace/control.h" 22 #include "trace/simple.h" 23 24 /** Trace file header event ID */ 25 #define HEADER_EVENT_ID (~(uint64_t)0) /* avoids conflicting with TraceEventIDs */ 26 27 /** Trace file magic number */ 28 #define HEADER_MAGIC 0xf2b177cb0aa429b4ULL 29 30 /** Trace file version number, bump if format changes */ 31 #define HEADER_VERSION 3 32 33 /** Records were dropped event ID */ 34 #define DROPPED_EVENT_ID (~(uint64_t)0 - 1) 35 36 /** Trace record is valid */ 37 #define TRACE_RECORD_VALID ((uint64_t)1 << 63) 38 39 /* 40 * Trace records are written out by a dedicated thread. The thread waits for 41 * records to become available, writes them out, and then waits again. 42 */ 43 #if GLIB_CHECK_VERSION(2, 32, 0) 44 static GMutex trace_lock; 45 #define lock_trace_lock() g_mutex_lock(&trace_lock) 46 #define unlock_trace_lock() g_mutex_unlock(&trace_lock) 47 #define get_trace_lock_mutex() (&trace_lock) 48 #else 49 static GStaticMutex trace_lock = G_STATIC_MUTEX_INIT; 50 #define lock_trace_lock() g_static_mutex_lock(&trace_lock) 51 #define unlock_trace_lock() g_static_mutex_unlock(&trace_lock) 52 #define get_trace_lock_mutex() g_static_mutex_get_mutex(&trace_lock) 53 #endif 54 55 /* g_cond_new() was deprecated in glib 2.31 but we still need to support it */ 56 #if GLIB_CHECK_VERSION(2, 31, 0) 57 static GCond the_trace_available_cond; 58 static GCond the_trace_empty_cond; 59 static GCond *trace_available_cond = &the_trace_available_cond; 60 static GCond *trace_empty_cond = &the_trace_empty_cond; 61 #else 62 static GCond *trace_available_cond; 63 static GCond *trace_empty_cond; 64 #endif 65 66 static bool trace_available; 67 static bool trace_writeout_enabled; 68 69 enum { 70 TRACE_BUF_LEN = 4096 * 64, 71 TRACE_BUF_FLUSH_THRESHOLD = TRACE_BUF_LEN / 4, 72 }; 73 74 uint8_t trace_buf[TRACE_BUF_LEN]; 75 static volatile gint trace_idx; 76 static unsigned int writeout_idx; 77 static volatile gint dropped_events; 78 static uint32_t trace_pid; 79 static FILE *trace_fp; 80 static char *trace_file_name; 81 82 /* * Trace buffer entry */ 83 typedef struct { 84 uint64_t event; /* TraceEventID */ 85 uint64_t timestamp_ns; 86 uint32_t length; /* in bytes */ 87 uint32_t pid; 88 uint64_t arguments[]; 89 } TraceRecord; 90 91 typedef struct { 92 uint64_t header_event_id; /* HEADER_EVENT_ID */ 93 uint64_t header_magic; /* HEADER_MAGIC */ 94 uint64_t header_version; /* HEADER_VERSION */ 95 } TraceLogHeader; 96 97 98 static void read_from_buffer(unsigned int idx, void *dataptr, size_t size); 99 static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size); 100 101 static void clear_buffer_range(unsigned int idx, size_t len) 102 { 103 uint32_t num = 0; 104 while (num < len) { 105 if (idx >= TRACE_BUF_LEN) { 106 idx = idx % TRACE_BUF_LEN; 107 } 108 trace_buf[idx++] = 0; 109 num++; 110 } 111 } 112 /** 113 * Read a trace record from the trace buffer 114 * 115 * @idx Trace buffer index 116 * @record Trace record to fill 117 * 118 * Returns false if the record is not valid. 119 */ 120 static bool get_trace_record(unsigned int idx, TraceRecord **recordptr) 121 { 122 uint64_t event_flag = 0; 123 TraceRecord record; 124 /* read the event flag to see if its a valid record */ 125 read_from_buffer(idx, &record, sizeof(event_flag)); 126 127 if (!(record.event & TRACE_RECORD_VALID)) { 128 return false; 129 } 130 131 smp_rmb(); /* read memory barrier before accessing record */ 132 /* read the record header to know record length */ 133 read_from_buffer(idx, &record, sizeof(TraceRecord)); 134 *recordptr = malloc(record.length); /* dont use g_malloc, can deadlock when traced */ 135 /* make a copy of record to avoid being overwritten */ 136 read_from_buffer(idx, *recordptr, record.length); 137 smp_rmb(); /* memory barrier before clearing valid flag */ 138 (*recordptr)->event &= ~TRACE_RECORD_VALID; 139 /* clear the trace buffer range for consumed record otherwise any byte 140 * with its MSB set may be considered as a valid event id when the writer 141 * thread crosses this range of buffer again. 142 */ 143 clear_buffer_range(idx, record.length); 144 return true; 145 } 146 147 /** 148 * Kick writeout thread 149 * 150 * @wait Whether to wait for writeout thread to complete 151 */ 152 static void flush_trace_file(bool wait) 153 { 154 lock_trace_lock(); 155 trace_available = true; 156 g_cond_signal(trace_available_cond); 157 158 if (wait) { 159 g_cond_wait(trace_empty_cond, get_trace_lock_mutex()); 160 } 161 162 unlock_trace_lock(); 163 } 164 165 static void wait_for_trace_records_available(void) 166 { 167 lock_trace_lock(); 168 while (!(trace_available && trace_writeout_enabled)) { 169 g_cond_signal(trace_empty_cond); 170 g_cond_wait(trace_available_cond, get_trace_lock_mutex()); 171 } 172 trace_available = false; 173 unlock_trace_lock(); 174 } 175 176 static gpointer writeout_thread(gpointer opaque) 177 { 178 TraceRecord *recordptr; 179 union { 180 TraceRecord rec; 181 uint8_t bytes[sizeof(TraceRecord) + sizeof(uint64_t)]; 182 } dropped; 183 unsigned int idx = 0; 184 int dropped_count; 185 size_t unused __attribute__ ((unused)); 186 187 for (;;) { 188 wait_for_trace_records_available(); 189 190 if (g_atomic_int_get(&dropped_events)) { 191 dropped.rec.event = DROPPED_EVENT_ID, 192 dropped.rec.timestamp_ns = get_clock(); 193 dropped.rec.length = sizeof(TraceRecord) + sizeof(uint64_t), 194 dropped.rec.pid = trace_pid; 195 do { 196 dropped_count = g_atomic_int_get(&dropped_events); 197 } while (!g_atomic_int_compare_and_exchange(&dropped_events, 198 dropped_count, 0)); 199 dropped.rec.arguments[0] = dropped_count; 200 unused = fwrite(&dropped.rec, dropped.rec.length, 1, trace_fp); 201 } 202 203 while (get_trace_record(idx, &recordptr)) { 204 unused = fwrite(recordptr, recordptr->length, 1, trace_fp); 205 writeout_idx += recordptr->length; 206 free(recordptr); /* dont use g_free, can deadlock when traced */ 207 idx = writeout_idx % TRACE_BUF_LEN; 208 } 209 210 fflush(trace_fp); 211 } 212 return NULL; 213 } 214 215 void trace_record_write_u64(TraceBufferRecord *rec, uint64_t val) 216 { 217 rec->rec_off = write_to_buffer(rec->rec_off, &val, sizeof(uint64_t)); 218 } 219 220 void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen) 221 { 222 /* Write string length first */ 223 rec->rec_off = write_to_buffer(rec->rec_off, &slen, sizeof(slen)); 224 /* Write actual string now */ 225 rec->rec_off = write_to_buffer(rec->rec_off, (void*)s, slen); 226 } 227 228 int trace_record_start(TraceBufferRecord *rec, TraceEventID event, size_t datasize) 229 { 230 unsigned int idx, rec_off, old_idx, new_idx; 231 uint32_t rec_len = sizeof(TraceRecord) + datasize; 232 uint64_t event_u64 = event; 233 uint64_t timestamp_ns = get_clock(); 234 235 do { 236 old_idx = g_atomic_int_get(&trace_idx); 237 smp_rmb(); 238 new_idx = old_idx + rec_len; 239 240 if (new_idx - writeout_idx > TRACE_BUF_LEN) { 241 /* Trace Buffer Full, Event dropped ! */ 242 g_atomic_int_inc(&dropped_events); 243 return -ENOSPC; 244 } 245 } while (!g_atomic_int_compare_and_exchange(&trace_idx, old_idx, new_idx)); 246 247 idx = old_idx % TRACE_BUF_LEN; 248 249 rec_off = idx; 250 rec_off = write_to_buffer(rec_off, &event_u64, sizeof(event_u64)); 251 rec_off = write_to_buffer(rec_off, ×tamp_ns, sizeof(timestamp_ns)); 252 rec_off = write_to_buffer(rec_off, &rec_len, sizeof(rec_len)); 253 rec_off = write_to_buffer(rec_off, &trace_pid, sizeof(trace_pid)); 254 255 rec->tbuf_idx = idx; 256 rec->rec_off = (idx + sizeof(TraceRecord)) % TRACE_BUF_LEN; 257 return 0; 258 } 259 260 static void read_from_buffer(unsigned int idx, void *dataptr, size_t size) 261 { 262 uint8_t *data_ptr = dataptr; 263 uint32_t x = 0; 264 while (x < size) { 265 if (idx >= TRACE_BUF_LEN) { 266 idx = idx % TRACE_BUF_LEN; 267 } 268 data_ptr[x++] = trace_buf[idx++]; 269 } 270 } 271 272 static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size) 273 { 274 uint8_t *data_ptr = dataptr; 275 uint32_t x = 0; 276 while (x < size) { 277 if (idx >= TRACE_BUF_LEN) { 278 idx = idx % TRACE_BUF_LEN; 279 } 280 trace_buf[idx++] = data_ptr[x++]; 281 } 282 return idx; /* most callers wants to know where to write next */ 283 } 284 285 void trace_record_finish(TraceBufferRecord *rec) 286 { 287 TraceRecord record; 288 read_from_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord)); 289 smp_wmb(); /* write barrier before marking as valid */ 290 record.event |= TRACE_RECORD_VALID; 291 write_to_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord)); 292 293 if (((unsigned int)g_atomic_int_get(&trace_idx) - writeout_idx) 294 > TRACE_BUF_FLUSH_THRESHOLD) { 295 flush_trace_file(false); 296 } 297 } 298 299 void st_set_trace_file_enabled(bool enable) 300 { 301 if (enable == !!trace_fp) { 302 return; /* no change */ 303 } 304 305 /* Halt trace writeout */ 306 flush_trace_file(true); 307 trace_writeout_enabled = false; 308 flush_trace_file(true); 309 310 if (enable) { 311 static const TraceLogHeader header = { 312 .header_event_id = HEADER_EVENT_ID, 313 .header_magic = HEADER_MAGIC, 314 /* Older log readers will check for version at next location */ 315 .header_version = HEADER_VERSION, 316 }; 317 318 trace_fp = fopen(trace_file_name, "wb"); 319 if (!trace_fp) { 320 return; 321 } 322 323 if (fwrite(&header, sizeof header, 1, trace_fp) != 1) { 324 fclose(trace_fp); 325 trace_fp = NULL; 326 return; 327 } 328 329 /* Resume trace writeout */ 330 trace_writeout_enabled = true; 331 flush_trace_file(false); 332 } else { 333 fclose(trace_fp); 334 trace_fp = NULL; 335 } 336 } 337 338 /** 339 * Set the name of a trace file 340 * 341 * @file The trace file name or NULL for the default name-<pid> set at 342 * config time 343 */ 344 bool st_set_trace_file(const char *file) 345 { 346 st_set_trace_file_enabled(false); 347 348 g_free(trace_file_name); 349 350 if (!file) { 351 trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, getpid()); 352 } else { 353 trace_file_name = g_strdup_printf("%s", file); 354 } 355 356 st_set_trace_file_enabled(true); 357 return true; 358 } 359 360 void st_print_trace_file_status(FILE *stream, int (*stream_printf)(FILE *stream, const char *fmt, ...)) 361 { 362 stream_printf(stream, "Trace file \"%s\" %s.\n", 363 trace_file_name, trace_fp ? "on" : "off"); 364 } 365 366 void st_flush_trace_buffer(void) 367 { 368 flush_trace_file(true); 369 } 370 371 /* Helper function to create a thread with signals blocked. Use glib's 372 * portable threads since QEMU abstractions cannot be used due to reentrancy in 373 * the tracer. Also note the signal masking on POSIX hosts so that the thread 374 * does not steal signals when the rest of the program wants them blocked. 375 */ 376 static GThread *trace_thread_create(GThreadFunc fn) 377 { 378 GThread *thread; 379 #ifndef _WIN32 380 sigset_t set, oldset; 381 382 sigfillset(&set); 383 pthread_sigmask(SIG_SETMASK, &set, &oldset); 384 #endif 385 386 #if GLIB_CHECK_VERSION(2, 31, 0) 387 thread = g_thread_new("trace-thread", fn, NULL); 388 #else 389 thread = g_thread_create(fn, NULL, FALSE, NULL); 390 #endif 391 392 #ifndef _WIN32 393 pthread_sigmask(SIG_SETMASK, &oldset, NULL); 394 #endif 395 396 return thread; 397 } 398 399 bool st_init(const char *file) 400 { 401 GThread *thread; 402 403 trace_pid = getpid(); 404 405 #if !GLIB_CHECK_VERSION(2, 31, 0) 406 trace_available_cond = g_cond_new(); 407 trace_empty_cond = g_cond_new(); 408 #endif 409 410 thread = trace_thread_create(writeout_thread); 411 if (!thread) { 412 fprintf(stderr, "warning: unable to initialize simple trace backend\n"); 413 return false; 414 } 415 416 atexit(st_flush_trace_buffer); 417 st_set_trace_file(file); 418 return true; 419 } 420