1 /* 2 * Simple trace backend 3 * 4 * Copyright IBM, Corp. 2010 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2. See 7 * the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include <stdlib.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <time.h> 15 #ifndef _WIN32 16 #include <signal.h> 17 #include <pthread.h> 18 #endif 19 #include "qemu-timer.h" 20 #include "trace.h" 21 #include "trace/control.h" 22 23 /** Trace file header event ID */ 24 #define HEADER_EVENT_ID (~(uint64_t)0) /* avoids conflicting with TraceEventIDs */ 25 26 /** Trace file magic number */ 27 #define HEADER_MAGIC 0xf2b177cb0aa429b4ULL 28 29 /** Trace file version number, bump if format changes */ 30 #define HEADER_VERSION 2 31 32 /** Records were dropped event ID */ 33 #define DROPPED_EVENT_ID (~(uint64_t)0 - 1) 34 35 /** Trace record is valid */ 36 #define TRACE_RECORD_VALID ((uint64_t)1 << 63) 37 38 /* 39 * Trace records are written out by a dedicated thread. The thread waits for 40 * records to become available, writes them out, and then waits again. 41 */ 42 static GStaticMutex trace_lock = G_STATIC_MUTEX_INIT; 43 static GCond *trace_available_cond; 44 static GCond *trace_empty_cond; 45 static bool trace_available; 46 static bool trace_writeout_enabled; 47 48 enum { 49 TRACE_BUF_LEN = 4096 * 64, 50 TRACE_BUF_FLUSH_THRESHOLD = TRACE_BUF_LEN / 4, 51 }; 52 53 uint8_t trace_buf[TRACE_BUF_LEN]; 54 static unsigned int trace_idx; 55 static unsigned int writeout_idx; 56 static uint64_t dropped_events; 57 static FILE *trace_fp; 58 static char *trace_file_name; 59 60 /* * Trace buffer entry */ 61 typedef struct { 62 uint64_t event; /* TraceEventID */ 63 uint64_t timestamp_ns; 64 uint32_t length; /* in bytes */ 65 uint32_t reserved; /* unused */ 66 uint8_t arguments[]; 67 } TraceRecord; 68 69 typedef struct { 70 uint64_t header_event_id; /* HEADER_EVENT_ID */ 71 uint64_t header_magic; /* HEADER_MAGIC */ 72 uint64_t header_version; /* HEADER_VERSION */ 73 } TraceLogHeader; 74 75 76 static void read_from_buffer(unsigned int idx, void *dataptr, size_t size); 77 static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size); 78 79 static void clear_buffer_range(unsigned int idx, size_t len) 80 { 81 uint32_t num = 0; 82 while (num < len) { 83 if (idx >= TRACE_BUF_LEN) { 84 idx = idx % TRACE_BUF_LEN; 85 } 86 trace_buf[idx++] = 0; 87 num++; 88 } 89 } 90 /** 91 * Read a trace record from the trace buffer 92 * 93 * @idx Trace buffer index 94 * @record Trace record to fill 95 * 96 * Returns false if the record is not valid. 97 */ 98 static bool get_trace_record(unsigned int idx, TraceRecord **recordptr) 99 { 100 uint64_t event_flag = 0; 101 TraceRecord record; 102 /* read the event flag to see if its a valid record */ 103 read_from_buffer(idx, &record, sizeof(event_flag)); 104 105 if (!(record.event & TRACE_RECORD_VALID)) { 106 return false; 107 } 108 109 smp_rmb(); /* read memory barrier before accessing record */ 110 /* read the record header to know record length */ 111 read_from_buffer(idx, &record, sizeof(TraceRecord)); 112 *recordptr = malloc(record.length); /* dont use g_malloc, can deadlock when traced */ 113 /* make a copy of record to avoid being overwritten */ 114 read_from_buffer(idx, *recordptr, record.length); 115 smp_rmb(); /* memory barrier before clearing valid flag */ 116 (*recordptr)->event &= ~TRACE_RECORD_VALID; 117 /* clear the trace buffer range for consumed record otherwise any byte 118 * with its MSB set may be considered as a valid event id when the writer 119 * thread crosses this range of buffer again. 120 */ 121 clear_buffer_range(idx, record.length); 122 return true; 123 } 124 125 /** 126 * Kick writeout thread 127 * 128 * @wait Whether to wait for writeout thread to complete 129 */ 130 static void flush_trace_file(bool wait) 131 { 132 g_static_mutex_lock(&trace_lock); 133 trace_available = true; 134 g_cond_signal(trace_available_cond); 135 136 if (wait) { 137 g_cond_wait(trace_empty_cond, g_static_mutex_get_mutex(&trace_lock)); 138 } 139 140 g_static_mutex_unlock(&trace_lock); 141 } 142 143 static void wait_for_trace_records_available(void) 144 { 145 g_static_mutex_lock(&trace_lock); 146 while (!(trace_available && trace_writeout_enabled)) { 147 g_cond_signal(trace_empty_cond); 148 g_cond_wait(trace_available_cond, 149 g_static_mutex_get_mutex(&trace_lock)); 150 } 151 trace_available = false; 152 g_static_mutex_unlock(&trace_lock); 153 } 154 155 static gpointer writeout_thread(gpointer opaque) 156 { 157 TraceRecord *recordptr; 158 union { 159 TraceRecord rec; 160 uint8_t bytes[sizeof(TraceRecord) + sizeof(uint64_t)]; 161 } dropped; 162 unsigned int idx = 0; 163 uint64_t dropped_count; 164 size_t unused __attribute__ ((unused)); 165 166 for (;;) { 167 wait_for_trace_records_available(); 168 169 if (dropped_events) { 170 dropped.rec.event = DROPPED_EVENT_ID, 171 dropped.rec.timestamp_ns = get_clock(); 172 dropped.rec.length = sizeof(TraceRecord) + sizeof(dropped_events), 173 dropped.rec.reserved = 0; 174 while (1) { 175 dropped_count = dropped_events; 176 if (g_atomic_int_compare_and_exchange((gint *)&dropped_events, 177 dropped_count, 0)) { 178 break; 179 } 180 } 181 memcpy(dropped.rec.arguments, &dropped_count, sizeof(uint64_t)); 182 unused = fwrite(&dropped.rec, dropped.rec.length, 1, trace_fp); 183 } 184 185 while (get_trace_record(idx, &recordptr)) { 186 unused = fwrite(recordptr, recordptr->length, 1, trace_fp); 187 writeout_idx += recordptr->length; 188 free(recordptr); /* dont use g_free, can deadlock when traced */ 189 idx = writeout_idx % TRACE_BUF_LEN; 190 } 191 192 fflush(trace_fp); 193 } 194 return NULL; 195 } 196 197 void trace_record_write_u64(TraceBufferRecord *rec, uint64_t val) 198 { 199 rec->rec_off = write_to_buffer(rec->rec_off, &val, sizeof(uint64_t)); 200 } 201 202 void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen) 203 { 204 /* Write string length first */ 205 rec->rec_off = write_to_buffer(rec->rec_off, &slen, sizeof(slen)); 206 /* Write actual string now */ 207 rec->rec_off = write_to_buffer(rec->rec_off, (void*)s, slen); 208 } 209 210 int trace_record_start(TraceBufferRecord *rec, TraceEventID event, size_t datasize) 211 { 212 unsigned int idx, rec_off, old_idx, new_idx; 213 uint32_t rec_len = sizeof(TraceRecord) + datasize; 214 uint64_t timestamp_ns = get_clock(); 215 216 while (1) { 217 old_idx = trace_idx; 218 smp_rmb(); 219 new_idx = old_idx + rec_len; 220 221 if (new_idx - writeout_idx > TRACE_BUF_LEN) { 222 /* Trace Buffer Full, Event dropped ! */ 223 g_atomic_int_inc((gint *)&dropped_events); 224 return -ENOSPC; 225 } 226 227 if (g_atomic_int_compare_and_exchange((gint *)&trace_idx, 228 old_idx, new_idx)) { 229 break; 230 } 231 } 232 233 idx = old_idx % TRACE_BUF_LEN; 234 235 rec_off = idx; 236 rec_off = write_to_buffer(rec_off, &event, sizeof(event)); 237 rec_off = write_to_buffer(rec_off, ×tamp_ns, sizeof(timestamp_ns)); 238 rec_off = write_to_buffer(rec_off, &rec_len, sizeof(rec_len)); 239 240 rec->tbuf_idx = idx; 241 rec->rec_off = (idx + sizeof(TraceRecord)) % TRACE_BUF_LEN; 242 return 0; 243 } 244 245 static void read_from_buffer(unsigned int idx, void *dataptr, size_t size) 246 { 247 uint8_t *data_ptr = dataptr; 248 uint32_t x = 0; 249 while (x < size) { 250 if (idx >= TRACE_BUF_LEN) { 251 idx = idx % TRACE_BUF_LEN; 252 } 253 data_ptr[x++] = trace_buf[idx++]; 254 } 255 } 256 257 static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size) 258 { 259 uint8_t *data_ptr = dataptr; 260 uint32_t x = 0; 261 while (x < size) { 262 if (idx >= TRACE_BUF_LEN) { 263 idx = idx % TRACE_BUF_LEN; 264 } 265 trace_buf[idx++] = data_ptr[x++]; 266 } 267 return idx; /* most callers wants to know where to write next */ 268 } 269 270 void trace_record_finish(TraceBufferRecord *rec) 271 { 272 TraceRecord record; 273 read_from_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord)); 274 smp_wmb(); /* write barrier before marking as valid */ 275 record.event |= TRACE_RECORD_VALID; 276 write_to_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord)); 277 278 if ((trace_idx - writeout_idx) > TRACE_BUF_FLUSH_THRESHOLD) { 279 flush_trace_file(false); 280 } 281 } 282 283 void st_set_trace_file_enabled(bool enable) 284 { 285 if (enable == !!trace_fp) { 286 return; /* no change */ 287 } 288 289 /* Halt trace writeout */ 290 flush_trace_file(true); 291 trace_writeout_enabled = false; 292 flush_trace_file(true); 293 294 if (enable) { 295 static const TraceLogHeader header = { 296 .header_event_id = HEADER_EVENT_ID, 297 .header_magic = HEADER_MAGIC, 298 /* Older log readers will check for version at next location */ 299 .header_version = HEADER_VERSION, 300 }; 301 302 trace_fp = fopen(trace_file_name, "wb"); 303 if (!trace_fp) { 304 return; 305 } 306 307 if (fwrite(&header, sizeof header, 1, trace_fp) != 1) { 308 fclose(trace_fp); 309 trace_fp = NULL; 310 return; 311 } 312 313 /* Resume trace writeout */ 314 trace_writeout_enabled = true; 315 flush_trace_file(false); 316 } else { 317 fclose(trace_fp); 318 trace_fp = NULL; 319 } 320 } 321 322 /** 323 * Set the name of a trace file 324 * 325 * @file The trace file name or NULL for the default name-<pid> set at 326 * config time 327 */ 328 bool st_set_trace_file(const char *file) 329 { 330 st_set_trace_file_enabled(false); 331 332 g_free(trace_file_name); 333 334 if (!file) { 335 trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, getpid()); 336 } else { 337 trace_file_name = g_strdup_printf("%s", file); 338 } 339 340 st_set_trace_file_enabled(true); 341 return true; 342 } 343 344 void st_print_trace_file_status(FILE *stream, int (*stream_printf)(FILE *stream, const char *fmt, ...)) 345 { 346 stream_printf(stream, "Trace file \"%s\" %s.\n", 347 trace_file_name, trace_fp ? "on" : "off"); 348 } 349 350 void st_flush_trace_buffer(void) 351 { 352 flush_trace_file(true); 353 } 354 355 void trace_print_events(FILE *stream, fprintf_function stream_printf) 356 { 357 unsigned int i; 358 359 for (i = 0; i < NR_TRACE_EVENTS; i++) { 360 stream_printf(stream, "%s [Event ID %u] : state %u\n", 361 trace_list[i].tp_name, i, trace_list[i].state); 362 } 363 } 364 365 bool trace_event_set_state(const char *name, bool state) 366 { 367 unsigned int i; 368 unsigned int len; 369 bool wildcard = false; 370 bool matched = false; 371 372 len = strlen(name); 373 if (len > 0 && name[len - 1] == '*') { 374 wildcard = true; 375 len -= 1; 376 } 377 for (i = 0; i < NR_TRACE_EVENTS; i++) { 378 if (wildcard) { 379 if (!strncmp(trace_list[i].tp_name, name, len)) { 380 trace_list[i].state = state; 381 matched = true; 382 } 383 continue; 384 } 385 if (!strcmp(trace_list[i].tp_name, name)) { 386 trace_list[i].state = state; 387 return true; 388 } 389 } 390 return matched; 391 } 392 393 /* Helper function to create a thread with signals blocked. Use glib's 394 * portable threads since QEMU abstractions cannot be used due to reentrancy in 395 * the tracer. Also note the signal masking on POSIX hosts so that the thread 396 * does not steal signals when the rest of the program wants them blocked. 397 */ 398 static GThread *trace_thread_create(GThreadFunc fn) 399 { 400 GThread *thread; 401 #ifndef _WIN32 402 sigset_t set, oldset; 403 404 sigfillset(&set); 405 pthread_sigmask(SIG_SETMASK, &set, &oldset); 406 #endif 407 thread = g_thread_create(fn, NULL, FALSE, NULL); 408 #ifndef _WIN32 409 pthread_sigmask(SIG_SETMASK, &oldset, NULL); 410 #endif 411 412 return thread; 413 } 414 415 bool trace_backend_init(const char *events, const char *file) 416 { 417 GThread *thread; 418 419 if (!g_thread_supported()) { 420 #if !GLIB_CHECK_VERSION(2, 31, 0) 421 g_thread_init(NULL); 422 #else 423 fprintf(stderr, "glib threading failed to initialize.\n"); 424 exit(1); 425 #endif 426 } 427 428 trace_available_cond = g_cond_new(); 429 trace_empty_cond = g_cond_new(); 430 431 thread = trace_thread_create(writeout_thread); 432 if (!thread) { 433 fprintf(stderr, "warning: unable to initialize simple trace backend\n"); 434 return false; 435 } 436 437 atexit(st_flush_trace_buffer); 438 trace_backend_init_events(events); 439 st_set_trace_file(file); 440 return true; 441 } 442