1 /* 2 * Simple trace backend 3 * 4 * Copyright IBM, Corp. 2010 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2. See 7 * the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #ifndef _WIN32 13 #include <pthread.h> 14 #endif 15 #include "qemu/timer.h" 16 #include "trace.h" 17 #include "trace/control.h" 18 #include "trace/simple.h" 19 20 /** Trace file header event ID */ 21 #define HEADER_EVENT_ID (~(uint64_t)0) /* avoids conflicting with TraceEventIDs */ 22 23 /** Trace file magic number */ 24 #define HEADER_MAGIC 0xf2b177cb0aa429b4ULL 25 26 /** Trace file version number, bump if format changes */ 27 #define HEADER_VERSION 3 28 29 /** Records were dropped event ID */ 30 #define DROPPED_EVENT_ID (~(uint64_t)0 - 1) 31 32 /** Trace record is valid */ 33 #define TRACE_RECORD_VALID ((uint64_t)1 << 63) 34 35 /* 36 * Trace records are written out by a dedicated thread. The thread waits for 37 * records to become available, writes them out, and then waits again. 38 */ 39 static CompatGMutex trace_lock; 40 static CompatGCond trace_available_cond; 41 static CompatGCond trace_empty_cond; 42 43 static bool trace_available; 44 static bool trace_writeout_enabled; 45 46 enum { 47 TRACE_BUF_LEN = 4096 * 64, 48 TRACE_BUF_FLUSH_THRESHOLD = TRACE_BUF_LEN / 4, 49 }; 50 51 uint8_t trace_buf[TRACE_BUF_LEN]; 52 static volatile gint trace_idx; 53 static unsigned int writeout_idx; 54 static volatile gint dropped_events; 55 static uint32_t trace_pid; 56 static FILE *trace_fp; 57 static char *trace_file_name; 58 59 /* * Trace buffer entry */ 60 typedef struct { 61 uint64_t event; /* TraceEventID */ 62 uint64_t timestamp_ns; 63 uint32_t length; /* in bytes */ 64 uint32_t pid; 65 uint64_t arguments[]; 66 } TraceRecord; 67 68 typedef struct { 69 uint64_t header_event_id; /* HEADER_EVENT_ID */ 70 uint64_t header_magic; /* HEADER_MAGIC */ 71 uint64_t header_version; /* HEADER_VERSION */ 72 } TraceLogHeader; 73 74 75 static void read_from_buffer(unsigned int idx, void *dataptr, size_t size); 76 static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size); 77 78 static void clear_buffer_range(unsigned int idx, size_t len) 79 { 80 uint32_t num = 0; 81 while (num < len) { 82 if (idx >= TRACE_BUF_LEN) { 83 idx = idx % TRACE_BUF_LEN; 84 } 85 trace_buf[idx++] = 0; 86 num++; 87 } 88 } 89 /** 90 * Read a trace record from the trace buffer 91 * 92 * @idx Trace buffer index 93 * @record Trace record to fill 94 * 95 * Returns false if the record is not valid. 96 */ 97 static bool get_trace_record(unsigned int idx, TraceRecord **recordptr) 98 { 99 uint64_t event_flag = 0; 100 TraceRecord record; 101 /* read the event flag to see if its a valid record */ 102 read_from_buffer(idx, &record, sizeof(event_flag)); 103 104 if (!(record.event & TRACE_RECORD_VALID)) { 105 return false; 106 } 107 108 smp_rmb(); /* read memory barrier before accessing record */ 109 /* read the record header to know record length */ 110 read_from_buffer(idx, &record, sizeof(TraceRecord)); 111 *recordptr = malloc(record.length); /* don't use g_malloc, can deadlock when traced */ 112 /* make a copy of record to avoid being overwritten */ 113 read_from_buffer(idx, *recordptr, record.length); 114 smp_rmb(); /* memory barrier before clearing valid flag */ 115 (*recordptr)->event &= ~TRACE_RECORD_VALID; 116 /* clear the trace buffer range for consumed record otherwise any byte 117 * with its MSB set may be considered as a valid event id when the writer 118 * thread crosses this range of buffer again. 119 */ 120 clear_buffer_range(idx, record.length); 121 return true; 122 } 123 124 /** 125 * Kick writeout thread 126 * 127 * @wait Whether to wait for writeout thread to complete 128 */ 129 static void flush_trace_file(bool wait) 130 { 131 g_mutex_lock(&trace_lock); 132 trace_available = true; 133 g_cond_signal(&trace_available_cond); 134 135 if (wait) { 136 g_cond_wait(&trace_empty_cond, &trace_lock); 137 } 138 139 g_mutex_unlock(&trace_lock); 140 } 141 142 static void wait_for_trace_records_available(void) 143 { 144 g_mutex_lock(&trace_lock); 145 while (!(trace_available && trace_writeout_enabled)) { 146 g_cond_signal(&trace_empty_cond); 147 g_cond_wait(&trace_available_cond, &trace_lock); 148 } 149 trace_available = false; 150 g_mutex_unlock(&trace_lock); 151 } 152 153 static gpointer writeout_thread(gpointer opaque) 154 { 155 TraceRecord *recordptr; 156 union { 157 TraceRecord rec; 158 uint8_t bytes[sizeof(TraceRecord) + sizeof(uint64_t)]; 159 } dropped; 160 unsigned int idx = 0; 161 int dropped_count; 162 size_t unused __attribute__ ((unused)); 163 164 for (;;) { 165 wait_for_trace_records_available(); 166 167 if (g_atomic_int_get(&dropped_events)) { 168 dropped.rec.event = DROPPED_EVENT_ID, 169 dropped.rec.timestamp_ns = get_clock(); 170 dropped.rec.length = sizeof(TraceRecord) + sizeof(uint64_t), 171 dropped.rec.pid = trace_pid; 172 do { 173 dropped_count = g_atomic_int_get(&dropped_events); 174 } while (!g_atomic_int_compare_and_exchange(&dropped_events, 175 dropped_count, 0)); 176 dropped.rec.arguments[0] = dropped_count; 177 unused = fwrite(&dropped.rec, dropped.rec.length, 1, trace_fp); 178 } 179 180 while (get_trace_record(idx, &recordptr)) { 181 unused = fwrite(recordptr, recordptr->length, 1, trace_fp); 182 writeout_idx += recordptr->length; 183 free(recordptr); /* don't use g_free, can deadlock when traced */ 184 idx = writeout_idx % TRACE_BUF_LEN; 185 } 186 187 fflush(trace_fp); 188 } 189 return NULL; 190 } 191 192 void trace_record_write_u64(TraceBufferRecord *rec, uint64_t val) 193 { 194 rec->rec_off = write_to_buffer(rec->rec_off, &val, sizeof(uint64_t)); 195 } 196 197 void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen) 198 { 199 /* Write string length first */ 200 rec->rec_off = write_to_buffer(rec->rec_off, &slen, sizeof(slen)); 201 /* Write actual string now */ 202 rec->rec_off = write_to_buffer(rec->rec_off, (void*)s, slen); 203 } 204 205 int trace_record_start(TraceBufferRecord *rec, TraceEventID event, size_t datasize) 206 { 207 unsigned int idx, rec_off, old_idx, new_idx; 208 uint32_t rec_len = sizeof(TraceRecord) + datasize; 209 uint64_t event_u64 = event; 210 uint64_t timestamp_ns = get_clock(); 211 212 do { 213 old_idx = g_atomic_int_get(&trace_idx); 214 smp_rmb(); 215 new_idx = old_idx + rec_len; 216 217 if (new_idx - writeout_idx > TRACE_BUF_LEN) { 218 /* Trace Buffer Full, Event dropped ! */ 219 g_atomic_int_inc(&dropped_events); 220 return -ENOSPC; 221 } 222 } while (!g_atomic_int_compare_and_exchange(&trace_idx, old_idx, new_idx)); 223 224 idx = old_idx % TRACE_BUF_LEN; 225 226 rec_off = idx; 227 rec_off = write_to_buffer(rec_off, &event_u64, sizeof(event_u64)); 228 rec_off = write_to_buffer(rec_off, ×tamp_ns, sizeof(timestamp_ns)); 229 rec_off = write_to_buffer(rec_off, &rec_len, sizeof(rec_len)); 230 rec_off = write_to_buffer(rec_off, &trace_pid, sizeof(trace_pid)); 231 232 rec->tbuf_idx = idx; 233 rec->rec_off = (idx + sizeof(TraceRecord)) % TRACE_BUF_LEN; 234 return 0; 235 } 236 237 static void read_from_buffer(unsigned int idx, void *dataptr, size_t size) 238 { 239 uint8_t *data_ptr = dataptr; 240 uint32_t x = 0; 241 while (x < size) { 242 if (idx >= TRACE_BUF_LEN) { 243 idx = idx % TRACE_BUF_LEN; 244 } 245 data_ptr[x++] = trace_buf[idx++]; 246 } 247 } 248 249 static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size) 250 { 251 uint8_t *data_ptr = dataptr; 252 uint32_t x = 0; 253 while (x < size) { 254 if (idx >= TRACE_BUF_LEN) { 255 idx = idx % TRACE_BUF_LEN; 256 } 257 trace_buf[idx++] = data_ptr[x++]; 258 } 259 return idx; /* most callers wants to know where to write next */ 260 } 261 262 void trace_record_finish(TraceBufferRecord *rec) 263 { 264 TraceRecord record; 265 read_from_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord)); 266 smp_wmb(); /* write barrier before marking as valid */ 267 record.event |= TRACE_RECORD_VALID; 268 write_to_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord)); 269 270 if (((unsigned int)g_atomic_int_get(&trace_idx) - writeout_idx) 271 > TRACE_BUF_FLUSH_THRESHOLD) { 272 flush_trace_file(false); 273 } 274 } 275 276 void st_set_trace_file_enabled(bool enable) 277 { 278 if (enable == !!trace_fp) { 279 return; /* no change */ 280 } 281 282 /* Halt trace writeout */ 283 flush_trace_file(true); 284 trace_writeout_enabled = false; 285 flush_trace_file(true); 286 287 if (enable) { 288 static const TraceLogHeader header = { 289 .header_event_id = HEADER_EVENT_ID, 290 .header_magic = HEADER_MAGIC, 291 /* Older log readers will check for version at next location */ 292 .header_version = HEADER_VERSION, 293 }; 294 295 trace_fp = fopen(trace_file_name, "wb"); 296 if (!trace_fp) { 297 return; 298 } 299 300 if (fwrite(&header, sizeof header, 1, trace_fp) != 1) { 301 fclose(trace_fp); 302 trace_fp = NULL; 303 return; 304 } 305 306 /* Resume trace writeout */ 307 trace_writeout_enabled = true; 308 flush_trace_file(false); 309 } else { 310 fclose(trace_fp); 311 trace_fp = NULL; 312 } 313 } 314 315 /** 316 * Set the name of a trace file 317 * 318 * @file The trace file name or NULL for the default name-<pid> set at 319 * config time 320 */ 321 void st_set_trace_file(const char *file) 322 { 323 st_set_trace_file_enabled(false); 324 325 g_free(trace_file_name); 326 327 if (!file) { 328 /* Type cast needed for Windows where getpid() returns an int. */ 329 trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, (pid_t)getpid()); 330 } else { 331 trace_file_name = g_strdup_printf("%s", file); 332 } 333 334 st_set_trace_file_enabled(true); 335 } 336 337 void st_print_trace_file_status(FILE *stream, int (*stream_printf)(FILE *stream, const char *fmt, ...)) 338 { 339 stream_printf(stream, "Trace file \"%s\" %s.\n", 340 trace_file_name, trace_fp ? "on" : "off"); 341 } 342 343 void st_flush_trace_buffer(void) 344 { 345 flush_trace_file(true); 346 } 347 348 /* Helper function to create a thread with signals blocked. Use glib's 349 * portable threads since QEMU abstractions cannot be used due to reentrancy in 350 * the tracer. Also note the signal masking on POSIX hosts so that the thread 351 * does not steal signals when the rest of the program wants them blocked. 352 */ 353 static GThread *trace_thread_create(GThreadFunc fn) 354 { 355 GThread *thread; 356 #ifndef _WIN32 357 sigset_t set, oldset; 358 359 sigfillset(&set); 360 pthread_sigmask(SIG_SETMASK, &set, &oldset); 361 #endif 362 363 thread = g_thread_new("trace-thread", fn, NULL); 364 365 #ifndef _WIN32 366 pthread_sigmask(SIG_SETMASK, &oldset, NULL); 367 #endif 368 369 return thread; 370 } 371 372 bool st_init(void) 373 { 374 GThread *thread; 375 376 trace_pid = getpid(); 377 378 thread = trace_thread_create(writeout_thread); 379 if (!thread) { 380 fprintf(stderr, "warning: unable to initialize simple trace backend\n"); 381 return false; 382 } 383 384 atexit(st_flush_trace_buffer); 385 return true; 386 } 387