xref: /openbmc/linux/tools/perf/builtin-kvm.c (revision 8fdff1dc)
1 #include "builtin.h"
2 #include "perf.h"
3 
4 #include "util/evsel.h"
5 #include "util/util.h"
6 #include "util/cache.h"
7 #include "util/symbol.h"
8 #include "util/thread.h"
9 #include "util/header.h"
10 #include "util/session.h"
11 
12 #include "util/parse-options.h"
13 #include "util/trace-event.h"
14 #include "util/debug.h"
15 #include "util/debugfs.h"
16 #include "util/tool.h"
17 #include "util/stat.h"
18 
19 #include <sys/prctl.h>
20 
21 #include <semaphore.h>
22 #include <pthread.h>
23 #include <math.h>
24 
25 #if defined(__i386__) || defined(__x86_64__)
26 #include <asm/svm.h>
27 #include <asm/vmx.h>
28 #include <asm/kvm.h>
29 
/*
 * Identifies one kind of KVM event. The meaning of both fields depends on
 * the active events ops: exit reason / 0 for VM exits, gpa / type for MMIO,
 * port / rw for port I/O (see the *_get_key() helpers).
 */
struct event_key {
	/* Sentinel stored in ->key while no key has been decoded yet. */
	#define INVALID_KEY     (~0ULL)
	u64 key;
	int info;
};
35 
/* Accumulated handling time plus running statistics for one event kind. */
struct kvm_event_stats {
	u64 time;		/* sum of all time_diff values passed to kvm_update_event_stats() */
	struct stats stats;	/* fed the same time_diff values through update_stats() */
};
40 
/*
 * One distinct event (unique key/info pair). Lives in a hash bucket of
 * perf_kvm_stat::kvm_events_cache and is later linked into the sorted
 * result rb-tree.
 */
struct kvm_event {
	struct list_head hash_entry;	/* link in the hash bucket */
	struct rb_node rb;		/* link in perf_kvm_stat::result */

	struct event_key key;

	/* Statistics over all VCPUs (updated when no VCPU filter is set). */
	struct kvm_event_stats total;

	/* Growth step, in entries, of the per-VCPU array below. */
	#define DEFAULT_VCPU_NUM 8
	int max_vcpu;			/* number of entries allocated in ->vcpu */
	struct kvm_event_stats *vcpu;	/* per-VCPU statistics, indexed by vcpu id */
};
53 
/*
 * Comparison callback used for sorting: returns non-zero when the first
 * event ranks above the second for the given vcpu (-1 means all VCPUs).
 */
typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int);

/* Maps a user-visible sort key name to its comparison callback. */
struct kvm_event_key {
	const char *name;
	key_cmp_fun key;
};
60 
61 
struct perf_kvm_stat;

/*
 * Per-report-type callbacks: how to recognise the sample that starts an
 * event, the sample that ends it, and how to render an event key as text.
 */
struct kvm_events_ops {
	/* Returns true if the sample begins an event; may fill in @key. */
	bool (*is_begin_event)(struct perf_evsel *evsel,
			       struct perf_sample *sample,
			       struct event_key *key);
	/* Returns true if the sample ends an event; may fill in @key. */
	bool (*is_end_event)(struct perf_evsel *evsel,
			     struct perf_sample *sample, struct event_key *key);
	/* Writes a printable form of @key into the 20-byte @decode buffer. */
	void (*decode_key)(struct perf_kvm_stat *kvm, struct event_key *key,
			   char decode[20]);
	/* Column heading printed by print_result(). */
	const char *name;
};
74 
/* One row of an exit-code -> human-readable reason lookup table. */
struct exit_reasons_table {
	unsigned long exit_code;
	const char *reason;
};
79 
/* The event hash has 2^EVENTS_BITS buckets. */
#define EVENTS_BITS		12
#define EVENTS_CACHE_SIZE	(1UL << EVENTS_BITS)

/* All state of one 'perf kvm stat' invocation. */
struct perf_kvm_stat {
	struct perf_tool    tool;	/* callbacks handed to the perf session */
	struct perf_session *session;

	const char *file_name;		/* perf data file to record to / report from */
	const char *report_event;	/* "vmexit", "mmio" or "ioport" */
	const char *sort_key;		/* name looked up in keys[] */
	int trace_vcpu;			/* VCPU to report, or -1 for all VCPUs */

	/* Exit reason table in use and its ISA label, used in error messages. */
	struct exit_reasons_table *exit_reasons;
	int exit_reasons_size;
	const char *exit_reasons_isa;

	struct kvm_events_ops *events_ops;
	key_cmp_fun compare;
	/* Hash of all events seen, bucketed by kvm_events_hash_fn(). */
	struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
	/* Totals accumulated by update_total_count() while sorting. */
	u64 total_time;
	u64 total_count;

	struct rb_root result;		/* events sorted for printing */
};
104 
105 
106 static void exit_event_get_key(struct perf_evsel *evsel,
107 			       struct perf_sample *sample,
108 			       struct event_key *key)
109 {
110 	key->info = 0;
111 	key->key = perf_evsel__intval(evsel, sample, "exit_reason");
112 }
113 
114 static bool kvm_exit_event(struct perf_evsel *evsel)
115 {
116 	return !strcmp(evsel->name, "kvm:kvm_exit");
117 }
118 
/*
 * A VM-exit event begins at kvm_exit. On a match @key is filled from the
 * sample; otherwise @key is left untouched and false is returned.
 */
static bool exit_event_begin(struct perf_evsel *evsel,
			     struct perf_sample *sample, struct event_key *key)
{
	if (!kvm_exit_event(evsel))
		return false;

	exit_event_get_key(evsel, sample, key);
	return true;
}
129 
130 static bool kvm_entry_event(struct perf_evsel *evsel)
131 {
132 	return !strcmp(evsel->name, "kvm:kvm_entry");
133 }
134 
135 static bool exit_event_end(struct perf_evsel *evsel,
136 			   struct perf_sample *sample __maybe_unused,
137 			   struct event_key *key __maybe_unused)
138 {
139 	return kvm_entry_event(evsel);
140 }
141 
/* Exit reason table selected by read_events() when the CPU ISA is Intel. */
static struct exit_reasons_table vmx_exit_reasons[] = {
	VMX_EXIT_REASONS
};

/* Default exit reason table (see the initializer in kvm_cmd_stat()). */
static struct exit_reasons_table svm_exit_reasons[] = {
	SVM_EXIT_REASONS
};
149 
150 static const char *get_exit_reason(struct perf_kvm_stat *kvm, u64 exit_code)
151 {
152 	int i = kvm->exit_reasons_size;
153 	struct exit_reasons_table *tbl = kvm->exit_reasons;
154 
155 	while (i--) {
156 		if (tbl->exit_code == exit_code)
157 			return tbl->reason;
158 		tbl++;
159 	}
160 
161 	pr_err("unknown kvm exit code:%lld on %s\n",
162 		(unsigned long long)exit_code, kvm->exit_reasons_isa);
163 	return "UNKNOWN";
164 }
165 
166 static void exit_event_decode_key(struct perf_kvm_stat *kvm,
167 				  struct event_key *key,
168 				  char decode[20])
169 {
170 	const char *exit_reason = get_exit_reason(kvm, key->key);
171 
172 	scnprintf(decode, 20, "%s", exit_reason);
173 }
174 
/* Ops for '--event vmexit': kvm_exit -> kvm_entry, keyed by exit reason. */
static struct kvm_events_ops exit_events = {
	.is_begin_event = exit_event_begin,
	.is_end_event = exit_event_end,
	.decode_key = exit_event_decode_key,
	.name = "VM-EXIT"
};
181 
182 /*
183  * For the mmio events, we treat:
184  * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
185  * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...).
186  */
187 static void mmio_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample,
188 			       struct event_key *key)
189 {
190 	key->key  = perf_evsel__intval(evsel, sample, "gpa");
191 	key->info = perf_evsel__intval(evsel, sample, "type");
192 }
193 
/*
 * Values compared against the "type" field of kvm:kvm_mmio samples.
 * NOTE(review): presumably mirrors the kernel's trace definitions - confirm.
 */
#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
#define KVM_TRACE_MMIO_READ 1
#define KVM_TRACE_MMIO_WRITE 2
197 
198 static bool mmio_event_begin(struct perf_evsel *evsel,
199 			     struct perf_sample *sample, struct event_key *key)
200 {
201 	/* MMIO read begin event in kernel. */
202 	if (kvm_exit_event(evsel))
203 		return true;
204 
205 	/* MMIO write begin event in kernel. */
206 	if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
207 	    perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
208 		mmio_event_get_key(evsel, sample, key);
209 		return true;
210 	}
211 
212 	return false;
213 }
214 
215 static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample,
216 			   struct event_key *key)
217 {
218 	/* MMIO write end event in kernel. */
219 	if (kvm_entry_event(evsel))
220 		return true;
221 
222 	/* MMIO read end event in kernel.*/
223 	if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
224 	    perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
225 		mmio_event_get_key(evsel, sample, key);
226 		return true;
227 	}
228 
229 	return false;
230 }
231 
232 static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
233 				  struct event_key *key,
234 				  char decode[20])
235 {
236 	scnprintf(decode, 20, "%#lx:%s", (unsigned long)key->key,
237 				key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
238 }
239 
/* Ops for '--event mmio': keyed by guest physical address and access type. */
static struct kvm_events_ops mmio_events = {
	.is_begin_event = mmio_event_begin,
	.is_end_event = mmio_event_end,
	.decode_key = mmio_event_decode_key,
	.name = "MMIO Access"
};
246 
247  /* The time of emulation pio access is from kvm_pio to kvm_entry. */
248 static void ioport_event_get_key(struct perf_evsel *evsel,
249 				 struct perf_sample *sample,
250 				 struct event_key *key)
251 {
252 	key->key  = perf_evsel__intval(evsel, sample, "port");
253 	key->info = perf_evsel__intval(evsel, sample, "rw");
254 }
255 
256 static bool ioport_event_begin(struct perf_evsel *evsel,
257 			       struct perf_sample *sample,
258 			       struct event_key *key)
259 {
260 	if (!strcmp(evsel->name, "kvm:kvm_pio")) {
261 		ioport_event_get_key(evsel, sample, key);
262 		return true;
263 	}
264 
265 	return false;
266 }
267 
268 static bool ioport_event_end(struct perf_evsel *evsel,
269 			     struct perf_sample *sample __maybe_unused,
270 			     struct event_key *key __maybe_unused)
271 {
272 	return kvm_entry_event(evsel);
273 }
274 
275 static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
276 				    struct event_key *key,
277 				    char decode[20])
278 {
279 	scnprintf(decode, 20, "%#llx:%s", (unsigned long long)key->key,
280 				key->info ? "POUT" : "PIN");
281 }
282 
/* Ops for '--event ioport': kvm_pio -> kvm_entry, keyed by port and direction. */
static struct kvm_events_ops ioport_events = {
	.is_begin_event = ioport_event_begin,
	.is_end_event = ioport_event_end,
	.decode_key = ioport_event_decode_key,
	.name = "IO Port Access"
};
289 
290 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
291 {
292 	bool ret = true;
293 
294 	if (!strcmp(kvm->report_event, "vmexit"))
295 		kvm->events_ops = &exit_events;
296 	else if (!strcmp(kvm->report_event, "mmio"))
297 		kvm->events_ops = &mmio_events;
298 	else if (!strcmp(kvm->report_event, "ioport"))
299 		kvm->events_ops = &ioport_events;
300 	else {
301 		pr_err("Unknown report event:%s\n", kvm->report_event);
302 		ret = false;
303 	}
304 
305 	return ret;
306 }
307 
/* Per-thread tracking state, stored in thread->priv by per_vcpu_record(). */
struct vcpu_event_record {
	int vcpu_id;			/* "vcpu_id" field of the first kvm_entry seen */
	u64 start_time;			/* timestamp of the pending begin event, 0 if none */
	struct kvm_event *last_event;	/* event found at begin time, NULL if not yet known */
};
313 
314 
315 static void init_kvm_event_record(struct perf_kvm_stat *kvm)
316 {
317 	unsigned int i;
318 
319 	for (i = 0; i < EVENTS_CACHE_SIZE; i++)
320 		INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
321 }
322 
323 static int kvm_events_hash_fn(u64 key)
324 {
325 	return key & (EVENTS_CACHE_SIZE - 1);
326 }
327 
328 static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
329 {
330 	int old_max_vcpu = event->max_vcpu;
331 
332 	if (vcpu_id < event->max_vcpu)
333 		return true;
334 
335 	while (event->max_vcpu <= vcpu_id)
336 		event->max_vcpu += DEFAULT_VCPU_NUM;
337 
338 	event->vcpu = realloc(event->vcpu,
339 			      event->max_vcpu * sizeof(*event->vcpu));
340 	if (!event->vcpu) {
341 		pr_err("Not enough memory\n");
342 		return false;
343 	}
344 
345 	memset(event->vcpu + old_max_vcpu, 0,
346 	       (event->max_vcpu - old_max_vcpu) * sizeof(*event->vcpu));
347 	return true;
348 }
349 
350 static struct kvm_event *kvm_alloc_init_event(struct event_key *key)
351 {
352 	struct kvm_event *event;
353 
354 	event = zalloc(sizeof(*event));
355 	if (!event) {
356 		pr_err("Not enough memory\n");
357 		return NULL;
358 	}
359 
360 	event->key = *key;
361 	return event;
362 }
363 
364 static struct kvm_event *find_create_kvm_event(struct perf_kvm_stat *kvm,
365 					       struct event_key *key)
366 {
367 	struct kvm_event *event;
368 	struct list_head *head;
369 
370 	BUG_ON(key->key == INVALID_KEY);
371 
372 	head = &kvm->kvm_events_cache[kvm_events_hash_fn(key->key)];
373 	list_for_each_entry(event, head, hash_entry) {
374 		if (event->key.key == key->key && event->key.info == key->info)
375 			return event;
376 	}
377 
378 	event = kvm_alloc_init_event(key);
379 	if (!event)
380 		return NULL;
381 
382 	list_add(&event->hash_entry, head);
383 	return event;
384 }
385 
386 static bool handle_begin_event(struct perf_kvm_stat *kvm,
387 			       struct vcpu_event_record *vcpu_record,
388 			       struct event_key *key, u64 timestamp)
389 {
390 	struct kvm_event *event = NULL;
391 
392 	if (key->key != INVALID_KEY)
393 		event = find_create_kvm_event(kvm, key);
394 
395 	vcpu_record->last_event = event;
396 	vcpu_record->start_time = timestamp;
397 	return true;
398 }
399 
400 static void
401 kvm_update_event_stats(struct kvm_event_stats *kvm_stats, u64 time_diff)
402 {
403 	kvm_stats->time += time_diff;
404 	update_stats(&kvm_stats->stats, time_diff);
405 }
406 
407 static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event)
408 {
409 	struct kvm_event_stats *kvm_stats = &event->total;
410 
411 	if (vcpu_id != -1)
412 		kvm_stats = &event->vcpu[vcpu_id];
413 
414 	return rel_stddev_stats(stddev_stats(&kvm_stats->stats),
415 				avg_stats(&kvm_stats->stats));
416 }
417 
418 static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
419 			     u64 time_diff)
420 {
421 	if (vcpu_id == -1) {
422 		kvm_update_event_stats(&event->total, time_diff);
423 		return true;
424 	}
425 
426 	if (!kvm_event_expand(event, vcpu_id))
427 		return false;
428 
429 	kvm_update_event_stats(&event->vcpu[vcpu_id], time_diff);
430 	return true;
431 }
432 
/*
 * Close the event pending on this VCPU thread and account its duration
 * (@timestamp minus the recorded start time).
 *
 * Returns true when the sample was handled or deliberately ignored, false
 * when the event could not be created or its stats could not be updated.
 */
static bool handle_end_event(struct perf_kvm_stat *kvm,
			     struct vcpu_event_record *vcpu_record,
			     struct event_key *key,
			     u64 timestamp)
{
	struct kvm_event *event;
	u64 time_begin, time_diff;
	int vcpu;

	/* Without a VCPU filter only the all-VCPU totals are maintained. */
	if (kvm->trace_vcpu == -1)
		vcpu = -1;
	else
		vcpu = vcpu_record->vcpu_id;

	event = vcpu_record->last_event;
	time_begin = vcpu_record->start_time;

	/* The begin event is not caught. */
	if (!time_begin)
		return true;

	/*
	 * In some case, the 'begin event' only records the start timestamp,
	 * the actual event is recognized in the 'end event' (e.g. mmio-event).
	 */

	/* Both begin and end events did not get the key. */
	if (!event && key->key == INVALID_KEY)
		return true;

	if (!event)
		event = find_create_kvm_event(kvm, key);

	if (!event)
		return false;

	/* Reset the record so a stray end sample is not counted twice. */
	vcpu_record->last_event = NULL;
	vcpu_record->start_time = 0;

	BUG_ON(timestamp < time_begin);

	time_diff = timestamp - time_begin;
	return update_kvm_event(event, vcpu, time_diff);
}
477 
478 static
479 struct vcpu_event_record *per_vcpu_record(struct thread *thread,
480 					  struct perf_evsel *evsel,
481 					  struct perf_sample *sample)
482 {
483 	/* Only kvm_entry records vcpu id. */
484 	if (!thread->priv && kvm_entry_event(evsel)) {
485 		struct vcpu_event_record *vcpu_record;
486 
487 		vcpu_record = zalloc(sizeof(*vcpu_record));
488 		if (!vcpu_record) {
489 			pr_err("%s: Not enough memory\n", __func__);
490 			return NULL;
491 		}
492 
493 		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, "vcpu_id");
494 		thread->priv = vcpu_record;
495 	}
496 
497 	return thread->priv;
498 }
499 
500 static bool handle_kvm_event(struct perf_kvm_stat *kvm,
501 			     struct thread *thread,
502 			     struct perf_evsel *evsel,
503 			     struct perf_sample *sample)
504 {
505 	struct vcpu_event_record *vcpu_record;
506 	struct event_key key = {.key = INVALID_KEY};
507 
508 	vcpu_record = per_vcpu_record(thread, evsel, sample);
509 	if (!vcpu_record)
510 		return true;
511 
512 	/* only process events for vcpus user cares about */
513 	if ((kvm->trace_vcpu != -1) &&
514 	    (kvm->trace_vcpu != vcpu_record->vcpu_id))
515 		return true;
516 
517 	if (kvm->events_ops->is_begin_event(evsel, sample, &key))
518 		return handle_begin_event(kvm, vcpu_record, &key, sample->time);
519 
520 	if (kvm->events_ops->is_end_event(evsel, sample, &key))
521 		return handle_end_event(kvm, vcpu_record, &key, sample->time);
522 
523 	return true;
524 }
525 
/*
 * GET_EVENT_KEY(func, field) defines get_event_<func>(event, vcpu), which
 * returns @field of the all-VCPU stats when vcpu == -1, of the per-VCPU
 * stats otherwise, or 0 when @vcpu is beyond the allocated per-VCPU array.
 * The result is converted to u64 whatever the type of @field.
 */
#define GET_EVENT_KEY(func, field)					\
static u64 get_event_ ##func(struct kvm_event *event, int vcpu)		\
{									\
	if (vcpu == -1)							\
		return event->total.field;				\
									\
	if (vcpu >= event->max_vcpu)					\
		return 0;						\
									\
	return event->vcpu[vcpu].field;					\
}

/*
 * COMPARE_EVENT_KEY(func, field) defines the getter above plus
 * compare_kvm_event_<func>(one, two, vcpu), which returns non-zero when
 * @one's value is strictly greater than @two's.
 */
#define COMPARE_EVENT_KEY(func, field)					\
GET_EVENT_KEY(func, field)						\
static int compare_kvm_event_ ## func(struct kvm_event *one,		\
					struct kvm_event *two, int vcpu)\
{									\
	return get_event_ ##func(one, vcpu) >				\
				get_event_ ##func(two, vcpu);		\
}

/* get_event_time() only; count and mean also get comparison functions. */
GET_EVENT_KEY(time, time);
COMPARE_EVENT_KEY(count, stats.n);
COMPARE_EVENT_KEY(mean, stats.mean);
550 
/* Builds a keys[] entry: user-visible name -> compare_kvm_event_<compare_key>. */
#define DEF_SORT_NAME_KEY(name, compare_key)				\
	{ #name, compare_kvm_event_ ## compare_key }

/* Sort keys accepted by select_key(); terminated by a NULL name. */
static struct kvm_event_key keys[] = {
	DEF_SORT_NAME_KEY(sample, count),	/* "sample": sort by sample count */
	DEF_SORT_NAME_KEY(time, mean),		/* "time": sort by mean time */
	{ NULL, NULL }
};
559 
560 static bool select_key(struct perf_kvm_stat *kvm)
561 {
562 	int i;
563 
564 	for (i = 0; keys[i].name; i++) {
565 		if (!strcmp(keys[i].name, kvm->sort_key)) {
566 			kvm->compare = keys[i].key;
567 			return true;
568 		}
569 	}
570 
571 	pr_err("Unknown compare key:%s\n", kvm->sort_key);
572 	return false;
573 }
574 
575 static void insert_to_result(struct rb_root *result, struct kvm_event *event,
576 			     key_cmp_fun bigger, int vcpu)
577 {
578 	struct rb_node **rb = &result->rb_node;
579 	struct rb_node *parent = NULL;
580 	struct kvm_event *p;
581 
582 	while (*rb) {
583 		p = container_of(*rb, struct kvm_event, rb);
584 		parent = *rb;
585 
586 		if (bigger(event, p, vcpu))
587 			rb = &(*rb)->rb_left;
588 		else
589 			rb = &(*rb)->rb_right;
590 	}
591 
592 	rb_link_node(&event->rb, parent, rb);
593 	rb_insert_color(&event->rb, result);
594 }
595 
596 static void
597 update_total_count(struct perf_kvm_stat *kvm, struct kvm_event *event)
598 {
599 	int vcpu = kvm->trace_vcpu;
600 
601 	kvm->total_count += get_event_count(event, vcpu);
602 	kvm->total_time += get_event_time(event, vcpu);
603 }
604 
/* An event is worth reporting when it has at least one sample for @vcpu. */
static bool event_is_valid(struct kvm_event *event, int vcpu)
{
	return get_event_count(event, vcpu) != 0;
}
609 
610 static void sort_result(struct perf_kvm_stat *kvm)
611 {
612 	unsigned int i;
613 	int vcpu = kvm->trace_vcpu;
614 	struct kvm_event *event;
615 
616 	for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
617 		list_for_each_entry(event, &kvm->kvm_events_cache[i], hash_entry) {
618 			if (event_is_valid(event, vcpu)) {
619 				update_total_count(kvm, event);
620 				insert_to_result(&kvm->result, event,
621 						 kvm->compare, vcpu);
622 			}
623 		}
624 	}
625 }
626 
627 /* returns left most element of result, and erase it */
628 static struct kvm_event *pop_from_result(struct rb_root *result)
629 {
630 	struct rb_node *node = rb_first(result);
631 
632 	if (!node)
633 		return NULL;
634 
635 	rb_erase(node, result);
636 	return container_of(node, struct kvm_event, rb);
637 }
638 
/* Print the report heading: which VCPU (or all of them) is being analysed. */
static void print_vcpu_info(int vcpu)
{
	pr_info("Analyze events for ");

	if (vcpu != -1) {
		pr_info("VCPU %d:\n\n", vcpu);
		return;
	}

	pr_info("all VCPUs:\n\n");
}
648 
/*
 * Print the report table: one row per event popped from the result tree
 * (leftmost first), followed by the totals. Emptying the tree is a side
 * effect of printing.
 */
static void print_result(struct perf_kvm_stat *kvm)
{
	char decode[20];
	struct kvm_event *event;
	int vcpu = kvm->trace_vcpu;

	/* Table header. */
	pr_info("\n\n");
	print_vcpu_info(vcpu);
	pr_info("%20s ", kvm->events_ops->name);
	pr_info("%10s ", "Samples");
	pr_info("%9s ", "Samples%");

	pr_info("%9s ", "Time%");
	pr_info("%16s ", "Avg time");
	pr_info("\n\n");

	while ((event = pop_from_result(&kvm->result))) {
		u64 ecount, etime;

		ecount = get_event_count(event, vcpu);
		etime = get_event_time(event, vcpu);

		kvm->events_ops->decode_key(kvm, &event->key, decode);
		pr_info("%20s ", decode);
		pr_info("%10llu ", (unsigned long long)ecount);
		pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
		pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
		/* Times are divided by 1e3 and labelled "us". */
		pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3,
			kvm_event_rel_stddev(vcpu, event));
		pr_info("\n");
	}

	pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
		kvm->total_count, kvm->total_time / 1e3);
}
684 
685 static int process_sample_event(struct perf_tool *tool,
686 				union perf_event *event,
687 				struct perf_sample *sample,
688 				struct perf_evsel *evsel,
689 				struct machine *machine)
690 {
691 	struct thread *thread = machine__findnew_thread(machine, sample->tid);
692 	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
693 						 tool);
694 
695 	if (thread == NULL) {
696 		pr_debug("problem processing %d event, skipping it.\n",
697 			event->header.type);
698 		return -1;
699 	}
700 
701 	if (!handle_kvm_event(kvm, thread, evsel, sample))
702 		return -1;
703 
704 	return 0;
705 }
706 
707 static int get_cpu_isa(struct perf_session *session)
708 {
709 	char *cpuid = session->header.env.cpuid;
710 	int isa;
711 
712 	if (strstr(cpuid, "Intel"))
713 		isa = 1;
714 	else if (strstr(cpuid, "AMD"))
715 		isa = 0;
716 	else {
717 		pr_err("CPU %s is not supported.\n", cpuid);
718 		isa = -ENOTSUP;
719 	}
720 
721 	return isa;
722 }
723 
724 static int read_events(struct perf_kvm_stat *kvm)
725 {
726 	int ret;
727 
728 	struct perf_tool eops = {
729 		.sample			= process_sample_event,
730 		.comm			= perf_event__process_comm,
731 		.ordered_samples	= true,
732 	};
733 
734 	kvm->tool = eops;
735 	kvm->session = perf_session__new(kvm->file_name, O_RDONLY, 0, false,
736 					 &kvm->tool);
737 	if (!kvm->session) {
738 		pr_err("Initializing perf session failed\n");
739 		return -EINVAL;
740 	}
741 
742 	if (!perf_session__has_traces(kvm->session, "kvm record"))
743 		return -EINVAL;
744 
745 	/*
746 	 * Do not use 'isa' recorded in kvm_exit tracepoint since it is not
747 	 * traced in the old kernel.
748 	 */
749 	ret = get_cpu_isa(kvm->session);
750 
751 	if (ret < 0)
752 		return ret;
753 
754 	if (ret == 1) {
755 		kvm->exit_reasons = vmx_exit_reasons;
756 		kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons);
757 		kvm->exit_reasons_isa = "VMX";
758 	}
759 
760 	return perf_session__process_events(kvm->session, &kvm->tool);
761 }
762 
/*
 * Validate the --vcpu argument: -1 (all VCPUs) and non-negative ids are
 * accepted; any other negative value is rejected with an error message.
 */
static bool verify_vcpu(int vcpu)
{
	if (vcpu >= -1)
		return true;

	pr_err("Invalid vcpu:%d.\n", vcpu);
	return false;
}
772 
773 static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
774 {
775 	int ret = -EINVAL;
776 	int vcpu = kvm->trace_vcpu;
777 
778 	if (!verify_vcpu(vcpu))
779 		goto exit;
780 
781 	if (!select_key(kvm))
782 		goto exit;
783 
784 	if (!register_kvm_events_ops(kvm))
785 		goto exit;
786 
787 	init_kvm_event_record(kvm);
788 	setup_pager();
789 
790 	ret = read_events(kvm);
791 	if (ret)
792 		goto exit;
793 
794 	sort_result(kvm);
795 	print_result(kvm);
796 
797 exit:
798 	return ret;
799 }
800 
/*
 * Fixed arguments prepended to the 'perf record' command line by
 * kvm_events_record(): the four kvm tracepoints used by the report side,
 * plus the recording options below.
 */
static const char * const record_args[] = {
	"record",
	"-R",
	"-f",
	"-m", "1024",
	"-c", "1",
	"-e", "kvm:kvm_entry",
	"-e", "kvm:kvm_exit",
	"-e", "kvm:kvm_mmio",
	"-e", "kvm:kvm_pio",
};
812 
/*
 * Evaluates to strdup(s). On allocation failure it executes
 * 'return -ENOMEM' in the *calling* function, so it may only be used in
 * functions returning int; nothing the caller allocated earlier is freed.
 */
#define STRDUP_FAIL_EXIT(s)		\
	({	char *_p;		\
	_p = strdup(s);		\
		if (!_p)		\
			return -ENOMEM;	\
		_p;			\
	})
820 
821 static int
822 kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
823 {
824 	unsigned int rec_argc, i, j;
825 	const char **rec_argv;
826 
827 	rec_argc = ARRAY_SIZE(record_args) + argc + 2;
828 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
829 
830 	if (rec_argv == NULL)
831 		return -ENOMEM;
832 
833 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
834 		rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
835 
836 	rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
837 	rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
838 
839 	for (j = 1; j < (unsigned int)argc; j++, i++)
840 		rec_argv[i] = argv[j];
841 
842 	return cmd_record(i, rec_argv, NULL);
843 }
844 
/*
 * 'perf kvm stat report': parse the report options into @kvm and run the
 * report. Any leftover non-option argument triggers the usage message.
 */
static int
kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
{
	const struct option kvm_events_report_options[] = {
		OPT_STRING(0, "event", &kvm->report_event, "report event",
			    "event for reporting: vmexit, mmio, ioport"),
		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
			    "vcpu id to report"),
		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
			    "key for sorting: sample(sort by samples number)"
			    " time (sort by avg time)"),
		OPT_END()
	};

	const char * const kvm_events_report_usage[] = {
		"perf kvm stat report [<options>]",
		NULL
	};

	symbol__init();

	if (argc) {
		argc = parse_options(argc, argv,
				     kvm_events_report_options,
				     kvm_events_report_usage, 0);
		/* Anything left after option parsing is an error. */
		if (argc)
			usage_with_options(kvm_events_report_usage,
					   kvm_events_report_options);
	}

	return kvm_events_report_vcpu(kvm);
}
877 
/* Print the short usage text shown for a bare 'perf kvm stat'. */
static void print_kvm_stat_usage(void)
{
	printf("Usage: perf kvm stat <command>\n\n"
	       "# Available commands:\n"
	       "\trecord: record kvm events\n"
	       "\treport: report statistical data of kvm events\n"
	       "\nOtherwise, it is the alias of 'perf stat':\n");
}
888 
889 static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
890 {
891 	struct perf_kvm_stat kvm = {
892 		.file_name = file_name,
893 
894 		.trace_vcpu	= -1,
895 		.report_event	= "vmexit",
896 		.sort_key	= "sample",
897 
898 		.exit_reasons = svm_exit_reasons,
899 		.exit_reasons_size = ARRAY_SIZE(svm_exit_reasons),
900 		.exit_reasons_isa = "SVM",
901 	};
902 
903 	if (argc == 1) {
904 		print_kvm_stat_usage();
905 		goto perf_stat;
906 	}
907 
908 	if (!strncmp(argv[1], "rec", 3))
909 		return kvm_events_record(&kvm, argc - 1, argv + 1);
910 
911 	if (!strncmp(argv[1], "rep", 3))
912 		return kvm_events_report(&kvm, argc - 1 , argv + 1);
913 
914 perf_stat:
915 	return cmd_stat(argc, argv, NULL);
916 }
917 #endif
918 
/*
 * 'perf kvm record': run 'perf record -o <file_name>' followed by the
 * user's arguments (argv[1..]; argv[0] is the sub-command name).
 *
 * Returns -ENOMEM when the argument vector cannot be built, otherwise the
 * result of cmd_record(). The vector is intentionally not freed afterwards
 * because cmd_record() receives ownership of it for option parsing.
 */
static int __cmd_record(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("record");
	rec_argv[i++] = strdup("-o");
	rec_argv[i++] = strdup(file_name);

	/* A NULL entry would silently truncate the command line. */
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		while (i--)
			free((void *)rec_argv[i]);
		free(rec_argv);
		return -ENOMEM;
	}

	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}
936 
/*
 * 'perf kvm report': run 'perf report -i <file_name>' followed by the
 * user's arguments (argv[1..]; argv[0] is the sub-command name).
 *
 * Returns -ENOMEM when the argument vector cannot be built, otherwise the
 * result of cmd_report(). The vector is intentionally not freed afterwards
 * because cmd_report() receives ownership of it for option parsing.
 */
static int __cmd_report(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("report");
	rec_argv[i++] = strdup("-i");
	rec_argv[i++] = strdup(file_name);

	/* A NULL entry would silently truncate the command line. */
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		while (i--)
			free((void *)rec_argv[i]);
		free(rec_argv);
		return -ENOMEM;
	}

	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_report(i, rec_argv, NULL);
}
954 
/*
 * 'perf kvm buildid-list': run 'perf buildid-list -i <file_name>' followed
 * by the user's arguments (argv[1..]; argv[0] is the sub-command name).
 *
 * Returns -ENOMEM when the argument vector cannot be built, otherwise the
 * result of cmd_buildid_list(). The vector is intentionally not freed
 * afterwards because the callee receives ownership of it for option
 * parsing.
 */
static int
__cmd_buildid_list(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("buildid-list");
	rec_argv[i++] = strdup("-i");
	rec_argv[i++] = strdup(file_name);

	/* A NULL entry would silently truncate the command line. */
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		while (i--)
			free((void *)rec_argv[i]);
		free(rec_argv);
		return -ENOMEM;
	}

	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_buildid_list(i, rec_argv, NULL);
}
973 
974 int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
975 {
976 	const char *file_name;
977 
978 	const struct option kvm_options[] = {
979 		OPT_STRING('i', "input", &file_name, "file",
980 			   "Input file name"),
981 		OPT_STRING('o', "output", &file_name, "file",
982 			   "Output file name"),
983 		OPT_BOOLEAN(0, "guest", &perf_guest,
984 			    "Collect guest os data"),
985 		OPT_BOOLEAN(0, "host", &perf_host,
986 			    "Collect host os data"),
987 		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
988 			   "guest mount directory under which every guest os"
989 			   " instance has a subdir"),
990 		OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name,
991 			   "file", "file saving guest os vmlinux"),
992 		OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms,
993 			   "file", "file saving guest os /proc/kallsyms"),
994 		OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
995 			   "file", "file saving guest os /proc/modules"),
996 		OPT_END()
997 	};
998 
999 
1000 	const char * const kvm_usage[] = {
1001 		"perf kvm [<options>] {top|record|report|diff|buildid-list|stat}",
1002 		NULL
1003 	};
1004 
1005 	perf_host  = 0;
1006 	perf_guest = 1;
1007 
1008 	argc = parse_options(argc, argv, kvm_options, kvm_usage,
1009 			PARSE_OPT_STOP_AT_NON_OPTION);
1010 	if (!argc)
1011 		usage_with_options(kvm_usage, kvm_options);
1012 
1013 	if (!perf_host)
1014 		perf_guest = 1;
1015 
1016 	if (!file_name) {
1017 		if (perf_host && !perf_guest)
1018 			file_name = strdup("perf.data.host");
1019 		else if (!perf_host && perf_guest)
1020 			file_name = strdup("perf.data.guest");
1021 		else
1022 			file_name = strdup("perf.data.kvm");
1023 
1024 		if (!file_name) {
1025 			pr_err("Failed to allocate memory for filename\n");
1026 			return -ENOMEM;
1027 		}
1028 	}
1029 
1030 	if (!strncmp(argv[0], "rec", 3))
1031 		return __cmd_record(file_name, argc, argv);
1032 	else if (!strncmp(argv[0], "rep", 3))
1033 		return __cmd_report(file_name, argc, argv);
1034 	else if (!strncmp(argv[0], "diff", 4))
1035 		return cmd_diff(argc, argv, NULL);
1036 	else if (!strncmp(argv[0], "top", 3))
1037 		return cmd_top(argc, argv, NULL);
1038 	else if (!strncmp(argv[0], "buildid-list", 12))
1039 		return __cmd_buildid_list(file_name, argc, argv);
1040 #if defined(__i386__) || defined(__x86_64__)
1041 	else if (!strncmp(argv[0], "stat", 4))
1042 		return kvm_cmd_stat(file_name, argc, argv);
1043 #endif
1044 	else
1045 		usage_with_options(kvm_usage, kvm_options);
1046 
1047 	return 0;
1048 }
1049