xref: /openbmc/linux/tools/testing/selftests/bpf/veristat.c (revision fbe605ab157b174385b3f19ce33928d3548a9b09)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <string.h>
6 #include <stdlib.h>
7 #include <linux/compiler.h>
8 #include <sched.h>
9 #include <pthread.h>
10 #include <dirent.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 #include <unistd.h>
14 #include <sys/time.h>
15 #include <sys/sysinfo.h>
16 #include <sys/stat.h>
17 #include <bpf/libbpf.h>
18 #include <libelf.h>
19 #include <gelf.h>
20 
/* Identifiers of all per-program stats veristat knows about.
 *
 * The numeric stats come first so that their ids can index
 * verif_stats.stats[] directly; FILE_NAME and PROG_NAME are string-valued
 * and are stored in dedicated verif_stats fields instead.
 */
enum stat_id {
	VERDICT,
	DURATION,
	TOTAL_INSNS,
	TOTAL_STATES,
	PEAK_STATES,
	MAX_STATES_PER_INSN,
	MARK_READ_MAX_LEN,

	FILE_NAME,
	PROG_NAME,

	/* number of all stat ids above (numeric and string ones) */
	ALL_STATS_CNT,
	/* number of numeric stats, i.e. ids in [VERDICT, FILE_NAME) */
	NUM_STATS_CNT = FILE_NAME - VERDICT,
};
36 
/* Stats of a single BPF program: which object file and program they belong
 * to, plus one value per numeric stat.
 */
struct verif_stats {
	char *file_name;
	char *prog_name;

	/* indexed by the numeric enum stat_id values (VERDICT, DURATION, ...) */
	long stats[NUM_STATS_CNT];
};
43 
/* An ordered selection of stats, used both to describe output columns and to
 * describe sort keys.
 */
struct stat_specs {
	/* number of valid entries in ids[] and asc[] */
	int spec_cnt;
	enum stat_id ids[ALL_STATS_CNT];
	/* per-entry sort direction: true means ascending */
	bool asc[ALL_STATS_CNT];
	/* column widths computed for table output */
	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
50 
/* Output format of the results. */
enum resfmt {
	RESFMT_TABLE,
	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
	RESFMT_CSV,
};
56 
/* One allow/deny filter expression, as parsed by append_filter(). */
struct filter {
	/* glob matched against the object file name */
	char *file_glob;
	/* glob matched against the program name; NULL if the expression had
	 * no "/<prog-glob>" part
	 */
	char *prog_glob;
};
61 
/* Global tool state: parsed command-line settings, collected stats and
 * processing counters.
 */
static struct env {
	/* positional arguments: object files, or CSV files in comparison mode */
	char **filenames;
	int filename_cnt;
	bool verbose;
	bool quiet;
	/* verifier log level requested with -l */
	int log_level;
	enum resfmt out_fmt;
	bool comparison_mode;

	/* one entry per processed program (or per parsed CSV row) */
	struct verif_stats *prog_stats;
	int prog_stat_cnt;

	/* baseline_stats is allocated and used only in comparison mode */
	struct verif_stats *baseline_stats;
	int baseline_stat_cnt;

	struct stat_specs output_spec;
	struct stat_specs sort_spec;

	struct filter *allow_filters;
	struct filter *deny_filters;
	int allow_filter_cnt;
	int deny_filter_cnt;

	/* counters reported in the summary line after the results table */
	int files_processed;
	int files_skipped;
	int progs_processed;
	int progs_skipped;
} env;
91 
92 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
93 {
94 	if (!env.verbose)
95 		return 0;
96 	if (level == LIBBPF_DEBUG /* && !env.verbose */)
97 		return 0;
98 	return vfprintf(stderr, format, args);
99 }
100 
/* Program version, bug-report address and usage text; the first two use the
 * well-known names that argp looks up for its --version and --help output.
 */
const char *argp_program_version = "veristat";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
"veristat    BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
"   OR: veristat -C <baseline.csv> <comparison.csv>\n";
108 
/* Command-line options; each key is handled by the matching case in
 * parse_arg(). The empty entry terminates the table.
 */
static const struct argp_option opts[] = {
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{ "verbose", 'v', NULL, 0, "Verbose mode" },
	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
	{ "quiet", 'q', NULL, 0, "Quiet mode" },
	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
	{ "sort", 's', "SPEC", 0, "Specify sort order" },
	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
	{ "compare", 'C', NULL, 0, "Comparison mode" },
	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
	{},
};
121 
/* forward declarations of helpers used by parse_arg() and defined below it */
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
125 
126 static error_t parse_arg(int key, char *arg, struct argp_state *state)
127 {
128 	void *tmp;
129 	int err;
130 
131 	switch (key) {
132 	case 'h':
133 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
134 		break;
135 	case 'v':
136 		env.verbose = true;
137 		break;
138 	case 'q':
139 		env.quiet = true;
140 		break;
141 	case 'e':
142 		err = parse_stats(arg, &env.output_spec);
143 		if (err)
144 			return err;
145 		break;
146 	case 's':
147 		err = parse_stats(arg, &env.sort_spec);
148 		if (err)
149 			return err;
150 		break;
151 	case 'o':
152 		if (strcmp(arg, "table") == 0) {
153 			env.out_fmt = RESFMT_TABLE;
154 		} else if (strcmp(arg, "csv") == 0) {
155 			env.out_fmt = RESFMT_CSV;
156 		} else {
157 			fprintf(stderr, "Unrecognized output format '%s'\n", arg);
158 			return -EINVAL;
159 		}
160 		break;
161 	case 'l':
162 		errno = 0;
163 		env.log_level = strtol(arg, NULL, 10);
164 		if (errno) {
165 			fprintf(stderr, "invalid log level: %s\n", arg);
166 			argp_usage(state);
167 		}
168 		break;
169 	case 'C':
170 		env.comparison_mode = true;
171 		break;
172 	case 'f':
173 		if (arg[0] == '@')
174 			err = append_filter_file(arg + 1);
175 		else if (arg[0] == '!')
176 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
177 		else
178 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
179 		if (err) {
180 			fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
181 			return err;
182 		}
183 		break;
184 	case ARGP_KEY_ARG:
185 		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
186 		if (!tmp)
187 			return -ENOMEM;
188 		env.filenames = tmp;
189 		env.filenames[env.filename_cnt] = strdup(arg);
190 		if (!env.filenames[env.filename_cnt])
191 			return -ENOMEM;
192 		env.filename_cnt++;
193 		break;
194 	default:
195 		return ARGP_ERR_UNKNOWN;
196 	}
197 	return 0;
198 }
199 
/* argp parser description tying together the option table, the per-option
 * callback and the usage text.
 */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
205 
206 
/* Match @str against glob pattern @pat, in which '*' stands for any (possibly
 * empty) run of characters and every other character matches only itself.
 * Returns true if the whole string matches the whole pattern.
 *
 * Adapted from perf/util/string.c
 */
static bool glob_matches(const char *str, const char *pat)
{
	const char *s = str, *p = pat;

	/* consume the literal prefix preceding the first wildcard */
	for (; *s != '\0' && *p != '\0' && *p != '*'; s++, p++) {
		if (*s != *p)
			return false;
	}

	/* no wildcard ahead: both sides have to be fully consumed */
	if (*p != '*')
		return *s == '\0' && *p == '\0';

	/* a run of wildcards is equivalent to a single one */
	do {
		p++;
	} while (*p == '*');

	/* tail wildcard matches everything that is left */
	if (*p == '\0')
		return true;

	/* try the rest of the pattern at every remaining position */
	for (; *s != '\0'; s++) {
		if (glob_matches(s, p))
			return true;
	}

	/* string is exhausted, but the pattern still expects characters */
	return false;
}
228 
229 static bool should_process_file(const char *filename)
230 {
231 	int i;
232 
233 	if (env.deny_filter_cnt > 0) {
234 		for (i = 0; i < env.deny_filter_cnt; i++) {
235 			if (glob_matches(filename, env.deny_filters[i].file_glob))
236 				return false;
237 		}
238 	}
239 
240 	if (env.allow_filter_cnt == 0)
241 		return true;
242 
243 	for (i = 0; i < env.allow_filter_cnt; i++) {
244 		if (glob_matches(filename, env.allow_filters[i].file_glob))
245 			return true;
246 	}
247 
248 	return false;
249 }
250 
251 static bool is_bpf_obj_file(const char *path) {
252 	Elf64_Ehdr *ehdr;
253 	int fd, err = -EINVAL;
254 	Elf *elf = NULL;
255 
256 	fd = open(path, O_RDONLY | O_CLOEXEC);
257 	if (fd < 0)
258 		return true; /* we'll fail later and propagate error */
259 
260 	/* ensure libelf is initialized */
261 	(void)elf_version(EV_CURRENT);
262 
263 	elf = elf_begin(fd, ELF_C_READ, NULL);
264 	if (!elf)
265 		goto cleanup;
266 
267 	if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
268 		goto cleanup;
269 
270 	ehdr = elf64_getehdr(elf);
271 	/* Old LLVM set e_machine to EM_NONE */
272 	if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
273 		goto cleanup;
274 
275 	err = 0;
276 cleanup:
277 	if (elf)
278 		elf_end(elf);
279 	close(fd);
280 	return err == 0;
281 }
282 
283 static bool should_process_prog(const char *path, const char *prog_name)
284 {
285 	const char *filename = basename(path);
286 	int i;
287 
288 	if (env.deny_filter_cnt > 0) {
289 		for (i = 0; i < env.deny_filter_cnt; i++) {
290 			if (glob_matches(filename, env.deny_filters[i].file_glob))
291 				return false;
292 			if (!env.deny_filters[i].prog_glob)
293 				continue;
294 			if (glob_matches(prog_name, env.deny_filters[i].prog_glob))
295 				return false;
296 		}
297 	}
298 
299 	if (env.allow_filter_cnt == 0)
300 		return true;
301 
302 	for (i = 0; i < env.allow_filter_cnt; i++) {
303 		if (!glob_matches(filename, env.allow_filters[i].file_glob))
304 			continue;
305 		/* if filter specifies only filename glob part, it implicitly
306 		 * allows all progs within that file
307 		 */
308 		if (!env.allow_filters[i].prog_glob)
309 			return true;
310 		if (glob_matches(prog_name, env.allow_filters[i].prog_glob))
311 			return true;
312 	}
313 
314 	return false;
315 }
316 
317 static int append_filter(struct filter **filters, int *cnt, const char *str)
318 {
319 	struct filter *f;
320 	void *tmp;
321 	const char *p;
322 
323 	tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
324 	if (!tmp)
325 		return -ENOMEM;
326 	*filters = tmp;
327 
328 	f = &(*filters)[*cnt];
329 	f->file_glob = f->prog_glob = NULL;
330 
331 	/* filter can be specified either as "<obj-glob>" or "<obj-glob>/<prog-glob>" */
332 	p = strchr(str, '/');
333 	if (!p) {
334 		f->file_glob = strdup(str);
335 		if (!f->file_glob)
336 			return -ENOMEM;
337 	} else {
338 		f->file_glob = strndup(str, p - str);
339 		f->prog_glob = strdup(p + 1);
340 		if (!f->file_glob || !f->prog_glob) {
341 			free(f->file_glob);
342 			free(f->prog_glob);
343 			f->file_glob = f->prog_glob = NULL;
344 			return -ENOMEM;
345 		}
346 	}
347 
348 	*cnt = *cnt + 1;
349 	return 0;
350 }
351 
352 static int append_filter_file(const char *path)
353 {
354 	char buf[1024];
355 	FILE *f;
356 	int err = 0;
357 
358 	f = fopen(path, "r");
359 	if (!f) {
360 		err = -errno;
361 		fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
362 		return err;
363 	}
364 
365 	while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
366 		/* lines starting with # are comments, skip them */
367 		if (buf[0] == '\0' || buf[0] == '#')
368 			continue;
369 		/* lines starting with ! are negative match filters */
370 		if (buf[0] == '!')
371 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
372 		else
373 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
374 		if (err)
375 			goto cleanup;
376 	}
377 
378 cleanup:
379 	fclose(f);
380 	return err;
381 }
382 
/* Default set and order of output columns. */
static const struct stat_specs default_output_spec = {
	.spec_cnt = 7,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
	},
};
390 
/* Default sort order: by file name, then by program name, both ascending. */
static const struct stat_specs default_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
398 
/* Per-stat metadata, indexed by enum stat_id. */
static struct stat_def {
	/* column title used in table output */
	const char *header;
	/* names accepted by parse_stat(); names[0] is also the CSV column name */
	const char *names[4];
	/* default sort direction recorded by parse_stat() */
	bool asc_by_default;
} stat_defs[] = {
	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */ },
	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */ },
	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */ },
	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
	[TOTAL_INSNS] = { "Total insns", {"total_insns", "insns"}, },
	[TOTAL_STATES] = { "Total states", {"total_states", "states"}, },
	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
};
414 
415 static int parse_stat(const char *stat_name, struct stat_specs *specs)
416 {
417 	int id, i;
418 
419 	if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
420 		fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
421 		return -E2BIG;
422 	}
423 
424 	for (id = 0; id < ARRAY_SIZE(stat_defs); id++) {
425 		struct stat_def *def = &stat_defs[id];
426 
427 		for (i = 0; i < ARRAY_SIZE(stat_defs[id].names); i++) {
428 			if (!def->names[i] || strcmp(def->names[i], stat_name) != 0)
429 				continue;
430 
431 			specs->ids[specs->spec_cnt] = id;
432 			specs->asc[specs->spec_cnt] = def->asc_by_default;
433 			specs->spec_cnt++;
434 
435 			return 0;
436 		}
437 	}
438 
439 	fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
440 	return -ESRCH;
441 }
442 
/* Parse a comma-separated list of stat names in @stats_str and append each
 * of them to @specs.
 *
 * Returns 0 on success, -ENOMEM if the working copy of the string can't be
 * allocated, or the error reported by parse_stat() for the first bad name.
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs)
{
	char *input, *state = NULL, *next;
	int err = 0;

	/* strtok_r() modifies its input, so tokenize a private copy */
	input = strdup(stats_str);
	if (!input)
		return -ENOMEM;

	/* pass the string on the first call only; don't infer "first call"
	 * from the save pointer, whose value is implementation-defined
	 */
	for (next = strtok_r(input, ",", &state); next; next = strtok_r(NULL, ",", &state)) {
		err = parse_stat(next, specs);
		if (err)
			break;
	}

	/* parse_stat() keeps no pointers into the copy, so release it */
	free(input);
	return err;
}
460 
461 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
462 {
463 	int i;
464 
465 	if (!stats)
466 		return;
467 
468 	for (i = 0; i < stat_cnt; i++) {
469 		free(stats[i].file_name);
470 		free(stats[i].prog_name);
471 	}
472 	free(stats);
473 }
474 
/* verifier log buffer used by process_prog() in non-verbose mode */
static char verif_log_buf[64 * 1024];

/* upper bound on the number of trailing log lines parse_verif_log() inspects */
#define MAX_PARSED_LOG_LINES 100
478 
479 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
480 {
481 	const char *cur;
482 	int pos, lines;
483 
484 	buf[buf_sz - 1] = '\0';
485 
486 	for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
487 		/* find previous endline or otherwise take the start of log buf */
488 		for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
489 		}
490 		/* next time start from end of previous line (or pos goes to <0) */
491 		pos--;
492 		/* if we found endline, point right after endline symbol;
493 		 * otherwise, stay at the beginning of log buf
494 		 */
495 		if (cur[0] == '\n')
496 			cur++;
497 
498 		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
499 			continue;
500 		if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
501 				&s->stats[TOTAL_INSNS],
502 				&s->stats[MAX_STATES_PER_INSN],
503 				&s->stats[TOTAL_STATES],
504 				&s->stats[PEAK_STATES],
505 				&s->stats[MARK_READ_MAX_LEN]))
506 			continue;
507 	}
508 
509 	return 0;
510 }
511 
512 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
513 {
514 	const char *prog_name = bpf_program__name(prog);
515 	size_t buf_sz = sizeof(verif_log_buf);
516 	char *buf = verif_log_buf;
517 	struct verif_stats *stats;
518 	int err = 0;
519 	void *tmp;
520 
521 	if (!should_process_prog(filename, bpf_program__name(prog))) {
522 		env.progs_skipped++;
523 		return 0;
524 	}
525 
526 	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
527 	if (!tmp)
528 		return -ENOMEM;
529 	env.prog_stats = tmp;
530 	stats = &env.prog_stats[env.prog_stat_cnt++];
531 	memset(stats, 0, sizeof(*stats));
532 
533 	if (env.verbose) {
534 		buf_sz = 16 * 1024 * 1024;
535 		buf = malloc(buf_sz);
536 		if (!buf)
537 			return -ENOMEM;
538 		bpf_program__set_log_buf(prog, buf, buf_sz);
539 		bpf_program__set_log_level(prog, env.log_level | 4); /* stats + log */
540 	} else {
541 		bpf_program__set_log_buf(prog, buf, buf_sz);
542 		bpf_program__set_log_level(prog, 4); /* only verifier stats */
543 	}
544 	verif_log_buf[0] = '\0';
545 
546 	err = bpf_object__load(obj);
547 	env.progs_processed++;
548 
549 	stats->file_name = strdup(basename(filename));
550 	stats->prog_name = strdup(bpf_program__name(prog));
551 	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
552 	parse_verif_log(buf, buf_sz, stats);
553 
554 	if (env.verbose) {
555 		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
556 		       filename, prog_name, stats->stats[DURATION],
557 		       err ? "failure" : "success", buf);
558 	}
559 
560 	if (verif_log_buf != buf)
561 		free(buf);
562 
563 	return 0;
564 };
565 
566 static int process_obj(const char *filename)
567 {
568 	struct bpf_object *obj = NULL, *tobj;
569 	struct bpf_program *prog, *tprog, *lprog;
570 	libbpf_print_fn_t old_libbpf_print_fn;
571 	LIBBPF_OPTS(bpf_object_open_opts, opts);
572 	int err = 0, prog_cnt = 0;
573 
574 	if (!should_process_file(basename(filename))) {
575 		if (env.verbose)
576 			printf("Skipping '%s' due to filters...\n", filename);
577 		env.files_skipped++;
578 		return 0;
579 	}
580 	if (!is_bpf_obj_file(filename)) {
581 		if (env.verbose)
582 			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
583 		env.files_skipped++;
584 		return 0;
585 	}
586 
587 	if (!env.quiet && env.out_fmt == RESFMT_TABLE)
588 		printf("Processing '%s'...\n", basename(filename));
589 
590 	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
591 	obj = bpf_object__open_file(filename, &opts);
592 	if (!obj) {
593 		/* if libbpf can't open BPF object file, it could be because
594 		 * that BPF object file is incomplete and has to be statically
595 		 * linked into a final BPF object file; instead of bailing
596 		 * out, report it into stderr, mark it as skipped, and
597 		 * proceeed
598 		 */
599 		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
600 		env.files_skipped++;
601 		err = 0;
602 		goto cleanup;
603 	}
604 
605 	env.files_processed++;
606 
607 	bpf_object__for_each_program(prog, obj) {
608 		prog_cnt++;
609 	}
610 
611 	if (prog_cnt == 1) {
612 		prog = bpf_object__next_program(obj, NULL);
613 		bpf_program__set_autoload(prog, true);
614 		process_prog(filename, obj, prog);
615 		goto cleanup;
616 	}
617 
618 	bpf_object__for_each_program(prog, obj) {
619 		const char *prog_name = bpf_program__name(prog);
620 
621 		tobj = bpf_object__open_file(filename, &opts);
622 		if (!tobj) {
623 			err = -errno;
624 			fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
625 			goto cleanup;
626 		}
627 
628 		bpf_object__for_each_program(tprog, tobj) {
629 			const char *tprog_name = bpf_program__name(tprog);
630 
631 			if (strcmp(prog_name, tprog_name) == 0) {
632 				bpf_program__set_autoload(tprog, true);
633 				lprog = tprog;
634 			} else {
635 				bpf_program__set_autoload(tprog, false);
636 			}
637 		}
638 
639 		process_prog(filename, tobj, lprog);
640 		bpf_object__close(tobj);
641 	}
642 
643 cleanup:
644 	bpf_object__close(obj);
645 	libbpf_set_print(old_libbpf_print_fn);
646 	return err;
647 }
648 
649 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
650 		    enum stat_id id, bool asc)
651 {
652 	int cmp = 0;
653 
654 	switch (id) {
655 	case FILE_NAME:
656 		cmp = strcmp(s1->file_name, s2->file_name);
657 		break;
658 	case PROG_NAME:
659 		cmp = strcmp(s1->prog_name, s2->prog_name);
660 		break;
661 	case VERDICT:
662 	case DURATION:
663 	case TOTAL_INSNS:
664 	case TOTAL_STATES:
665 	case PEAK_STATES:
666 	case MAX_STATES_PER_INSN:
667 	case MARK_READ_MAX_LEN: {
668 		long v1 = s1->stats[id];
669 		long v2 = s2->stats[id];
670 
671 		if (v1 != v2)
672 			cmp = v1 < v2 ? -1 : 1;
673 		break;
674 	}
675 	default:
676 		fprintf(stderr, "Unrecognized stat #%d\n", id);
677 		exit(1);
678 	}
679 
680 	return asc ? cmp : -cmp;
681 }
682 
683 static int cmp_prog_stats(const void *v1, const void *v2)
684 {
685 	const struct verif_stats *s1 = v1, *s2 = v2;
686 	int i, cmp;
687 
688 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
689 		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
690 		if (cmp != 0)
691 			return cmp;
692 	}
693 
694 	return 0;
695 }
696 
/* character the table header underlines are made of */
#define HEADER_CHAR '-'
/* separator printed between adjacent table columns */
#define COLUMN_SEP "  "
699 
700 static void output_header_underlines(void)
701 {
702 	int i, j, len;
703 
704 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
705 		len = env.output_spec.lens[i];
706 
707 		printf("%s", i == 0 ? "" : COLUMN_SEP);
708 		for (j = 0; j < len; j++)
709 			printf("%c", HEADER_CHAR);
710 	}
711 	printf("\n");
712 }
713 
714 static void output_headers(enum resfmt fmt)
715 {
716 	int i, len;
717 
718 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
719 		int id = env.output_spec.ids[i];
720 		int *max_len = &env.output_spec.lens[i];
721 
722 		switch (fmt) {
723 		case RESFMT_TABLE_CALCLEN:
724 			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
725 			if (len > *max_len)
726 				*max_len = len;
727 			break;
728 		case RESFMT_TABLE:
729 			printf("%s%-*s", i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
730 			if (i == env.output_spec.spec_cnt - 1)
731 				printf("\n");
732 			break;
733 		case RESFMT_CSV:
734 			printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
735 			if (i == env.output_spec.spec_cnt - 1)
736 				printf("\n");
737 			break;
738 		}
739 	}
740 
741 	if (fmt == RESFMT_TABLE)
742 		output_header_underlines();
743 }
744 
745 static void prepare_value(const struct verif_stats *s, enum stat_id id,
746 			  const char **str, long *val)
747 {
748 	switch (id) {
749 	case FILE_NAME:
750 		*str = s->file_name;
751 		break;
752 	case PROG_NAME:
753 		*str = s->prog_name;
754 		break;
755 	case VERDICT:
756 		*str = s->stats[VERDICT] ? "success" : "failure";
757 		break;
758 	case DURATION:
759 	case TOTAL_INSNS:
760 	case TOTAL_STATES:
761 	case PEAK_STATES:
762 	case MAX_STATES_PER_INSN:
763 	case MARK_READ_MAX_LEN:
764 		*val = s->stats[id];
765 		break;
766 	default:
767 		fprintf(stderr, "Unrecognized stat #%d\n", id);
768 		exit(1);
769 	}
770 }
771 
772 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
773 {
774 	int i;
775 
776 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
777 		int id = env.output_spec.ids[i];
778 		int *max_len = &env.output_spec.lens[i], len;
779 		const char *str = NULL;
780 		long val = 0;
781 
782 		prepare_value(s, id, &str, &val);
783 
784 		switch (fmt) {
785 		case RESFMT_TABLE_CALCLEN:
786 			if (str)
787 				len = snprintf(NULL, 0, "%s", str);
788 			else
789 				len = snprintf(NULL, 0, "%ld", val);
790 			if (len > *max_len)
791 				*max_len = len;
792 			break;
793 		case RESFMT_TABLE:
794 			if (str)
795 				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
796 			else
797 				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
798 			if (i == env.output_spec.spec_cnt - 1)
799 				printf("\n");
800 			break;
801 		case RESFMT_CSV:
802 			if (str)
803 				printf("%s%s", i == 0 ? "" : ",", str);
804 			else
805 				printf("%s%ld", i == 0 ? "" : ",", val);
806 			if (i == env.output_spec.spec_cnt - 1)
807 				printf("\n");
808 			break;
809 		}
810 	}
811 
812 	if (last && fmt == RESFMT_TABLE) {
813 		output_header_underlines();
814 		printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
815 		       env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
816 	}
817 }
818 
819 static int handle_verif_mode(void)
820 {
821 	int i, err;
822 
823 	if (env.filename_cnt == 0) {
824 		fprintf(stderr, "Please provide path to BPF object file!\n");
825 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
826 		return -EINVAL;
827 	}
828 
829 	for (i = 0; i < env.filename_cnt; i++) {
830 		err = process_obj(env.filenames[i]);
831 		if (err) {
832 			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
833 			return err;
834 		}
835 	}
836 
837 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
838 
839 	if (env.out_fmt == RESFMT_TABLE) {
840 		/* calculate column widths */
841 		output_headers(RESFMT_TABLE_CALCLEN);
842 		for (i = 0; i < env.prog_stat_cnt; i++)
843 			output_stats(&env.prog_stats[i], RESFMT_TABLE_CALCLEN, false);
844 	}
845 
846 	/* actually output the table */
847 	output_headers(env.out_fmt);
848 	for (i = 0; i < env.prog_stat_cnt; i++) {
849 		output_stats(&env.prog_stats[i], env.out_fmt, i == env.prog_stat_cnt - 1);
850 	}
851 
852 	return 0;
853 }
854 
855 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
856 {
857 	switch (id) {
858 	case FILE_NAME:
859 		st->file_name = strdup(str);
860 		if (!st->file_name)
861 			return -ENOMEM;
862 		break;
863 	case PROG_NAME:
864 		st->prog_name = strdup(str);
865 		if (!st->prog_name)
866 			return -ENOMEM;
867 		break;
868 	case VERDICT:
869 		if (strcmp(str, "success") == 0) {
870 			st->stats[VERDICT] = true;
871 		} else if (strcmp(str, "failure") == 0) {
872 			st->stats[VERDICT] = false;
873 		} else {
874 			fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
875 			return -EINVAL;
876 		}
877 		break;
878 	case DURATION:
879 	case TOTAL_INSNS:
880 	case TOTAL_STATES:
881 	case PEAK_STATES:
882 	case MAX_STATES_PER_INSN:
883 	case MARK_READ_MAX_LEN: {
884 		long val;
885 		int err, n;
886 
887 		if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
888 			err = -errno;
889 			fprintf(stderr, "Failed to parse '%s' as integer\n", str);
890 			return err;
891 		}
892 
893 		st->stats[id] = val;
894 		break;
895 	}
896 	default:
897 		fprintf(stderr, "Unrecognized stat #%d\n", id);
898 		return -EINVAL;
899 	}
900 	return 0;
901 }
902 
903 static int parse_stats_csv(const char *filename, struct stat_specs *specs,
904 			   struct verif_stats **statsp, int *stat_cntp)
905 {
906 	char line[4096];
907 	FILE *f;
908 	int err = 0;
909 	bool header = true;
910 
911 	f = fopen(filename, "r");
912 	if (!f) {
913 		err = -errno;
914 		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
915 		return err;
916 	}
917 
918 	*stat_cntp = 0;
919 
920 	while (fgets(line, sizeof(line), f)) {
921 		char *input = line, *state = NULL, *next;
922 		struct verif_stats *st = NULL;
923 		int col = 0;
924 
925 		if (!header) {
926 			void *tmp;
927 
928 			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
929 			if (!tmp) {
930 				err = -ENOMEM;
931 				goto cleanup;
932 			}
933 			*statsp = tmp;
934 
935 			st = &(*statsp)[*stat_cntp];
936 			memset(st, 0, sizeof(*st));
937 
938 			*stat_cntp += 1;
939 		}
940 
941 		while ((next = strtok_r(state ? NULL : input, ",\n", &state))) {
942 			if (header) {
943 				/* for the first line, set up spec stats */
944 				err = parse_stat(next, specs);
945 				if (err)
946 					goto cleanup;
947 				continue;
948 			}
949 
950 			/* for all other lines, parse values based on spec */
951 			if (col >= specs->spec_cnt) {
952 				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
953 					col, *stat_cntp, filename);
954 				err = -EINVAL;
955 				goto cleanup;
956 			}
957 			err = parse_stat_value(next, specs->ids[col], st);
958 			if (err)
959 				goto cleanup;
960 			col++;
961 		}
962 
963 		if (header) {
964 			header = false;
965 			continue;
966 		}
967 
968 		if (col < specs->spec_cnt) {
969 			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
970 				*stat_cntp, filename);
971 			err = -EINVAL;
972 			goto cleanup;
973 		}
974 
975 		if (!st->file_name || !st->prog_name) {
976 			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
977 				*stat_cntp, filename);
978 			err = -EINVAL;
979 			goto cleanup;
980 		}
981 
982 		/* in comparison mode we can only check filters after we
983 		 * parsed entire line; if row should be ignored we pretend we
984 		 * never parsed it
985 		 */
986 		if (!should_process_prog(st->file_name, st->prog_name)) {
987 			free(st->file_name);
988 			free(st->prog_name);
989 			*stat_cntp -= 1;
990 		}
991 	}
992 
993 	if (!feof(f)) {
994 		err = -errno;
995 		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
996 	}
997 
998 cleanup:
999 	fclose(f);
1000 	return err;
1001 }
1002 
/* empty/zero stats for mismatched rows; code comparing stats also checks for
 * this object's address to detect the side that has no real data
 */
static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1005 
1006 static bool is_key_stat(enum stat_id id)
1007 {
1008 	return id == FILE_NAME || id == PROG_NAME;
1009 }
1010 
1011 static void output_comp_header_underlines(void)
1012 {
1013 	int i, j, k;
1014 
1015 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1016 		int id = env.output_spec.ids[i];
1017 		int max_j = is_key_stat(id) ? 1 : 3;
1018 
1019 		for (j = 0; j < max_j; j++) {
1020 			int len = env.output_spec.lens[3 * i + j];
1021 
1022 			printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1023 
1024 			for (k = 0; k < len; k++)
1025 				printf("%c", HEADER_CHAR);
1026 		}
1027 	}
1028 	printf("\n");
1029 }
1030 
1031 static void output_comp_headers(enum resfmt fmt)
1032 {
1033 	static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1034 	static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1035 	int i, j, len;
1036 
1037 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1038 		int id = env.output_spec.ids[i];
1039 		/* key stats don't have A/B/DIFF columns, they are common for both data sets */
1040 		int max_j = is_key_stat(id) ? 1 : 3;
1041 
1042 		for (j = 0; j < max_j; j++) {
1043 			int *max_len = &env.output_spec.lens[3 * i + j];
1044 			bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1045 			const char *sfx;
1046 
1047 			switch (fmt) {
1048 			case RESFMT_TABLE_CALCLEN:
1049 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1050 				len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1051 				if (len > *max_len)
1052 					*max_len = len;
1053 				break;
1054 			case RESFMT_TABLE:
1055 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1056 				printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1057 				       *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1058 				if (last)
1059 					printf("\n");
1060 				break;
1061 			case RESFMT_CSV:
1062 				sfx = is_key_stat(id) ? "" : name_sfxs[j];
1063 				printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1064 				if (last)
1065 					printf("\n");
1066 				break;
1067 			}
1068 		}
1069 	}
1070 
1071 	if (fmt == RESFMT_TABLE)
1072 		output_comp_header_underlines();
1073 }
1074 
1075 static void output_comp_stats(const struct verif_stats *base, const struct verif_stats *comp,
1076 			      enum resfmt fmt, bool last)
1077 {
1078 	char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1079 	int i;
1080 
1081 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1082 		int id = env.output_spec.ids[i], len;
1083 		int *max_len_base = &env.output_spec.lens[3 * i + 0];
1084 		int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1085 		int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1086 		const char *base_str = NULL, *comp_str = NULL;
1087 		long base_val = 0, comp_val = 0, diff_val = 0;
1088 
1089 		prepare_value(base, id, &base_str, &base_val);
1090 		prepare_value(comp, id, &comp_str, &comp_val);
1091 
1092 		/* normalize all the outputs to be in string buffers for simplicity */
1093 		if (is_key_stat(id)) {
1094 			/* key stats (file and program name) are always strings */
1095 			if (base != &fallback_stats)
1096 				snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1097 			else
1098 				snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1099 		} else if (base_str) {
1100 			snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1101 			snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1102 			if (strcmp(base_str, comp_str) == 0)
1103 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1104 			else
1105 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1106 		} else {
1107 			snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1108 			snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1109 
1110 			diff_val = comp_val - base_val;
1111 			if (base == &fallback_stats || comp == &fallback_stats || base_val == 0) {
1112 				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)",
1113 					 diff_val, comp_val < base_val ? -100.0 : 100.0);
1114 			} else {
1115 				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)",
1116 					 diff_val, diff_val * 100.0 / base_val);
1117 			}
1118 		}
1119 
1120 		switch (fmt) {
1121 		case RESFMT_TABLE_CALCLEN:
1122 			len = strlen(base_buf);
1123 			if (len > *max_len_base)
1124 				*max_len_base = len;
1125 			if (!is_key_stat(id)) {
1126 				len = strlen(comp_buf);
1127 				if (len > *max_len_comp)
1128 					*max_len_comp = len;
1129 				len = strlen(diff_buf);
1130 				if (len > *max_len_diff)
1131 					*max_len_diff = len;
1132 			}
1133 			break;
1134 		case RESFMT_TABLE: {
1135 			/* string outputs are left-aligned, number outputs are right-aligned */
1136 			const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1137 
1138 			printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1139 			if (!is_key_stat(id)) {
1140 				printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1141 				printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1142 			}
1143 			if (i == env.output_spec.spec_cnt - 1)
1144 				printf("\n");
1145 			break;
1146 		}
1147 		case RESFMT_CSV:
1148 			printf("%s%s", i == 0 ? "" : ",", base_buf);
1149 			if (!is_key_stat(id)) {
1150 				printf("%s%s", i == 0 ? "" : ",", comp_buf);
1151 				printf("%s%s", i == 0 ? "" : ",", diff_buf);
1152 			}
1153 			if (i == env.output_spec.spec_cnt - 1)
1154 				printf("\n");
1155 			break;
1156 		}
1157 	}
1158 
1159 	if (last && fmt == RESFMT_TABLE)
1160 		output_comp_header_underlines();
1161 }
1162 
1163 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1164 {
1165 	int r;
1166 
1167 	r = strcmp(base->file_name, comp->file_name);
1168 	if (r != 0)
1169 		return r;
1170 	return strcmp(base->prog_name, comp->prog_name);
1171 }
1172 
1173 static int handle_comparison_mode(void)
1174 {
1175 	struct stat_specs base_specs = {}, comp_specs = {};
1176 	enum resfmt cur_fmt;
1177 	int err, i, j;
1178 
1179 	if (env.filename_cnt != 2) {
1180 		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n");
1181 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1182 		return -EINVAL;
1183 	}
1184 
1185 	err = parse_stats_csv(env.filenames[0], &base_specs,
1186 			      &env.baseline_stats, &env.baseline_stat_cnt);
1187 	if (err) {
1188 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1189 		return err;
1190 	}
1191 	err = parse_stats_csv(env.filenames[1], &comp_specs,
1192 			      &env.prog_stats, &env.prog_stat_cnt);
1193 	if (err) {
1194 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
1195 		return err;
1196 	}
1197 
1198 	/* To keep it simple we validate that the set and order of stats in
1199 	 * both CSVs are exactly the same. This can be lifted with a bit more
1200 	 * pre-processing later.
1201 	 */
1202 	if (base_specs.spec_cnt != comp_specs.spec_cnt) {
1203 		fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
1204 			env.filenames[0], env.filenames[1],
1205 			base_specs.spec_cnt, comp_specs.spec_cnt);
1206 		return -EINVAL;
1207 	}
1208 	for (i = 0; i < base_specs.spec_cnt; i++) {
1209 		if (base_specs.ids[i] != comp_specs.ids[i]) {
1210 			fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
1211 				env.filenames[0], env.filenames[1],
1212 				stat_defs[base_specs.ids[i]].names[0],
1213 				stat_defs[comp_specs.ids[i]].names[0]);
1214 			return -EINVAL;
1215 		}
1216 	}
1217 
1218 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1219 	qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
1220 
1221 	/* for human-readable table output we need to do extra pass to
1222 	 * calculate column widths, so we substitute current output format
1223 	 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
1224 	 * and do everything again.
1225 	 */
1226 	if (env.out_fmt == RESFMT_TABLE)
1227 		cur_fmt = RESFMT_TABLE_CALCLEN;
1228 	else
1229 		cur_fmt = env.out_fmt;
1230 
1231 one_more_time:
1232 	output_comp_headers(cur_fmt);
1233 
1234 	/* If baseline and comparison datasets have different subset of rows
1235 	 * (we match by 'object + prog' as a unique key) then assume
1236 	 * empty/missing/zero value for rows that are missing in the opposite
1237 	 * data set
1238 	 */
1239 	i = j = 0;
1240 	while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
1241 		bool last = (i == env.baseline_stat_cnt - 1) || (j == env.prog_stat_cnt - 1);
1242 		const struct verif_stats *base, *comp;
1243 		int r;
1244 
1245 		base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
1246 		comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
1247 
1248 		if (!base->file_name || !base->prog_name) {
1249 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1250 				i, env.filenames[0]);
1251 			return -EINVAL;
1252 		}
1253 		if (!comp->file_name || !comp->prog_name) {
1254 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1255 				j, env.filenames[1]);
1256 			return -EINVAL;
1257 		}
1258 
1259 		r = cmp_stats_key(base, comp);
1260 		if (r == 0) {
1261 			output_comp_stats(base, comp, cur_fmt, last);
1262 			i++;
1263 			j++;
1264 		} else if (comp == &fallback_stats || r < 0) {
1265 			output_comp_stats(base, &fallback_stats, cur_fmt, last);
1266 			i++;
1267 		} else {
1268 			output_comp_stats(&fallback_stats, comp, cur_fmt, last);
1269 			j++;
1270 		}
1271 	}
1272 
1273 	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
1274 		cur_fmt = RESFMT_TABLE;
1275 		goto one_more_time; /* ... this time with feeling */
1276 	}
1277 
1278 	return 0;
1279 }
1280 
1281 int main(int argc, char **argv)
1282 {
1283 	int err = 0, i;
1284 
1285 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
1286 		return 1;
1287 
1288 	if (env.verbose && env.quiet) {
1289 		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n");
1290 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1291 		return 1;
1292 	}
1293 	if (env.verbose && env.log_level == 0)
1294 		env.log_level = 1;
1295 
1296 	if (env.output_spec.spec_cnt == 0)
1297 		env.output_spec = default_output_spec;
1298 	if (env.sort_spec.spec_cnt == 0)
1299 		env.sort_spec = default_sort_spec;
1300 
1301 	if (env.comparison_mode)
1302 		err = handle_comparison_mode();
1303 	else
1304 		err = handle_verif_mode();
1305 
1306 	free_verif_stats(env.prog_stats, env.prog_stat_cnt);
1307 	free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
1308 	for (i = 0; i < env.filename_cnt; i++)
1309 		free(env.filenames[i]);
1310 	free(env.filenames);
1311 	for (i = 0; i < env.allow_filter_cnt; i++) {
1312 		free(env.allow_filters[i].file_glob);
1313 		free(env.allow_filters[i].prog_glob);
1314 	}
1315 	free(env.allow_filters);
1316 	for (i = 0; i < env.deny_filter_cnt; i++) {
1317 		free(env.deny_filters[i].file_glob);
1318 		free(env.deny_filters[i].prog_glob);
1319 	}
1320 	free(env.deny_filters);
1321 	return -err;
1322 }
1323