1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <libgen.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <sched.h>
9 #include <pthread.h>
10 #include <dirent.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 #include <unistd.h>
14 #include <sys/time.h>
15 #include <sys/sysinfo.h>
16 #include <sys/stat.h>
17 #include <bpf/libbpf.h>
18 #include <bpf/btf.h>
19 #include <libelf.h>
20 #include <gelf.h>
21 #include <float.h>
22 #include <math.h>
23 
24 #ifndef ARRAY_SIZE
25 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
26 #endif
27 
/* Identifiers of all stats (columns) known to veristat.
 *
 * Numeric stats come first and double as indices into verif_stats.stats[];
 * string-valued FILE_NAME and PROG_NAME follow them. Code relies on this
 * ordering (e.g., `id >= FILE_NAME` is used to detect non-integer stats).
 */
enum stat_id {
	VERDICT,
	DURATION,
	TOTAL_INSNS,
	TOTAL_STATES,
	PEAK_STATES,
	MAX_STATES_PER_INSN,
	MARK_READ_MAX_LEN,

	/* non-numeric stats start here; keep them after all numeric ones */
	FILE_NAME,
	PROG_NAME,

	/* total number of stats, numeric and string ones */
	ALL_STATS_CNT,
	/* number of numeric stats, i.e., everything before FILE_NAME */
	NUM_STATS_CNT = FILE_NAME - VERDICT,
};
43 
44 /* In comparison mode each stat can specify up to four different values:
45  *   - A side value;
46  *   - B side value;
47  *   - absolute diff value;
48  *   - relative (percentage) diff value.
49  *
50  * When specifying stat specs in comparison mode, user can use one of the
51  * following variant suffixes to specify which exact variant should be used for
52  * ordering or filtering:
53  *   - `_a` for A side value;
54  *   - `_b` for B side value;
55  *   - `_diff` for absolute diff value;
56  *   - `_pct` for relative (percentage) diff value.
57  *
58  * If no variant suffix is provided, then `_b` (control data) is assumed.
59  *
60  * As an example, let's say instructions stat has the following output:
61  *
62  * Insns (A)  Insns (B)  Insns   (DIFF)
63  * ---------  ---------  --------------
64  * 21547      20920       -627 (-2.91%)
65  *
66  * Then:
67  *   - 21547 is A side value (insns_a);
68  *   - 20920 is B side value (insns_b);
69  *   - -627 is absolute diff value (insns_diff);
70  *   - -2.91% is relative diff value (insns_pct).
71  *
72  * For verdict there is no verdict_pct variant.
73  * For file and program name, _a and _b variants are equivalent and there are
74  * no _diff or _pct variants.
75  */
enum stat_variant {
	VARIANT_A,	/* `_a` suffix: A side value */
	VARIANT_B,	/* `_b` suffix: B side value; assumed when no suffix is given */
	VARIANT_DIFF,	/* `_diff` suffix: absolute diff value */
	VARIANT_PCT,	/* `_pct` suffix: relative (percentage) diff value */
};
82 
/* Verification results recorded for a single BPF program. */
struct verif_stats {
	char *file_name;	/* heap-allocated; released by free_verif_stats() */
	char *prog_name;	/* heap-allocated; released by free_verif_stats() */

	/* numeric stats, indexed by the numeric members of enum stat_id */
	long stats[NUM_STATS_CNT];
};
89 
/* joined comparison mode stats
 *
 * NOTE(review): the code that fills this in is not part of this section;
 * presumably stats_a/stats_b point to the A side and B side records of the
 * same file/program pair -- confirm against the join logic.
 */
struct verif_stats_join {
	char *file_name;
	char *prog_name;

	const struct verif_stats *stats_a;
	const struct verif_stats *stats_b;
};
98 
/* Ordered list of stat specifiers, as parsed by parse_stat().
 *
 * ids[], variants[], asc[] and abs[] are parallel arrays; only their first
 * spec_cnt entries are meaningful.
 */
struct stat_specs {
	int spec_cnt;					/* number of specs in use */
	enum stat_id ids[ALL_STATS_CNT];		/* which stat */
	enum stat_variant variants[ALL_STATS_CNT];	/* which comparison variant */
	bool asc[ALL_STATS_CNT];			/* ascending order requested */
	bool abs[ALL_STATS_CNT];			/* `|stat|` form was used */
	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
107 
/* Result output formats; RESFMT_TABLE and RESFMT_CSV are selectable with -o. */
enum resfmt {
	RESFMT_TABLE,
	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
	RESFMT_CSV,
};
113 
/* Discriminator for struct filter. */
enum filter_kind {
	FILTER_NAME,	/* file/program name glob filter */
	FILTER_STAT,	/* `<stat><op><value>` numeric filter */
};
118 
/* Comparison operators accepted in stat filters (see `operators` table). */
enum operator_kind {
	OP_EQ,		/* == or = */
	OP_NEQ,		/* != or <> */
	OP_LT,		/* < */
	OP_LE,		/* <= */
	OP_GT,		/* > */
	OP_GE,		/* >= */
};
127 
/* A single allow/deny filter, built by append_filter().
 * Which group of fields is meaningful depends on `kind`.
 */
struct filter {
	enum filter_kind kind;
	/* FILTER_NAME */
	char *any_glob;		/* `<glob>` form: applied to file and prog names */
	char *file_glob;	/* part before '/' in `<file-glob>/<prog-glob>' */
	char *prog_glob;	/* part after '/' in `<file-glob>/<prog-glob>' */
	/* FILTER_STAT */
	enum operator_kind op;
	int stat_id;
	enum stat_variant stat_var;
	long value;		/* right-hand side of the comparison */
	bool abs;
};
141 
/* Global tool state: parsed command-line options plus collected results. */
static struct env {
	/* positional arguments, each entry is strdup()'ed in parse_arg() */
	char **filenames;
	int filename_cnt;
	bool verbose;			/* -v (also implied by -d) */
	bool debug;			/* -d */
	bool quiet;			/* -q */
	bool force_checkpoints;		/* -t */
	bool force_reg_invariants;	/* -r */
	enum resfmt out_fmt;		/* -o */
	bool show_version;		/* -V */
	bool comparison_mode;		/* -C */
	bool replay_mode;		/* -R */
	int top_n;			/* -n */

	int log_level;			/* -l */
	int log_size;			/* --log-size; 0 selects the 16MB default */
	bool log_fixed;			/* --log-fixed */

	/* one entry per processed program, appended by process_prog() */
	struct verif_stats *prog_stats;
	int prog_stat_cnt;

	/* baseline_stats is allocated and used only in comparison mode */
	struct verif_stats *baseline_stats;
	int baseline_stat_cnt;

	struct verif_stats_join *join_stats;
	int join_stat_cnt;

	struct stat_specs output_spec;	/* -e */
	struct stat_specs sort_spec;	/* -s */

	/* -f filters; a leading '!' puts the filter on the deny list */
	struct filter *allow_filters;
	struct filter *deny_filters;
	int allow_filter_cnt;
	int deny_filter_cnt;

	/* bookkeeping counters */
	int files_processed;
	int files_skipped;
	int progs_processed;
	int progs_skipped;
} env;
183 
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)184 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
185 {
186 	if (!env.verbose)
187 		return 0;
188 	if (level == LIBBPF_DEBUG  && !env.debug)
189 		return 0;
190 	return vfprintf(stderr, format, args);
191 }
192 
/* Version string can be overridden at build time; "<kernel>" is the fallback. */
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif

/* Well-known globals consumed by argp for --version and bug reporting. */
const char *argp_program_version = "veristat v" VERISTAT_VERSION;
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
/* Usage text, wired into `argp` through its .doc member. */
const char argp_program_doc[] =
"veristat    BPF verifier stats collection and comparison tool.\n"
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
"   OR: veristat -C <baseline.csv> <comparison.csv>\n"
"   OR: veristat -R <results.csv>\n";
205 
/* Keys for options that have no short (single-character) form. */
enum {
	OPT_LOG_FIXED = 1000,
	OPT_LOG_SIZE = 1001,
};
210 
/* Command-line options understood by veristat; each key is handled in
 * parse_arg(). The empty entry terminates the table.
 */
static const struct argp_option opts[] = {
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{ "version", 'V', NULL, 0, "Print version" },
	{ "verbose", 'v', NULL, 0, "Verbose mode" },
	{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
	{ "quiet", 'q', NULL, 0, "Quiet mode" },
	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
	{ "sort", 's', "SPEC", 0, "Specify sort order" },
	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
	{ "compare", 'C', NULL, 0, "Comparison mode" },
	{ "replay", 'R', NULL, 0, "Replay mode" },
	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
	{ "test-states", 't', NULL, 0,
	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
	{ "test-reg-invariants", 'r', NULL, 0,
	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
	{},
};
233 
/* Forward declarations of helpers used by parse_arg() and defined further down. */
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
237 
parse_arg(int key,char * arg,struct argp_state * state)238 static error_t parse_arg(int key, char *arg, struct argp_state *state)
239 {
240 	void *tmp;
241 	int err;
242 
243 	switch (key) {
244 	case 'h':
245 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
246 		break;
247 	case 'V':
248 		env.show_version = true;
249 		break;
250 	case 'v':
251 		env.verbose = true;
252 		break;
253 	case 'd':
254 		env.debug = true;
255 		env.verbose = true;
256 		break;
257 	case 'q':
258 		env.quiet = true;
259 		break;
260 	case 'e':
261 		err = parse_stats(arg, &env.output_spec);
262 		if (err)
263 			return err;
264 		break;
265 	case 's':
266 		err = parse_stats(arg, &env.sort_spec);
267 		if (err)
268 			return err;
269 		break;
270 	case 'o':
271 		if (strcmp(arg, "table") == 0) {
272 			env.out_fmt = RESFMT_TABLE;
273 		} else if (strcmp(arg, "csv") == 0) {
274 			env.out_fmt = RESFMT_CSV;
275 		} else {
276 			fprintf(stderr, "Unrecognized output format '%s'\n", arg);
277 			return -EINVAL;
278 		}
279 		break;
280 	case 'l':
281 		errno = 0;
282 		env.log_level = strtol(arg, NULL, 10);
283 		if (errno) {
284 			fprintf(stderr, "invalid log level: %s\n", arg);
285 			argp_usage(state);
286 		}
287 		break;
288 	case OPT_LOG_FIXED:
289 		env.log_fixed = true;
290 		break;
291 	case OPT_LOG_SIZE:
292 		errno = 0;
293 		env.log_size = strtol(arg, NULL, 10);
294 		if (errno) {
295 			fprintf(stderr, "invalid log size: %s\n", arg);
296 			argp_usage(state);
297 		}
298 		break;
299 	case 't':
300 		env.force_checkpoints = true;
301 		break;
302 	case 'r':
303 		env.force_reg_invariants = true;
304 		break;
305 	case 'n':
306 		errno = 0;
307 		env.top_n = strtol(arg, NULL, 10);
308 		if (errno) {
309 			fprintf(stderr, "invalid top N specifier: %s\n", arg);
310 			argp_usage(state);
311 		}
312 	case 'C':
313 		env.comparison_mode = true;
314 		break;
315 	case 'R':
316 		env.replay_mode = true;
317 		break;
318 	case 'f':
319 		if (arg[0] == '@')
320 			err = append_filter_file(arg + 1);
321 		else if (arg[0] == '!')
322 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
323 		else
324 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
325 		if (err) {
326 			fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
327 			return err;
328 		}
329 		break;
330 	case ARGP_KEY_ARG:
331 		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
332 		if (!tmp)
333 			return -ENOMEM;
334 		env.filenames = tmp;
335 		env.filenames[env.filename_cnt] = strdup(arg);
336 		if (!env.filenames[env.filename_cnt])
337 			return -ENOMEM;
338 		env.filename_cnt++;
339 		break;
340 	default:
341 		return ARGP_ERR_UNKNOWN;
342 	}
343 	return 0;
344 }
345 
/* argp descriptor tying together the option table, parser callback and usage text. */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
351 
352 
/* Adapted from perf/util/string.c
 *
 * Minimal glob matcher: '*' matches any (possibly empty) run of characters,
 * every other pattern character has to match literally. The whole string
 * must be consumed for the match to succeed.
 */
static bool glob_matches(const char *str, const char *pat)
{
	/* walk over the literal prefix preceding the first wildcard */
	for (; *str != '\0' && *pat != '\0' && *pat != '*'; str++, pat++) {
		if (*str != *pat)
			return false;
	}

	/* no wildcard ahead: both string and pattern have to be exhausted */
	if (*pat != '*')
		return *str == '\0' && *pat == '\0';

	/* a run of consecutive wildcards is the same as a single one */
	do {
		pat++;
	} while (*pat == '*');

	/* trailing wildcard swallows whatever is left of the string */
	if (*pat == '\0')
		return true;

	/* try to match the rest of the pattern at every remaining position */
	for (; *str != '\0'; str++) {
		if (glob_matches(str, pat))
			return true;
	}

	/* string ran out while the pattern still expects literal characters */
	return false;
}
374 
is_bpf_obj_file(const char * path)375 static bool is_bpf_obj_file(const char *path) {
376 	Elf64_Ehdr *ehdr;
377 	int fd, err = -EINVAL;
378 	Elf *elf = NULL;
379 
380 	fd = open(path, O_RDONLY | O_CLOEXEC);
381 	if (fd < 0)
382 		return true; /* we'll fail later and propagate error */
383 
384 	/* ensure libelf is initialized */
385 	(void)elf_version(EV_CURRENT);
386 
387 	elf = elf_begin(fd, ELF_C_READ, NULL);
388 	if (!elf)
389 		goto cleanup;
390 
391 	if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
392 		goto cleanup;
393 
394 	ehdr = elf64_getehdr(elf);
395 	/* Old LLVM set e_machine to EM_NONE */
396 	if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
397 		goto cleanup;
398 
399 	err = 0;
400 cleanup:
401 	if (elf)
402 		elf_end(elf);
403 	close(fd);
404 	return err == 0;
405 }
406 
should_process_file_prog(const char * filename,const char * prog_name)407 static bool should_process_file_prog(const char *filename, const char *prog_name)
408 {
409 	struct filter *f;
410 	int i, allow_cnt = 0;
411 
412 	for (i = 0; i < env.deny_filter_cnt; i++) {
413 		f = &env.deny_filters[i];
414 		if (f->kind != FILTER_NAME)
415 			continue;
416 
417 		if (f->any_glob && glob_matches(filename, f->any_glob))
418 			return false;
419 		if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
420 			return false;
421 		if (f->file_glob && glob_matches(filename, f->file_glob))
422 			return false;
423 		if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
424 			return false;
425 	}
426 
427 	for (i = 0; i < env.allow_filter_cnt; i++) {
428 		f = &env.allow_filters[i];
429 		if (f->kind != FILTER_NAME)
430 			continue;
431 
432 		allow_cnt++;
433 		if (f->any_glob) {
434 			if (glob_matches(filename, f->any_glob))
435 				return true;
436 			/* If we don't know program name yet, any_glob filter
437 			 * has to assume that current BPF object file might be
438 			 * relevant; we'll check again later on after opening
439 			 * BPF object file, at which point program name will
440 			 * be known finally.
441 			 */
442 			if (!prog_name || glob_matches(prog_name, f->any_glob))
443 				return true;
444 		} else {
445 			if (f->file_glob && !glob_matches(filename, f->file_glob))
446 				continue;
447 			if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
448 				continue;
449 			return true;
450 		}
451 	}
452 
453 	/* if there are no file/prog name allow filters, allow all progs,
454 	 * unless they are denied earlier explicitly
455 	 */
456 	return allow_cnt == 0;
457 }
458 
/* Textual spellings of stat filter operators; append_filter() tries them
 * in this order and uses the first one found in the filter string.
 */
static struct {
	enum operator_kind op_kind;
	const char *op_str;
} operators[] = {
	/* Order of these definitions matter to avoid situations like '<'
	 * matching part of what is actually a '<>' operator. That is,
	 * substrings should go last.
	 */
	{ OP_EQ, "==" },
	{ OP_NEQ, "!=" },
	{ OP_NEQ, "<>" },
	{ OP_LE, "<=" },
	{ OP_LT, "<" },
	{ OP_GE, ">=" },
	{ OP_GT, ">" },
	{ OP_EQ, "=" },
};
476 
/* Forward declaration; append_filter() needs it before the definition below. */
static bool parse_stat_id_var(const char *name, size_t len, int *id,
			      enum stat_variant *var, bool *is_abs);
479 
append_filter(struct filter ** filters,int * cnt,const char * str)480 static int append_filter(struct filter **filters, int *cnt, const char *str)
481 {
482 	struct filter *f;
483 	void *tmp;
484 	const char *p;
485 	int i;
486 
487 	tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
488 	if (!tmp)
489 		return -ENOMEM;
490 	*filters = tmp;
491 
492 	f = &(*filters)[*cnt];
493 	memset(f, 0, sizeof(*f));
494 
495 	/* First, let's check if it's a stats filter of the following form:
496 	 * <stat><op><value, where:
497 	 *   - <stat> is one of supported numerical stats (verdict is also
498 	 *     considered numerical, failure == 0, success == 1);
499 	 *   - <op> is comparison operator (see `operators` definitions);
500 	 *   - <value> is an integer (or failure/success, or false/true as
501 	 *     special aliases for 0 and 1, respectively).
502 	 * If the form doesn't match what user provided, we assume file/prog
503 	 * glob filter.
504 	 */
505 	for (i = 0; i < ARRAY_SIZE(operators); i++) {
506 		enum stat_variant var;
507 		int id;
508 		long val;
509 		const char *end = str;
510 		const char *op_str;
511 		bool is_abs;
512 
513 		op_str = operators[i].op_str;
514 		p = strstr(str, op_str);
515 		if (!p)
516 			continue;
517 
518 		if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
519 			fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
520 			return -EINVAL;
521 		}
522 		if (id >= FILE_NAME) {
523 			fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
524 			return -EINVAL;
525 		}
526 
527 		p += strlen(op_str);
528 
529 		if (strcasecmp(p, "true") == 0 ||
530 		    strcasecmp(p, "t") == 0 ||
531 		    strcasecmp(p, "success") == 0 ||
532 		    strcasecmp(p, "succ") == 0 ||
533 		    strcasecmp(p, "s") == 0 ||
534 		    strcasecmp(p, "match") == 0 ||
535 		    strcasecmp(p, "m") == 0) {
536 			val = 1;
537 		} else if (strcasecmp(p, "false") == 0 ||
538 			   strcasecmp(p, "f") == 0 ||
539 			   strcasecmp(p, "failure") == 0 ||
540 			   strcasecmp(p, "fail") == 0 ||
541 			   strcasecmp(p, "mismatch") == 0 ||
542 			   strcasecmp(p, "mis") == 0) {
543 			val = 0;
544 		} else {
545 			errno = 0;
546 			val = strtol(p, (char **)&end, 10);
547 			if (errno || end == p || *end != '\0' ) {
548 				fprintf(stderr, "Invalid integer value in '%s'!\n", str);
549 				return -EINVAL;
550 			}
551 		}
552 
553 		f->kind = FILTER_STAT;
554 		f->stat_id = id;
555 		f->stat_var = var;
556 		f->op = operators[i].op_kind;
557 		f->abs = true;
558 		f->value = val;
559 
560 		*cnt += 1;
561 		return 0;
562 	}
563 
564 	/* File/prog filter can be specified either as '<glob>' or
565 	 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
566 	 * both file and program names. This seems to be way more useful in
567 	 * practice. If user needs full control, they can use '/<prog-glob>'
568 	 * form to glob just program name, or '<file-glob>/' to glob only file
569 	 * name. But usually common <glob> seems to be the most useful and
570 	 * ergonomic way.
571 	 */
572 	f->kind = FILTER_NAME;
573 	p = strchr(str, '/');
574 	if (!p) {
575 		f->any_glob = strdup(str);
576 		if (!f->any_glob)
577 			return -ENOMEM;
578 	} else {
579 		if (str != p) {
580 			/* non-empty file glob */
581 			f->file_glob = strndup(str, p - str);
582 			if (!f->file_glob)
583 				return -ENOMEM;
584 		}
585 		if (strlen(p + 1) > 0) {
586 			/* non-empty prog glob */
587 			f->prog_glob = strdup(p + 1);
588 			if (!f->prog_glob) {
589 				free(f->file_glob);
590 				f->file_glob = NULL;
591 				return -ENOMEM;
592 			}
593 		}
594 	}
595 
596 	*cnt += 1;
597 	return 0;
598 }
599 
append_filter_file(const char * path)600 static int append_filter_file(const char *path)
601 {
602 	char buf[1024];
603 	FILE *f;
604 	int err = 0;
605 
606 	f = fopen(path, "r");
607 	if (!f) {
608 		err = -errno;
609 		fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
610 		return err;
611 	}
612 
613 	while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
614 		/* lines starting with # are comments, skip them */
615 		if (buf[0] == '\0' || buf[0] == '#')
616 			continue;
617 		/* lines starting with ! are negative match filters */
618 		if (buf[0] == '!')
619 			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
620 		else
621 			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
622 		if (err)
623 			goto cleanup;
624 	}
625 
626 cleanup:
627 	fclose(f);
628 	return err;
629 }
630 
/* Default set of columns: file, program, verdict and four numeric stats.
 * NOTE(review): where this default gets selected is outside this section.
 */
static const struct stat_specs default_output_spec = {
	.spec_cnt = 7,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
	},
};
638 
/* Default set of columns for CSV: every known stat, all nine of them.
 * NOTE(review): where this default gets selected is outside this section.
 */
static const struct stat_specs default_csv_output_spec = {
	.spec_cnt = 9,
	.ids = {
		FILE_NAME, PROG_NAME, VERDICT, DURATION,
		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
	},
};
647 
/* Default ordering: by file name, then by program name, both ascending. */
static const struct stat_specs default_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
655 
/* sorting for comparison mode to join two data sets: by file name, then by
 * program name, both ascending
 */
static const struct stat_specs join_sort_spec = {
	.spec_cnt = 2,
	.ids = {
		FILE_NAME, PROG_NAME,
	},
	.asc = { true, true, },
};
664 
/* Per-stat metadata, indexed by enum stat_id. */
static struct stat_def {
	const char *header;	/* human-readable column title */
	const char *names[4];	/* aliases accepted in stat specs; unused slots are NULL */
	bool asc_by_default;	/* sort order used when spec has no order symbol */
	bool left_aligned;
} stat_defs[] = {
	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
	[TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
	[TOTAL_STATES] = { "States", {"total_states", "states"}, },
	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
};
681 
parse_stat_id_var(const char * name,size_t len,int * id,enum stat_variant * var,bool * is_abs)682 static bool parse_stat_id_var(const char *name, size_t len, int *id,
683 			      enum stat_variant *var, bool *is_abs)
684 {
685 	static const char *var_sfxs[] = {
686 		[VARIANT_A] = "_a",
687 		[VARIANT_B] = "_b",
688 		[VARIANT_DIFF] = "_diff",
689 		[VARIANT_PCT] = "_pct",
690 	};
691 	int i, j, k;
692 
693 	/* |<stat>| means we take absolute value of given stat */
694 	*is_abs = false;
695 	if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
696 		*is_abs = true;
697 		name += 1;
698 		len -= 2;
699 	}
700 
701 	for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
702 		struct stat_def *def = &stat_defs[i];
703 		size_t alias_len, sfx_len;
704 		const char *alias;
705 
706 		for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
707 			alias = def->names[j];
708 			if (!alias)
709 				continue;
710 
711 			alias_len = strlen(alias);
712 			if (strncmp(name, alias, alias_len) != 0)
713 				continue;
714 
715 			if (alias_len == len) {
716 				/* If no variant suffix is specified, we
717 				 * assume control group (just in case we are
718 				 * in comparison mode. Variant is ignored in
719 				 * non-comparison mode.
720 				 */
721 				*var = VARIANT_B;
722 				*id = i;
723 				return true;
724 			}
725 
726 			for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
727 				sfx_len = strlen(var_sfxs[k]);
728 				if (alias_len + sfx_len != len)
729 					continue;
730 
731 				if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
732 					*var = (enum stat_variant)k;
733 					*id = i;
734 					return true;
735 				}
736 			}
737 		}
738 	}
739 
740 	return false;
741 }
742 
/* Tell whether `c` is the symbol requesting ascending order ('^'). */
static bool is_asc_sym(char c)
{
	switch (c) {
	case '^':
		return true;
	default:
		return false;
	}
}
747 
/* Tell whether `c` is one of the symbols requesting descending order:
 * 'v', 'V', '.', '!' or '_'.
 */
static bool is_desc_sym(char c)
{
	switch (c) {
	case 'v':
	case 'V':
	case '.':
	case '!':
	case '_':
		return true;
	default:
		return false;
	}
}
752 
parse_stat(const char * stat_name,struct stat_specs * specs)753 static int parse_stat(const char *stat_name, struct stat_specs *specs)
754 {
755 	int id;
756 	bool has_order = false, is_asc = false, is_abs = false;
757 	size_t len = strlen(stat_name);
758 	enum stat_variant var;
759 
760 	if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
761 		fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
762 		return -E2BIG;
763 	}
764 
765 	if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
766 		has_order = true;
767 		is_asc = is_asc_sym(stat_name[len - 1]);
768 		len -= 1;
769 	}
770 
771 	if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
772 		fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
773 		return -ESRCH;
774 	}
775 
776 	specs->ids[specs->spec_cnt] = id;
777 	specs->variants[specs->spec_cnt] = var;
778 	specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
779 	specs->abs[specs->spec_cnt] = is_abs;
780 	specs->spec_cnt++;
781 
782 	return 0;
783 }
784 
/* Parse comma-separated list of stat specifiers and append them to `specs`.
 *
 * `stats_str` itself isn't modified, tokenization is done on a private
 * copy. Returns 0 on success, -ENOMEM if the copy can't be allocated, or
 * the first error reported by parse_stat(); specifiers parsed before the
 * failing one stay in `specs`.
 */
static int parse_stats(const char *stats_str, struct stat_specs *specs)
{
	char *copy, *saveptr = NULL, *tok;
	int err = 0;

	copy = strdup(stats_str);
	if (!copy)
		return -ENOMEM;

	for (tok = strtok_r(copy, ",", &saveptr); tok; tok = strtok_r(NULL, ",", &saveptr)) {
		err = parse_stat(tok, specs);
		if (err)
			break;
	}

	free(copy);
	return err;
}
805 
free_verif_stats(struct verif_stats * stats,size_t stat_cnt)806 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
807 {
808 	int i;
809 
810 	if (!stats)
811 		return;
812 
813 	for (i = 0; i < stat_cnt; i++) {
814 		free(stats[i].file_name);
815 		free(stats[i].prog_name);
816 	}
817 	free(stats);
818 }
819 
/* Statically allocated verifier log buffer, used by process_prog() when not
 * in verbose mode.
 */
static char verif_log_buf[64 * 1024];

/* Upper bound on the number of trailing log lines parse_verif_log() inspects. */
#define MAX_PARSED_LOG_LINES 100
823 
parse_verif_log(char * const buf,size_t buf_sz,struct verif_stats * s)824 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
825 {
826 	const char *cur;
827 	int pos, lines;
828 
829 	buf[buf_sz - 1] = '\0';
830 
831 	for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
832 		/* find previous endline or otherwise take the start of log buf */
833 		for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
834 		}
835 		/* next time start from end of previous line (or pos goes to <0) */
836 		pos--;
837 		/* if we found endline, point right after endline symbol;
838 		 * otherwise, stay at the beginning of log buf
839 		 */
840 		if (cur[0] == '\n')
841 			cur++;
842 
843 		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
844 			continue;
845 		if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
846 				&s->stats[TOTAL_INSNS],
847 				&s->stats[MAX_STATES_PER_INSN],
848 				&s->stats[TOTAL_STATES],
849 				&s->stats[PEAK_STATES],
850 				&s->stats[MARK_READ_MAX_LEN]))
851 			continue;
852 	}
853 
854 	return 0;
855 }
856 
guess_prog_type_by_ctx_name(const char * ctx_name,enum bpf_prog_type * prog_type,enum bpf_attach_type * attach_type)857 static int guess_prog_type_by_ctx_name(const char *ctx_name,
858 				       enum bpf_prog_type *prog_type,
859 				       enum bpf_attach_type *attach_type)
860 {
861 	/* We need to guess program type based on its declared context type.
862 	 * This guess can't be perfect as many different program types might
863 	 * share the same context type.  So we can only hope to reasonably
864 	 * well guess this and get lucky.
865 	 *
866 	 * Just in case, we support both UAPI-side type names and
867 	 * kernel-internal names.
868 	 */
869 	static struct {
870 		const char *uapi_name;
871 		const char *kern_name;
872 		enum bpf_prog_type prog_type;
873 		enum bpf_attach_type attach_type;
874 	} ctx_map[] = {
875 		/* __sk_buff is most ambiguous, we assume TC program */
876 		{ "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
877 		{ "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
878 		{ "bpf_sock_addr", "bpf_sock_addr_kern",  BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
879 		{ "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
880 		{ "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
881 		{ "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
882 		{ "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
883 		{ "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
884 		{ "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
885 		{ "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
886 		{ "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
887 		/* tracing types with no expected attach type */
888 		{ "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
889 		{ "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
890 		/* raw_tp programs use u64[] from kernel side, we don't want
891 		 * to match on that, probably; so NULL for kern-side type
892 		 */
893 		{ "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
894 	};
895 	int i;
896 
897 	if (!ctx_name)
898 		return -EINVAL;
899 
900 	for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
901 		if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
902 		    (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
903 			*prog_type = ctx_map[i].prog_type;
904 			*attach_type = ctx_map[i].attach_type;
905 			return 0;
906 		}
907 	}
908 
909 	return -ESRCH;
910 }
911 
fixup_obj(struct bpf_object * obj,struct bpf_program * prog,const char * filename)912 static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
913 {
914 	struct bpf_map *map;
915 
916 	bpf_object__for_each_map(map, obj) {
917 		/* disable pinning */
918 		bpf_map__set_pin_path(map, NULL);
919 
920 		/* fix up map size, if necessary */
921 		switch (bpf_map__type(map)) {
922 		case BPF_MAP_TYPE_SK_STORAGE:
923 		case BPF_MAP_TYPE_TASK_STORAGE:
924 		case BPF_MAP_TYPE_INODE_STORAGE:
925 		case BPF_MAP_TYPE_CGROUP_STORAGE:
926 			break;
927 		default:
928 			if (bpf_map__max_entries(map) == 0)
929 				bpf_map__set_max_entries(map, 1);
930 		}
931 	}
932 
933 	/* SEC(freplace) programs can't be loaded with veristat as is,
934 	 * but we can try guessing their target program's expected type by
935 	 * looking at the type of program's first argument and substituting
936 	 * corresponding program type
937 	 */
938 	if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
939 		const struct btf *btf = bpf_object__btf(obj);
940 		const char *prog_name = bpf_program__name(prog);
941 		enum bpf_prog_type prog_type;
942 		enum bpf_attach_type attach_type;
943 		const struct btf_type *t;
944 		const char *ctx_name;
945 		int id;
946 
947 		if (!btf)
948 			goto skip_freplace_fixup;
949 
950 		id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
951 		t = btf__type_by_id(btf, id);
952 		t = btf__type_by_id(btf, t->type);
953 		if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
954 			goto skip_freplace_fixup;
955 
956 		/* context argument is a pointer to a struct/typedef */
957 		t = btf__type_by_id(btf, btf_params(t)[0].type);
958 		while (t && btf_is_mod(t))
959 			t = btf__type_by_id(btf, t->type);
960 		if (!t || !btf_is_ptr(t))
961 			goto skip_freplace_fixup;
962 		t = btf__type_by_id(btf, t->type);
963 		while (t && btf_is_mod(t))
964 			t = btf__type_by_id(btf, t->type);
965 		if (!t)
966 			goto skip_freplace_fixup;
967 
968 		ctx_name = btf__name_by_offset(btf, t->name_off);
969 
970 		if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
971 			bpf_program__set_type(prog, prog_type);
972 			bpf_program__set_expected_attach_type(prog, attach_type);
973 
974 			if (!env.quiet) {
975 				printf("Using guessed program type '%s' for %s/%s...\n",
976 					libbpf_bpf_prog_type_str(prog_type),
977 					filename, prog_name);
978 			}
979 		} else {
980 			if (!env.quiet) {
981 				printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
982 					ctx_name, filename, prog_name);
983 			}
984 		}
985 	}
986 skip_freplace_fixup:
987 	return;
988 }
989 
process_prog(const char * filename,struct bpf_object * obj,struct bpf_program * prog)990 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
991 {
992 	const char *base_filename = basename(strdupa(filename));
993 	const char *prog_name = bpf_program__name(prog);
994 	char *buf;
995 	int buf_sz, log_level;
996 	struct verif_stats *stats;
997 	int err = 0;
998 	void *tmp;
999 
1000 	if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
1001 		env.progs_skipped++;
1002 		return 0;
1003 	}
1004 
1005 	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
1006 	if (!tmp)
1007 		return -ENOMEM;
1008 	env.prog_stats = tmp;
1009 	stats = &env.prog_stats[env.prog_stat_cnt++];
1010 	memset(stats, 0, sizeof(*stats));
1011 
1012 	if (env.verbose) {
1013 		buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024;
1014 		buf = malloc(buf_sz);
1015 		if (!buf)
1016 			return -ENOMEM;
1017 		/* ensure we always request stats */
1018 		log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
1019 	} else {
1020 		buf = verif_log_buf;
1021 		buf_sz = sizeof(verif_log_buf);
1022 		/* request only verifier stats */
1023 		log_level = 4 | (env.log_fixed ? 8 : 0);
1024 	}
1025 	verif_log_buf[0] = '\0';
1026 
1027 	bpf_program__set_log_buf(prog, buf, buf_sz);
1028 	bpf_program__set_log_level(prog, log_level);
1029 
1030 	/* increase chances of successful BPF object loading */
1031 	fixup_obj(obj, prog, base_filename);
1032 
1033 	if (env.force_checkpoints)
1034 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
1035 	if (env.force_reg_invariants)
1036 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
1037 
1038 	err = bpf_object__load(obj);
1039 	env.progs_processed++;
1040 
1041 	stats->file_name = strdup(base_filename);
1042 	stats->prog_name = strdup(bpf_program__name(prog));
1043 	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
1044 	parse_verif_log(buf, buf_sz, stats);
1045 
1046 	if (env.verbose) {
1047 		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
1048 		       filename, prog_name, stats->stats[DURATION],
1049 		       err ? "failure" : "success", buf);
1050 	}
1051 
1052 	if (verif_log_buf != buf)
1053 		free(buf);
1054 
1055 	return 0;
1056 };
1057 
process_obj(const char * filename)1058 static int process_obj(const char *filename)
1059 {
1060 	const char *base_filename = basename(strdupa(filename));
1061 	struct bpf_object *obj = NULL, *tobj;
1062 	struct bpf_program *prog, *tprog, *lprog;
1063 	libbpf_print_fn_t old_libbpf_print_fn;
1064 	LIBBPF_OPTS(bpf_object_open_opts, opts);
1065 	int err = 0, prog_cnt = 0;
1066 
1067 	if (!should_process_file_prog(base_filename, NULL)) {
1068 		if (env.verbose)
1069 			printf("Skipping '%s' due to filters...\n", filename);
1070 		env.files_skipped++;
1071 		return 0;
1072 	}
1073 	if (!is_bpf_obj_file(filename)) {
1074 		if (env.verbose)
1075 			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
1076 		env.files_skipped++;
1077 		return 0;
1078 	}
1079 
1080 	if (!env.quiet && env.out_fmt == RESFMT_TABLE)
1081 		printf("Processing '%s'...\n", base_filename);
1082 
1083 	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
1084 	obj = bpf_object__open_file(filename, &opts);
1085 	if (!obj) {
1086 		/* if libbpf can't open BPF object file, it could be because
1087 		 * that BPF object file is incomplete and has to be statically
1088 		 * linked into a final BPF object file; instead of bailing
1089 		 * out, report it into stderr, mark it as skipped, and
1090 		 * proceed
1091 		 */
1092 		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
1093 		env.files_skipped++;
1094 		err = 0;
1095 		goto cleanup;
1096 	}
1097 
1098 	env.files_processed++;
1099 
1100 	bpf_object__for_each_program(prog, obj) {
1101 		prog_cnt++;
1102 	}
1103 
1104 	if (prog_cnt == 1) {
1105 		prog = bpf_object__next_program(obj, NULL);
1106 		bpf_program__set_autoload(prog, true);
1107 		process_prog(filename, obj, prog);
1108 		goto cleanup;
1109 	}
1110 
1111 	bpf_object__for_each_program(prog, obj) {
1112 		const char *prog_name = bpf_program__name(prog);
1113 
1114 		tobj = bpf_object__open_file(filename, &opts);
1115 		if (!tobj) {
1116 			err = -errno;
1117 			fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1118 			goto cleanup;
1119 		}
1120 
1121 		lprog = NULL;
1122 		bpf_object__for_each_program(tprog, tobj) {
1123 			const char *tprog_name = bpf_program__name(tprog);
1124 
1125 			if (strcmp(prog_name, tprog_name) == 0) {
1126 				bpf_program__set_autoload(tprog, true);
1127 				lprog = tprog;
1128 			} else {
1129 				bpf_program__set_autoload(tprog, false);
1130 			}
1131 		}
1132 
1133 		process_prog(filename, tobj, lprog);
1134 		bpf_object__close(tobj);
1135 	}
1136 
1137 cleanup:
1138 	bpf_object__close(obj);
1139 	libbpf_set_print(old_libbpf_print_fn);
1140 	return err;
1141 }
1142 
cmp_stat(const struct verif_stats * s1,const struct verif_stats * s2,enum stat_id id,bool asc,bool abs)1143 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
1144 		    enum stat_id id, bool asc, bool abs)
1145 {
1146 	int cmp = 0;
1147 
1148 	switch (id) {
1149 	case FILE_NAME:
1150 		cmp = strcmp(s1->file_name, s2->file_name);
1151 		break;
1152 	case PROG_NAME:
1153 		cmp = strcmp(s1->prog_name, s2->prog_name);
1154 		break;
1155 	case VERDICT:
1156 	case DURATION:
1157 	case TOTAL_INSNS:
1158 	case TOTAL_STATES:
1159 	case PEAK_STATES:
1160 	case MAX_STATES_PER_INSN:
1161 	case MARK_READ_MAX_LEN: {
1162 		long v1 = s1->stats[id];
1163 		long v2 = s2->stats[id];
1164 
1165 		if (abs) {
1166 			v1 = v1 < 0 ? -v1 : v1;
1167 			v2 = v2 < 0 ? -v2 : v2;
1168 		}
1169 
1170 		if (v1 != v2)
1171 			cmp = v1 < v2 ? -1 : 1;
1172 		break;
1173 	}
1174 	default:
1175 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1176 		exit(1);
1177 	}
1178 
1179 	return asc ? cmp : -cmp;
1180 }
1181 
cmp_prog_stats(const void * v1,const void * v2)1182 static int cmp_prog_stats(const void *v1, const void *v2)
1183 {
1184 	const struct verif_stats *s1 = v1, *s2 = v2;
1185 	int i, cmp;
1186 
1187 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1188 		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
1189 			       env.sort_spec.asc[i], env.sort_spec.abs[i]);
1190 		if (cmp != 0)
1191 			return cmp;
1192 	}
1193 
1194 	/* always disambiguate with file+prog, which are unique */
1195 	cmp = strcmp(s1->file_name, s2->file_name);
1196 	if (cmp != 0)
1197 		return cmp;
1198 	return strcmp(s1->prog_name, s2->prog_name);
1199 }
1200 
fetch_join_stat_value(const struct verif_stats_join * s,enum stat_id id,enum stat_variant var,const char ** str_val,double * num_val)1201 static void fetch_join_stat_value(const struct verif_stats_join *s,
1202 				  enum stat_id id, enum stat_variant var,
1203 				  const char **str_val,
1204 				  double *num_val)
1205 {
1206 	long v1, v2;
1207 
1208 	if (id == FILE_NAME) {
1209 		*str_val = s->file_name;
1210 		return;
1211 	}
1212 	if (id == PROG_NAME) {
1213 		*str_val = s->prog_name;
1214 		return;
1215 	}
1216 
1217 	v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1218 	v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1219 
1220 	switch (var) {
1221 	case VARIANT_A:
1222 		if (!s->stats_a)
1223 			*num_val = -DBL_MAX;
1224 		else
1225 			*num_val = s->stats_a->stats[id];
1226 		return;
1227 	case VARIANT_B:
1228 		if (!s->stats_b)
1229 			*num_val = -DBL_MAX;
1230 		else
1231 			*num_val = s->stats_b->stats[id];
1232 		return;
1233 	case VARIANT_DIFF:
1234 		if (!s->stats_a || !s->stats_b)
1235 			*num_val = -DBL_MAX;
1236 		else if (id == VERDICT)
1237 			*num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1238 		else
1239 			*num_val = (double)(v2 - v1);
1240 		return;
1241 	case VARIANT_PCT:
1242 		if (!s->stats_a || !s->stats_b) {
1243 			*num_val = -DBL_MAX;
1244 		} else if (v1 == 0) {
1245 			if (v1 == v2)
1246 				*num_val = 0.0;
1247 			else
1248 				*num_val = v2 < v1 ? -100.0 : 100.0;
1249 		} else {
1250 			 *num_val = (v2 - v1) * 100.0 / v1;
1251 		}
1252 		return;
1253 	}
1254 }
1255 
/*
 * Compare two joined rows on one stat variant.
 *
 * @s1, @s2: joined rows to compare
 * @id, @var: stat and variant, as understood by fetch_join_stat_value()
 * @asc: when false, the sign of the result is flipped
 * @abs: compare absolute values of numeric stats
 *
 * String-valued stats are compared with strcmp(), numeric ones as doubles.
 */
static int cmp_join_stat(const struct verif_stats_join *s1,
			 const struct verif_stats_join *s2,
			 enum stat_id id, enum stat_variant var,
			 bool asc, bool abs)
{
	const char *lhs_str = NULL, *rhs_str = NULL;
	double lhs_num = 0.0, rhs_num = 0.0;
	int order;

	fetch_join_stat_value(s1, id, var, &lhs_str, &lhs_num);
	fetch_join_stat_value(s2, id, var, &rhs_str, &rhs_num);

	if (abs) {
		lhs_num = fabs(lhs_num);
		rhs_num = fabs(rhs_num);
	}

	if (lhs_str)
		order = strcmp(lhs_str, rhs_str);
	else if (lhs_num == rhs_num)
		order = 0;
	else
		order = lhs_num < rhs_num ? -1 : 1;

	if (!asc)
		order = -order;
	return order;
}
1280 
cmp_join_stats(const void * v1,const void * v2)1281 static int cmp_join_stats(const void *v1, const void *v2)
1282 {
1283 	const struct verif_stats_join *s1 = v1, *s2 = v2;
1284 	int i, cmp;
1285 
1286 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1287 		cmp = cmp_join_stat(s1, s2,
1288 				    env.sort_spec.ids[i],
1289 				    env.sort_spec.variants[i],
1290 				    env.sort_spec.asc[i],
1291 				    env.sort_spec.abs[i]);
1292 		if (cmp != 0)
1293 			return cmp;
1294 	}
1295 
1296 	/* always disambiguate with file+prog, which are unique */
1297 	cmp = strcmp(s1->file_name, s2->file_name);
1298 	if (cmp != 0)
1299 		return cmp;
1300 	return strcmp(s1->prog_name, s2->prog_name);
1301 }
1302 
1303 #define HEADER_CHAR '-'
1304 #define COLUMN_SEP "  "
1305 
output_header_underlines(void)1306 static void output_header_underlines(void)
1307 {
1308 	int i, j, len;
1309 
1310 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1311 		len = env.output_spec.lens[i];
1312 
1313 		printf("%s", i == 0 ? "" : COLUMN_SEP);
1314 		for (j = 0; j < len; j++)
1315 			printf("%c", HEADER_CHAR);
1316 	}
1317 	printf("\n");
1318 }
1319 
output_headers(enum resfmt fmt)1320 static void output_headers(enum resfmt fmt)
1321 {
1322 	const char *fmt_str;
1323 	int i, len;
1324 
1325 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1326 		int id = env.output_spec.ids[i];
1327 		int *max_len = &env.output_spec.lens[i];
1328 
1329 		switch (fmt) {
1330 		case RESFMT_TABLE_CALCLEN:
1331 			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1332 			if (len > *max_len)
1333 				*max_len = len;
1334 			break;
1335 		case RESFMT_TABLE:
1336 			fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1337 			printf(fmt_str, i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
1338 			if (i == env.output_spec.spec_cnt - 1)
1339 				printf("\n");
1340 			break;
1341 		case RESFMT_CSV:
1342 			printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1343 			if (i == env.output_spec.spec_cnt - 1)
1344 				printf("\n");
1345 			break;
1346 		}
1347 	}
1348 
1349 	if (fmt == RESFMT_TABLE)
1350 		output_header_underlines();
1351 }
1352 
prepare_value(const struct verif_stats * s,enum stat_id id,const char ** str,long * val)1353 static void prepare_value(const struct verif_stats *s, enum stat_id id,
1354 			  const char **str, long *val)
1355 {
1356 	switch (id) {
1357 	case FILE_NAME:
1358 		*str = s ? s->file_name : "N/A";
1359 		break;
1360 	case PROG_NAME:
1361 		*str = s ? s->prog_name : "N/A";
1362 		break;
1363 	case VERDICT:
1364 		if (!s)
1365 			*str = "N/A";
1366 		else
1367 			*str = s->stats[VERDICT] ? "success" : "failure";
1368 		break;
1369 	case DURATION:
1370 	case TOTAL_INSNS:
1371 	case TOTAL_STATES:
1372 	case PEAK_STATES:
1373 	case MAX_STATES_PER_INSN:
1374 	case MARK_READ_MAX_LEN:
1375 		*val = s ? s->stats[id] : 0;
1376 		break;
1377 	default:
1378 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1379 		exit(1);
1380 	}
1381 }
1382 
output_stats(const struct verif_stats * s,enum resfmt fmt,bool last)1383 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1384 {
1385 	int i;
1386 
1387 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1388 		int id = env.output_spec.ids[i];
1389 		int *max_len = &env.output_spec.lens[i], len;
1390 		const char *str = NULL;
1391 		long val = 0;
1392 
1393 		prepare_value(s, id, &str, &val);
1394 
1395 		switch (fmt) {
1396 		case RESFMT_TABLE_CALCLEN:
1397 			if (str)
1398 				len = snprintf(NULL, 0, "%s", str);
1399 			else
1400 				len = snprintf(NULL, 0, "%ld", val);
1401 			if (len > *max_len)
1402 				*max_len = len;
1403 			break;
1404 		case RESFMT_TABLE:
1405 			if (str)
1406 				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1407 			else
1408 				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
1409 			if (i == env.output_spec.spec_cnt - 1)
1410 				printf("\n");
1411 			break;
1412 		case RESFMT_CSV:
1413 			if (str)
1414 				printf("%s%s", i == 0 ? "" : ",", str);
1415 			else
1416 				printf("%s%ld", i == 0 ? "" : ",", val);
1417 			if (i == env.output_spec.spec_cnt - 1)
1418 				printf("\n");
1419 			break;
1420 		}
1421 	}
1422 
1423 	if (last && fmt == RESFMT_TABLE) {
1424 		output_header_underlines();
1425 		printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1426 		       env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1427 	}
1428 }
1429 
parse_stat_value(const char * str,enum stat_id id,struct verif_stats * st)1430 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1431 {
1432 	switch (id) {
1433 	case FILE_NAME:
1434 		st->file_name = strdup(str);
1435 		if (!st->file_name)
1436 			return -ENOMEM;
1437 		break;
1438 	case PROG_NAME:
1439 		st->prog_name = strdup(str);
1440 		if (!st->prog_name)
1441 			return -ENOMEM;
1442 		break;
1443 	case VERDICT:
1444 		if (strcmp(str, "success") == 0) {
1445 			st->stats[VERDICT] = true;
1446 		} else if (strcmp(str, "failure") == 0) {
1447 			st->stats[VERDICT] = false;
1448 		} else {
1449 			fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1450 			return -EINVAL;
1451 		}
1452 		break;
1453 	case DURATION:
1454 	case TOTAL_INSNS:
1455 	case TOTAL_STATES:
1456 	case PEAK_STATES:
1457 	case MAX_STATES_PER_INSN:
1458 	case MARK_READ_MAX_LEN: {
1459 		long val;
1460 		int err, n;
1461 
1462 		if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1463 			err = -errno;
1464 			fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1465 			return err;
1466 		}
1467 
1468 		st->stats[id] = val;
1469 		break;
1470 	}
1471 	default:
1472 		fprintf(stderr, "Unrecognized stat #%d\n", id);
1473 		return -EINVAL;
1474 	}
1475 	return 0;
1476 }
1477 
parse_stats_csv(const char * filename,struct stat_specs * specs,struct verif_stats ** statsp,int * stat_cntp)1478 static int parse_stats_csv(const char *filename, struct stat_specs *specs,
1479 			   struct verif_stats **statsp, int *stat_cntp)
1480 {
1481 	char line[4096];
1482 	FILE *f;
1483 	int err = 0;
1484 	bool header = true;
1485 
1486 	f = fopen(filename, "r");
1487 	if (!f) {
1488 		err = -errno;
1489 		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1490 		return err;
1491 	}
1492 
1493 	*stat_cntp = 0;
1494 
1495 	while (fgets(line, sizeof(line), f)) {
1496 		char *input = line, *state = NULL, *next;
1497 		struct verif_stats *st = NULL;
1498 		int col = 0, cnt = 0;
1499 
1500 		if (!header) {
1501 			void *tmp;
1502 
1503 			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
1504 			if (!tmp) {
1505 				err = -ENOMEM;
1506 				goto cleanup;
1507 			}
1508 			*statsp = tmp;
1509 
1510 			st = &(*statsp)[*stat_cntp];
1511 			memset(st, 0, sizeof(*st));
1512 
1513 			*stat_cntp += 1;
1514 		}
1515 
1516 		while ((next = strtok_r(cnt++ ? NULL : input, ",\n", &state))) {
1517 			if (header) {
1518 				/* for the first line, set up spec stats */
1519 				err = parse_stat(next, specs);
1520 				if (err)
1521 					goto cleanup;
1522 				continue;
1523 			}
1524 
1525 			/* for all other lines, parse values based on spec */
1526 			if (col >= specs->spec_cnt) {
1527 				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
1528 					col, *stat_cntp, filename);
1529 				err = -EINVAL;
1530 				goto cleanup;
1531 			}
1532 			err = parse_stat_value(next, specs->ids[col], st);
1533 			if (err)
1534 				goto cleanup;
1535 			col++;
1536 		}
1537 
1538 		if (header) {
1539 			header = false;
1540 			continue;
1541 		}
1542 
1543 		if (col < specs->spec_cnt) {
1544 			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
1545 				*stat_cntp, filename);
1546 			err = -EINVAL;
1547 			goto cleanup;
1548 		}
1549 
1550 		if (!st->file_name || !st->prog_name) {
1551 			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
1552 				*stat_cntp, filename);
1553 			err = -EINVAL;
1554 			goto cleanup;
1555 		}
1556 
1557 		/* in comparison mode we can only check filters after we
1558 		 * parsed entire line; if row should be ignored we pretend we
1559 		 * never parsed it
1560 		 */
1561 		if (!should_process_file_prog(st->file_name, st->prog_name)) {
1562 			free(st->file_name);
1563 			free(st->prog_name);
1564 			*stat_cntp -= 1;
1565 		}
1566 	}
1567 
1568 	if (!feof(f)) {
1569 		err = -errno;
1570 		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
1571 	}
1572 
1573 cleanup:
1574 	fclose(f);
1575 	return err;
1576 }
1577 
1578 /* empty/zero stats for mismatched rows */
1579 static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1580 
is_key_stat(enum stat_id id)1581 static bool is_key_stat(enum stat_id id)
1582 {
1583 	return id == FILE_NAME || id == PROG_NAME;
1584 }
1585 
output_comp_header_underlines(void)1586 static void output_comp_header_underlines(void)
1587 {
1588 	int i, j, k;
1589 
1590 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1591 		int id = env.output_spec.ids[i];
1592 		int max_j = is_key_stat(id) ? 1 : 3;
1593 
1594 		for (j = 0; j < max_j; j++) {
1595 			int len = env.output_spec.lens[3 * i + j];
1596 
1597 			printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1598 
1599 			for (k = 0; k < len; k++)
1600 				printf("%c", HEADER_CHAR);
1601 		}
1602 	}
1603 	printf("\n");
1604 }
1605 
output_comp_headers(enum resfmt fmt)1606 static void output_comp_headers(enum resfmt fmt)
1607 {
1608 	static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1609 	static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1610 	int i, j, len;
1611 
1612 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1613 		int id = env.output_spec.ids[i];
1614 		/* key stats don't have A/B/DIFF columns, they are common for both data sets */
1615 		int max_j = is_key_stat(id) ? 1 : 3;
1616 
1617 		for (j = 0; j < max_j; j++) {
1618 			int *max_len = &env.output_spec.lens[3 * i + j];
1619 			bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1620 			const char *sfx;
1621 
1622 			switch (fmt) {
1623 			case RESFMT_TABLE_CALCLEN:
1624 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1625 				len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1626 				if (len > *max_len)
1627 					*max_len = len;
1628 				break;
1629 			case RESFMT_TABLE:
1630 				sfx = is_key_stat(id) ? "" : table_sfxs[j];
1631 				printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1632 				       *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1633 				if (last)
1634 					printf("\n");
1635 				break;
1636 			case RESFMT_CSV:
1637 				sfx = is_key_stat(id) ? "" : name_sfxs[j];
1638 				printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1639 				if (last)
1640 					printf("\n");
1641 				break;
1642 			}
1643 		}
1644 	}
1645 
1646 	if (fmt == RESFMT_TABLE)
1647 		output_comp_header_underlines();
1648 }
1649 
output_comp_stats(const struct verif_stats_join * join_stats,enum resfmt fmt,bool last)1650 static void output_comp_stats(const struct verif_stats_join *join_stats,
1651 			      enum resfmt fmt, bool last)
1652 {
1653 	const struct verif_stats *base = join_stats->stats_a;
1654 	const struct verif_stats *comp = join_stats->stats_b;
1655 	char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1656 	int i;
1657 
1658 	for (i = 0; i < env.output_spec.spec_cnt; i++) {
1659 		int id = env.output_spec.ids[i], len;
1660 		int *max_len_base = &env.output_spec.lens[3 * i + 0];
1661 		int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1662 		int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1663 		const char *base_str = NULL, *comp_str = NULL;
1664 		long base_val = 0, comp_val = 0, diff_val = 0;
1665 
1666 		prepare_value(base, id, &base_str, &base_val);
1667 		prepare_value(comp, id, &comp_str, &comp_val);
1668 
1669 		/* normalize all the outputs to be in string buffers for simplicity */
1670 		if (is_key_stat(id)) {
1671 			/* key stats (file and program name) are always strings */
1672 			if (base)
1673 				snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1674 			else
1675 				snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1676 		} else if (base_str) {
1677 			snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1678 			snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1679 			if (!base || !comp)
1680 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1681 			else if (strcmp(base_str, comp_str) == 0)
1682 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1683 			else
1684 				snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1685 		} else {
1686 			double p = 0.0;
1687 
1688 			if (base)
1689 				snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1690 			else
1691 				snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1692 			if (comp)
1693 				snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1694 			else
1695 				snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1696 
1697 			diff_val = comp_val - base_val;
1698 			if (!base || !comp) {
1699 				snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1700 			} else {
1701 				if (base_val == 0) {
1702 					if (comp_val == base_val)
1703 						p = 0.0; /* avoid +0 (+100%) case */
1704 					else
1705 						p = comp_val < base_val ? -100.0 : 100.0;
1706 				} else {
1707 					 p = diff_val * 100.0 / base_val;
1708 				}
1709 				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
1710 			}
1711 		}
1712 
1713 		switch (fmt) {
1714 		case RESFMT_TABLE_CALCLEN:
1715 			len = strlen(base_buf);
1716 			if (len > *max_len_base)
1717 				*max_len_base = len;
1718 			if (!is_key_stat(id)) {
1719 				len = strlen(comp_buf);
1720 				if (len > *max_len_comp)
1721 					*max_len_comp = len;
1722 				len = strlen(diff_buf);
1723 				if (len > *max_len_diff)
1724 					*max_len_diff = len;
1725 			}
1726 			break;
1727 		case RESFMT_TABLE: {
1728 			/* string outputs are left-aligned, number outputs are right-aligned */
1729 			const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1730 
1731 			printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1732 			if (!is_key_stat(id)) {
1733 				printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1734 				printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1735 			}
1736 			if (i == env.output_spec.spec_cnt - 1)
1737 				printf("\n");
1738 			break;
1739 		}
1740 		case RESFMT_CSV:
1741 			printf("%s%s", i == 0 ? "" : ",", base_buf);
1742 			if (!is_key_stat(id)) {
1743 				printf("%s%s", i == 0 ? "" : ",", comp_buf);
1744 				printf("%s%s", i == 0 ? "" : ",", diff_buf);
1745 			}
1746 			if (i == env.output_spec.spec_cnt - 1)
1747 				printf("\n");
1748 			break;
1749 		}
1750 	}
1751 
1752 	if (last && fmt == RESFMT_TABLE)
1753 		output_comp_header_underlines();
1754 }
1755 
cmp_stats_key(const struct verif_stats * base,const struct verif_stats * comp)1756 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1757 {
1758 	int r;
1759 
1760 	r = strcmp(base->file_name, comp->file_name);
1761 	if (r != 0)
1762 		return r;
1763 	return strcmp(base->prog_name, comp->prog_name);
1764 }
1765 
is_join_stat_filter_matched(struct filter * f,const struct verif_stats_join * stats)1766 static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
1767 {
1768 	static const double eps = 1e-9;
1769 	const char *str = NULL;
1770 	double value = 0.0;
1771 
1772 	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
1773 
1774 	if (f->abs)
1775 		value = fabs(value);
1776 
1777 	switch (f->op) {
1778 	case OP_EQ: return value > f->value - eps && value < f->value + eps;
1779 	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
1780 	case OP_LT: return value < f->value - eps;
1781 	case OP_LE: return value <= f->value + eps;
1782 	case OP_GT: return value > f->value + eps;
1783 	case OP_GE: return value >= f->value - eps;
1784 	}
1785 
1786 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1787 	return false;
1788 }
1789 
should_output_join_stats(const struct verif_stats_join * stats)1790 static bool should_output_join_stats(const struct verif_stats_join *stats)
1791 {
1792 	struct filter *f;
1793 	int i, allow_cnt = 0;
1794 
1795 	for (i = 0; i < env.deny_filter_cnt; i++) {
1796 		f = &env.deny_filters[i];
1797 		if (f->kind != FILTER_STAT)
1798 			continue;
1799 
1800 		if (is_join_stat_filter_matched(f, stats))
1801 			return false;
1802 	}
1803 
1804 	for (i = 0; i < env.allow_filter_cnt; i++) {
1805 		f = &env.allow_filters[i];
1806 		if (f->kind != FILTER_STAT)
1807 			continue;
1808 		allow_cnt++;
1809 
1810 		if (is_join_stat_filter_matched(f, stats))
1811 			return true;
1812 	}
1813 
1814 	/* if there are no stat allowed filters, pass everything through */
1815 	return allow_cnt == 0;
1816 }
1817 
handle_comparison_mode(void)1818 static int handle_comparison_mode(void)
1819 {
1820 	struct stat_specs base_specs = {}, comp_specs = {};
1821 	struct stat_specs tmp_sort_spec;
1822 	enum resfmt cur_fmt;
1823 	int err, i, j, last_idx, cnt;
1824 
1825 	if (env.filename_cnt != 2) {
1826 		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
1827 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1828 		return -EINVAL;
1829 	}
1830 
1831 	err = parse_stats_csv(env.filenames[0], &base_specs,
1832 			      &env.baseline_stats, &env.baseline_stat_cnt);
1833 	if (err) {
1834 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1835 		return err;
1836 	}
1837 	err = parse_stats_csv(env.filenames[1], &comp_specs,
1838 			      &env.prog_stats, &env.prog_stat_cnt);
1839 	if (err) {
1840 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
1841 		return err;
1842 	}
1843 
1844 	/* To keep it simple we validate that the set and order of stats in
1845 	 * both CSVs are exactly the same. This can be lifted with a bit more
1846 	 * pre-processing later.
1847 	 */
1848 	if (base_specs.spec_cnt != comp_specs.spec_cnt) {
1849 		fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
1850 			env.filenames[0], env.filenames[1],
1851 			base_specs.spec_cnt, comp_specs.spec_cnt);
1852 		return -EINVAL;
1853 	}
1854 	for (i = 0; i < base_specs.spec_cnt; i++) {
1855 		if (base_specs.ids[i] != comp_specs.ids[i]) {
1856 			fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
1857 				env.filenames[0], env.filenames[1],
1858 				stat_defs[base_specs.ids[i]].names[0],
1859 				stat_defs[comp_specs.ids[i]].names[0]);
1860 			return -EINVAL;
1861 		}
1862 	}
1863 
1864 	/* Replace user-specified sorting spec with file+prog sorting rule to
1865 	 * be able to join two datasets correctly. Once we are done, we will
1866 	 * restore the original sort spec.
1867 	 */
1868 	tmp_sort_spec = env.sort_spec;
1869 	env.sort_spec = join_sort_spec;
1870 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1871 	qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
1872 	env.sort_spec = tmp_sort_spec;
1873 
1874 	/* Join two datasets together. If baseline and comparison datasets
1875 	 * have different subset of rows (we match by 'object + prog' as
1876 	 * a unique key) then assume empty/missing/zero value for rows that
1877 	 * are missing in the opposite data set.
1878 	 */
1879 	i = j = 0;
1880 	while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
1881 		const struct verif_stats *base, *comp;
1882 		struct verif_stats_join *join;
1883 		void *tmp;
1884 		int r;
1885 
1886 		base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
1887 		comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
1888 
1889 		if (!base->file_name || !base->prog_name) {
1890 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1891 				i, env.filenames[0]);
1892 			return -EINVAL;
1893 		}
1894 		if (!comp->file_name || !comp->prog_name) {
1895 			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1896 				j, env.filenames[1]);
1897 			return -EINVAL;
1898 		}
1899 
1900 		tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
1901 		if (!tmp)
1902 			return -ENOMEM;
1903 		env.join_stats = tmp;
1904 
1905 		join = &env.join_stats[env.join_stat_cnt];
1906 		memset(join, 0, sizeof(*join));
1907 
1908 		r = cmp_stats_key(base, comp);
1909 		if (r == 0) {
1910 			join->file_name = base->file_name;
1911 			join->prog_name = base->prog_name;
1912 			join->stats_a = base;
1913 			join->stats_b = comp;
1914 			i++;
1915 			j++;
1916 		} else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
1917 			join->file_name = base->file_name;
1918 			join->prog_name = base->prog_name;
1919 			join->stats_a = base;
1920 			join->stats_b = NULL;
1921 			i++;
1922 		} else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
1923 			join->file_name = comp->file_name;
1924 			join->prog_name = comp->prog_name;
1925 			join->stats_a = NULL;
1926 			join->stats_b = comp;
1927 			j++;
1928 		} else {
1929 			fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
1930 				__FILE__, __LINE__, i, j);
1931 			return -EINVAL;
1932 		}
1933 		env.join_stat_cnt += 1;
1934 	}
1935 
1936 	/* now sort joined results according to sort spec */
1937 	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
1938 
1939 	/* for human-readable table output we need to do extra pass to
1940 	 * calculate column widths, so we substitute current output format
1941 	 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
1942 	 * and do everything again.
1943 	 */
1944 	if (env.out_fmt == RESFMT_TABLE)
1945 		cur_fmt = RESFMT_TABLE_CALCLEN;
1946 	else
1947 		cur_fmt = env.out_fmt;
1948 
1949 one_more_time:
1950 	output_comp_headers(cur_fmt);
1951 
1952 	last_idx = -1;
1953 	cnt = 0;
1954 	for (i = 0; i < env.join_stat_cnt; i++) {
1955 		const struct verif_stats_join *join = &env.join_stats[i];
1956 
1957 		if (!should_output_join_stats(join))
1958 			continue;
1959 
1960 		if (env.top_n && cnt >= env.top_n)
1961 			break;
1962 
1963 		if (cur_fmt == RESFMT_TABLE_CALCLEN)
1964 			last_idx = i;
1965 
1966 		output_comp_stats(join, cur_fmt, i == last_idx);
1967 
1968 		cnt++;
1969 	}
1970 
1971 	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
1972 		cur_fmt = RESFMT_TABLE;
1973 		goto one_more_time; /* ... this time with feeling */
1974 	}
1975 
1976 	return 0;
1977 }
1978 
is_stat_filter_matched(struct filter * f,const struct verif_stats * stats)1979 static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
1980 {
1981 	long value = stats->stats[f->stat_id];
1982 
1983 	if (f->abs)
1984 		value = value < 0 ? -value : value;
1985 
1986 	switch (f->op) {
1987 	case OP_EQ: return value == f->value;
1988 	case OP_NEQ: return value != f->value;
1989 	case OP_LT: return value < f->value;
1990 	case OP_LE: return value <= f->value;
1991 	case OP_GT: return value > f->value;
1992 	case OP_GE: return value >= f->value;
1993 	}
1994 
1995 	fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1996 	return false;
1997 }
1998 
should_output_stats(const struct verif_stats * stats)1999 static bool should_output_stats(const struct verif_stats *stats)
2000 {
2001 	struct filter *f;
2002 	int i, allow_cnt = 0;
2003 
2004 	for (i = 0; i < env.deny_filter_cnt; i++) {
2005 		f = &env.deny_filters[i];
2006 		if (f->kind != FILTER_STAT)
2007 			continue;
2008 
2009 		if (is_stat_filter_matched(f, stats))
2010 			return false;
2011 	}
2012 
2013 	for (i = 0; i < env.allow_filter_cnt; i++) {
2014 		f = &env.allow_filters[i];
2015 		if (f->kind != FILTER_STAT)
2016 			continue;
2017 		allow_cnt++;
2018 
2019 		if (is_stat_filter_matched(f, stats))
2020 			return true;
2021 	}
2022 
2023 	/* if there are no stat allowed filters, pass everything through */
2024 	return allow_cnt == 0;
2025 }
2026 
output_prog_stats(void)2027 static void output_prog_stats(void)
2028 {
2029 	const struct verif_stats *stats;
2030 	int i, last_stat_idx = 0, cnt = 0;
2031 
2032 	if (env.out_fmt == RESFMT_TABLE) {
2033 		/* calculate column widths */
2034 		output_headers(RESFMT_TABLE_CALCLEN);
2035 		for (i = 0; i < env.prog_stat_cnt; i++) {
2036 			stats = &env.prog_stats[i];
2037 			if (!should_output_stats(stats))
2038 				continue;
2039 			output_stats(stats, RESFMT_TABLE_CALCLEN, false);
2040 			last_stat_idx = i;
2041 		}
2042 	}
2043 
2044 	/* actually output the table */
2045 	output_headers(env.out_fmt);
2046 	for (i = 0; i < env.prog_stat_cnt; i++) {
2047 		stats = &env.prog_stats[i];
2048 		if (!should_output_stats(stats))
2049 			continue;
2050 		if (env.top_n && cnt >= env.top_n)
2051 			break;
2052 		output_stats(stats, env.out_fmt, i == last_stat_idx);
2053 		cnt++;
2054 	}
2055 }
2056 
handle_verif_mode(void)2057 static int handle_verif_mode(void)
2058 {
2059 	int i, err;
2060 
2061 	if (env.filename_cnt == 0) {
2062 		fprintf(stderr, "Please provide path to BPF object file!\n\n");
2063 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2064 		return -EINVAL;
2065 	}
2066 
2067 	for (i = 0; i < env.filename_cnt; i++) {
2068 		err = process_obj(env.filenames[i]);
2069 		if (err) {
2070 			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
2071 			return err;
2072 		}
2073 	}
2074 
2075 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2076 
2077 	output_prog_stats();
2078 
2079 	return 0;
2080 }
2081 
handle_replay_mode(void)2082 static int handle_replay_mode(void)
2083 {
2084 	struct stat_specs specs = {};
2085 	int err;
2086 
2087 	if (env.filename_cnt != 1) {
2088 		fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
2089 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2090 		return -EINVAL;
2091 	}
2092 
2093 	err = parse_stats_csv(env.filenames[0], &specs,
2094 			      &env.prog_stats, &env.prog_stat_cnt);
2095 	if (err) {
2096 		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
2097 		return err;
2098 	}
2099 
2100 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
2101 
2102 	output_prog_stats();
2103 
2104 	return 0;
2105 }
2106 
main(int argc,char ** argv)2107 int main(int argc, char **argv)
2108 {
2109 	int err = 0, i;
2110 
2111 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
2112 		return 1;
2113 
2114 	if (env.show_version) {
2115 		printf("%s\n", argp_program_version);
2116 		return 0;
2117 	}
2118 
2119 	if (env.verbose && env.quiet) {
2120 		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
2121 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2122 		return 1;
2123 	}
2124 	if (env.verbose && env.log_level == 0)
2125 		env.log_level = 1;
2126 
2127 	if (env.output_spec.spec_cnt == 0) {
2128 		if (env.out_fmt == RESFMT_CSV)
2129 			env.output_spec = default_csv_output_spec;
2130 		else
2131 			env.output_spec = default_output_spec;
2132 	}
2133 	if (env.sort_spec.spec_cnt == 0)
2134 		env.sort_spec = default_sort_spec;
2135 
2136 	if (env.comparison_mode && env.replay_mode) {
2137 		fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
2138 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
2139 		return 1;
2140 	}
2141 
2142 	if (env.comparison_mode)
2143 		err = handle_comparison_mode();
2144 	else if (env.replay_mode)
2145 		err = handle_replay_mode();
2146 	else
2147 		err = handle_verif_mode();
2148 
2149 	free_verif_stats(env.prog_stats, env.prog_stat_cnt);
2150 	free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
2151 	free(env.join_stats);
2152 	for (i = 0; i < env.filename_cnt; i++)
2153 		free(env.filenames[i]);
2154 	free(env.filenames);
2155 	for (i = 0; i < env.allow_filter_cnt; i++) {
2156 		free(env.allow_filters[i].any_glob);
2157 		free(env.allow_filters[i].file_glob);
2158 		free(env.allow_filters[i].prog_glob);
2159 	}
2160 	free(env.allow_filters);
2161 	for (i = 0; i < env.deny_filter_cnt; i++) {
2162 		free(env.deny_filters[i].any_glob);
2163 		free(env.deny_filters[i].file_glob);
2164 		free(env.deny_filters[i].prog_glob);
2165 	}
2166 	free(env.deny_filters);
2167 	return -err;
2168 }
2169