1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0-only
3#
4# (c) 2017 Tobin C. Harding <me@tobin.cc>
5#
6# leaking_addresses.pl: Scan the kernel for potential leaking addresses.
7#  - Scans dmesg output.
8#  - Walks directory tree and parses each file (for each directory in @DIRS).
9#
10# Use --debug to output path before parsing, this is useful to find files that
11# cause the script to choke.
12
13#
14# When the system is idle it is likely that most files under /proc/PID will be
15# identical for various processes.  Scanning _all_ the PIDs under /proc is
16# unnecessary and implies that we are thoroughly scanning /proc.  This is _not_
17# the case because there may be ways userspace can trigger creation of /proc
18# files that leak addresses but were not present during a scan.  For these two
19# reasons we exclude all PID directories under /proc except '1/'
20
21use warnings;
22use strict;
23use POSIX;
24use File::Basename;
25use File::Spec;
26use File::Temp qw/tempfile/;
27use Cwd 'abs_path';
28use Term::ANSIColor qw(:constants);
29use Getopt::Long qw(:config no_auto_abbrev);
30use Config;
31use bigint qw/hex/;
32use feature 'state';
33
# Name this script was invoked as; shown in the usage text.
my $P = $0;

# Directories to scan.
my @DIRS = qw(/proc /sys);

# Timer for parsing each file, in seconds.
my $TIMEOUT = 10;

# Kernel addresses vary by architecture.  We can only auto-detect the following
# architectures (using `uname -m`).  (flag --32-bit overrides auto-detection.)
my @SUPPORTED_ARCHITECTURES = qw(x86_64 ppc64 x86);

# Command line options: simple on/off flags.
my ($help, $debug, $raw) = (0, 0, 0);

# Command line options: raw result files.
my $output_raw = "";		# Write raw results to file.
my $input_raw = "";		# Read raw results from file instead of scanning.

# Command line options: reporting.
my $suppress_dmesg = 0;		# Don't show dmesg in output.
my $squash_by_path = 0;		# Summary report grouped by absolute path.
my $squash_by_filename = 0;	# Summary report grouped by filename.

# Command line options: kernel description.
my $kallsyms_file = "";		# Kernel symbols file.
my $kernel_config_file = "";	# Kernel configuration file.
my $opt_32bit = 0;		# Scan 32-bit kernel.
my $page_offset_32bit = 0;	# Page offset for 32-bit kernel.

# [packed address, symbol name] pairs; filled from --kallsyms=<file>.
my @kallsyms = ();

# Skip these absolute paths.
my @skip_abs = qw(
	/proc/kmsg
	/proc/device-tree
	/proc/1/syscall
	/sys/firmware/devicetree
	/sys/kernel/tracing/trace_pipe
	/sys/kernel/debug/tracing/trace_pipe
	/sys/kernel/security/apparmor/revision
);

# Skip these under any subdirectory.
my @skip_any = qw(
	pagemap
	events
	access
	registers
	snapshot_raw
	trace_pipe_raw
	ptmx
	trace_pipe
	fd
	usbmon
);
84
# Print the usage text to the currently selected output handle, then
# terminate the script with exit status $exitcode.  Never returns.
sub help
{
	my $exitcode = shift;

	print <<"EOM";

Usage: $P [OPTIONS]

Options:

	-o, --output-raw=<file>		Save results for future processing.
	-i, --input-raw=<file>		Read results from file instead of scanning.
	      --raw			Show raw results (default).
	      --suppress-dmesg		Do not show dmesg results.
	      --squash-by-path		Show one result per unique path.
	      --squash-by-filename	Show one result per unique filename.
	--kernel-config-file=<file>     Kernel configuration file (e.g /boot/config)
	--kallsyms=<file>		Read kernel symbol addresses from file (for
						scanning binary files).
	--32-bit			Scan 32-bit kernel.
	--page-offset-32-bit=o		Page offset (for 32-bit kernel 0xABCD1234).
	-d, --debug			Display debugging output.
	-h, --help			Display this help and exit.

Scans the running kernel for potential leaking addresses.

EOM
	exit($exitcode);
}
114
# Parse the command line.  GetOptions() returns false for an unknown or
# malformed option; in that case print the usage text and exit with 1.
GetOptions(
	'h|help'		=> \$help,
	'd|debug'		=> \$debug,
	'raw'			=> \$raw,
	'o|output-raw=s'	=> \$output_raw,
	'i|input-raw=s'		=> \$input_raw,
	'suppress-dmesg'	=> \$suppress_dmesg,
	'squash-by-path'	=> \$squash_by_path,
	'squash-by-filename'	=> \$squash_by_filename,
	'kallsyms=s'		=> \$kallsyms_file,
	'kernel-config-file=s'	=> \$kernel_config_file,
	'32-bit'		=> \$opt_32bit,
	'page-offset-32-bit=o'	=> \$page_offset_32bit,
) or help(1);

help(0) if ($help);

# With --input-raw nothing is scanned: report on the saved results and stop.
if ($input_raw) {
	format_output($input_raw);
	exit(0);
}

# The squashed summaries are built from a saved raw file only.
if (($squash_by_path or $squash_by_filename) and !$input_raw) {
	print "\nSummary reporting only available with --input-raw=<file>\n";
	print "(First run scan with --output-raw=<file>.)\n";
	exit(128);
}

# Refuse to scan when the architecture is neither auto-detected nor forced
# to 32-bit from the command line.
unless (is_supported_architecture() or $opt_32bit or $page_offset_32bit) {
	print "\nScript does not support your architecture, sorry.\n";
	print "\nCurrently we support: \n\n";
	printf "\t%s\n", $_ foreach (@SUPPORTED_ARCHITECTURES);
	print "\n";

	print "If you are running a 32-bit architecture you may use:\n";
	print "\n\t--32-bit or --page-offset-32-bit=<page offset>\n\n";

	my $archname = `uname -m`;
	printf("Machine hardware name (`uname -m`): %s\n", $archname);

	exit(129);
}

# With --output-raw every later print/printf without an explicit handle is
# written to that file instead of STDOUT.
if ($output_raw) {
	open my $out_fh, '>', $output_raw or die "$0: $output_raw: $!\n";
	select $out_fh;
}

# Load symbol addresses for the binary-file scan.  Each kept entry is stored
# as [address packed with the "J" template, symbol name].
if ($kallsyms_file) {
	open my $syms_fh, '<', $kallsyms_file or die "$0: $kallsyms_file: $!\n";
	while (my $line = <$syms_fh>) {
		chomp $line;
		my @fields = split / /, $line;
		my $addr_text = $fields[0];

		# Entries whose address text starts with '0' are dropped
		# (presumably zeroed/unreadable addresses - TODO confirm).
		next if ($addr_text =~ /^0/);

		# TODO: Why is hex() so impossibly slow?
		my $addr = hex($addr_text);
		push @kallsyms, [pack("J", $addr), $fields[2]];
	}
	close $syms_fh;
}

parse_dmesg();
walk(@DIRS);

exit 0;
188
# Print a debugging message to STDERR, but only when --debug was given.
#
# With a single argument the text is printed verbatim, so a message that
# embeds a path or file content containing '%' is not misread as a printf
# directive.  With more than one argument the first is a printf format and
# the remaining ones are its values.
sub dprint
{
	return unless $debug;

	my ($format, @values) = @_;
	return unless defined $format;

	if (@values) {
		printf STDERR $format, @values;
	} else {
		print STDERR $format;
	}
}
193
# True (1) when one of the architecture probes succeeds, otherwise 0.
sub is_supported_architecture
{
	return 1 if is_x86_64();
	return 1 if is_ppc64();
	return is_ix86_32();
}
198
# True when scanning should use 32-bit address rules.
sub is_32bit
{
	# --32-bit or --page-offset-32-bit take precedence over detection.
	return 1 if ($opt_32bit or $page_offset_32bit);

	return is_ix86_32();
}
208
# Returns 1 when `uname -m` reports an i386/i486/i586/i686 machine, else 0.
# `uname -m` is executed only on the first call; the text is kept in a
# state variable afterwards.
sub is_ix86_32
{
	state $machine = `uname -m`;

	chomp $machine;
	return ($machine =~ m/i[3456]86/) ? 1 : 0;
}
219
# Returns 1 when the output of `uname -m` (newline removed) is exactly
# $desc, otherwise 0.  Runs `uname -m` on every call.
sub is_arch
{
	my ($desc) = @_;

	chomp(my $machine = `uname -m`);

	return ($machine eq $desc) ? 1 : 0;
}
231
# True when `uname -m` reports 'x86_64'.
sub is_x86_64
{
	# The state initialiser runs once; later calls reuse the answer.
	state $on_x86_64 = is_arch('x86_64');

	return $on_x86_64;
}
237
# True when `uname -m` reports 'ppc64'.
sub is_ppc64
{
	# The state initialiser runs once; later calls reuse the answer.
	state $on_ppc64 = is_arch('ppc64');

	return $on_ppc64;
}
243
# Looks up $option in the kernel configuration and returns its value.
#
# The configuration is taken from the first source that applies:
#   1. the file named by --kernel-config-file;
#   2. /proc/config.gz, decompressed with gunzip into a temporary file;
#   3. /boot/config-`uname -r`, then /boot/config.
#
# Returns "" on error or if config option not found.
sub get_kernel_config_option
{
	my ($option) = @_;
	my $tmp_fh;
	my $tmp_file = "";
	my @candidates;

	if ($kernel_config_file ne "") {
		# Allow --kernel-config-file to override.
		push @candidates, $kernel_config_file;
	} elsif (-R "/proc/config.gz") {
		($tmp_fh, $tmp_file) = tempfile("config.gz-XXXXXX",
						UNLINK => 1);

		if (system("gunzip < /proc/config.gz > $tmp_file")) {
			dprint("system(gunzip < /proc/config.gz) failed\n");
			return "";
		}
		push @candidates, $tmp_file;
	} else {
		my $release = `uname -r`;
		chomp $release;
		push @candidates, '/boot/config-' . $release, '/boot/config';
	}

	# The first candidate that yields a non-empty value wins.
	my $value = "";
	foreach my $candidate (@candidates) {
		dprint("parsing config file: $candidate\n");
		$value = option_from_file($option, $candidate);
		last unless ($value eq "");
	}

	return $value;
}
283
# Parses $file and returns kernel configuration option value.
#
# $option: exact option name, e.g. 'CONFIG_PAGE_OFFSET'.
# $file:   path of a plain-text kernel configuration file.
#
# Returns everything after the first '=' on the first line of the form
# "<option>=<value>", without the trailing newline.  Returns "" when the
# file cannot be opened or no such line exists.
sub option_from_file
{
	my ($option, $file) = @_;
	my $val = "";

	open(my $fh, "<", $file) or return "";
	while (my $line = <$fh>) {
		# \Q..\E matches the name literally, and the '=' right after it
		# keeps a longer option that merely starts with the same text
		# (CONFIG_FOO vs CONFIG_FOO_BAR) from being picked up.  The
		# capture runs to the end of the line so that a value which
		# itself contains '=' is returned whole.
		if ($line =~ /^\Q$option\E=(.*)$/) {
			$val = $1;
			last;
		}
	}

	close $fh;
	return $val;
}
303
# Returns 1 when $match, a string that matched the address pattern, is a
# value known not to be a leaked kernel address; 0 otherwise.  32-bit scans
# are delegated to is_false_positive_32bit().
sub is_false_positive
{
	my ($match) = @_;

	return is_false_positive_32bit($match) if (is_32bit());

	# Ignore 64 bit false positives:
	# 0xfffffffffffffff[0-f]
	return 1 if ($match =~ /\b(0x)?(f|F){15}[0-9a-f]\b/);

	# 0x0000000000000000
	return 1 if ($match =~ /\b(0x)?0{16}\b/);

	# On x86_64 addresses inside the vsyscall region are ignored too.
	return 1 if (is_x86_64() and is_in_vsyscall_memory_region($match));

	return 0;
}
326
# 32-bit variant of the false-positive filter.
#
# Returns 1 when $match is either 0xfffffff[0-f] or numerically below the
# page offset obtained from get_page_offset() (queried once and then
# cached); returns 0 otherwise.
sub is_false_positive_32bit
{
	my ($match) = @_;
	state $page_offset = get_page_offset();

	# Matched case-insensitively for every digit: the address pattern
	# used for 32-bit scans accepts upper-case hex digits, so FFFFFFFE
	# has to be filtered just like fffffffe.
	if ($match =~ /\b(?:0x)?f{7}[[:xdigit:]]\b/i) {
		return 1;
	}

	if (hex($match) < $page_offset) {
		return 1;
	}

	return 0;
}
342
# Determines the page offset used to filter 32-bit false positives.
#
# Order of precedence:
#   1. --page-offset-32-bit from the command line, when non-zero;
#   2. CONFIG_PAGE_OFFSET from the kernel configuration;
#   3. the default 0xc0000000.
#
# returns integer value.  The configuration stores the option as text
# (e.g. "0xC0000000"), so it is converted here instead of handing the raw
# string to callers that compare it numerically.
sub get_page_offset
{
	my $default_offset = 0xc0000000;

	# Allow --page-offset-32-bit to override.
	if ($page_offset_32bit != 0) {
		return $page_offset_32bit;
	}

	my $page_offset = get_kernel_config_option('CONFIG_PAGE_OFFSET');
	if (!defined $page_offset or $page_offset eq "") {
		return $default_offset;
	}

	if ($page_offset =~ /^\s*(0x[[:xdigit:]]+)\s*$/i) {
		return hex($1);
	}

	# A plain decimal value; zero counts as "not set", as before.
	if ($page_offset =~ /^\s*([0-9]+)\s*$/ and $1 != 0) {
		return $1 + 0;
	}

	# Anything else cannot be interpreted as a number.
	return $default_offset;
}
360
# True when the hexadecimal string $match, read as a number, lies between
# 0xffffffffff600000 and 0xffffffffff601000, both bounds included.
sub is_in_vsyscall_memory_region
{
	my ($match) = @_;

	my $candidate = hex($match);
	my $lowest = hex("0xffffffffff600000");
	my $highest = hex("0xffffffffff601000");

	return ($lowest <= $candidate and $candidate <= $highest);
}
371
# True if argument potentially contains a kernel address.
#
# $path: where $line came from (a file path, or "dmesg").
# $line: the text to inspect.
#
# Returns 1 as soon as one address-looking match in $line is not classified
# as a false positive, otherwise 0.
sub may_leak_address
{
	my ($path, $line) = @_;

	# Ignore Signal masks.
	foreach my $mask_prefix ('^SigBlk:', '^SigIgn:', '^SigCgt:') {
		return 0 if ($line =~ $mask_prefix);
	}

	# Ignore input device reporting.
	# /proc/bus/input/devices: B: KEY=402000000 3803078f800d001 feffffdfffefffff fffffffffffffffe
	# /sys/devices/platform/i8042/serio0/input/input1/uevent: KEY=402000000 3803078f800d001 feffffdfffefffff fffffffffffffffe
	# /sys/devices/platform/i8042/serio0/input/input1/capabilities/key: 402000000 3803078f800d001 feffffdfffefffff fffffffffffffffe
	return 0 if ($line =~ /\bKEY=[[:xdigit:]]{9,14} [[:xdigit:]]{16} [[:xdigit:]]{16}\b/);
	if ($path =~ /\bkey$/) {
		return 0 if ($line =~ /\b[[:xdigit:]]{9,14} [[:xdigit:]]{16} [[:xdigit:]]{16}\b/);
	}

	# Try every address-looking substring in turn.
	my $address_re = get_address_re();
	while ($line =~ /($address_re)/g) {
		return 1 unless (is_false_positive($1));
	}

	return 0;
}
404
# Returns, as a string, the regular expression used to spot kernel
# addresses: ppc64 is checked first, then 32-bit, then the x86_64 pattern.
sub get_address_re
{
	return '\b(0x)?[89abcdef]00[[:xdigit:]]{13}\b' if (is_ppc64());
	return '\b(0x)?[[:xdigit:]]{8}\b' if (is_32bit());

	return get_x86_64_re();
}
415
# Returns, as a string, the address regular expression for x86_64.
sub get_x86_64_re
{
	# Page table levels are honoured only when explicitly configured via
	# CONFIG_PGTABLE_LEVELS.  When the config file cannot be parsed or the
	# option is absent, the pattern for 4 page table levels is used.  The
	# option is looked up once and cached.
	state $ptl = get_kernel_config_option('CONFIG_PGTABLE_LEVELS');

	return ($ptl == 5)
		? '\b(0x)?ff[[:xdigit:]]{14}\b'
		: '\b(0x)?ffff[[:xdigit:]]{12}\b';
}
429
# Runs `dmesg` and prints, prefixed with 'dmesg: ', every output line that
# may contain a kernel address.  If dmesg cannot be started a message is
# written to STDERR and the dmesg scan is skipped.
sub parse_dmesg
{
	my $cmd;

	if (!open($cmd, '-|', 'dmesg')) {
		printf STDERR "%s: cannot run dmesg: %s\n", $0, $!;
		return;
	}

	while (my $line = <$cmd>) {
		if (may_leak_address("dmesg", $line)) {
			print 'dmesg: ' . $line;
		}
	}
	close $cmd;
}
440
# True if we should skip this path.
#
# Returns 1 when $path is listed in @skip_abs, or when its last component
# is listed in @skip_any; otherwise 0.
sub skip
{
	my ($path) = @_;

	# Plain string comparison: $path is data, not a pattern.  Used as a
	# regular expression, '.' in a name would match any character and an
	# unbalanced '(' or '[' would abort the scan.
	foreach my $skipped_path (@skip_abs) {
		return 1 if ($skipped_path eq $path);
	}

	my ($filename) = fileparse($path);
	foreach my $skipped_name (@skip_any) {
		return 1 if ($skipped_name eq $filename);
	}

	return 0;
}
457
# Parses $file, giving up after $TIMEOUT seconds.  A timeout is reported on
# STDERR; any other error raised while parsing is re-thrown.
sub timed_parse_file
{
	my ($file) = @_;

	eval {
		# NB: \n required, it keeps perl from appending the source
		# location so the message can be compared exactly below.
		local $SIG{ALRM} = sub { die "alarm\n" };
		alarm $TIMEOUT;
		parse_file($file);
		alarm 0;
	};

	return unless ($@);

	# Propagate unexpected errors.
	die if ($@ ne "alarm\n");

	printf STDERR "timed out parsing: %s\n", $file;
}
474
# Searches the raw bytes of $file for every address in @kallsyms.
#
# Each @kallsyms entry is [packed address bytes, symbol name].  For every
# entry whose bytes occur in the file, "<file>: <symbol> @ <offset>" is
# printed, where <offset> is the position of the first occurrence.  Files
# that cannot be opened or read are skipped silently.
sub parse_binary
{
	my ($file) = @_;

	open my $fh, "<:raw", $file or return;
	local $/ = undef;	# Slurp the whole file in one read.
	my $bytes = <$fh>;
	close $fh;

	# A failed read leaves nothing to search.
	return unless defined $bytes;

	foreach my $entry (@kallsyms) {
		my ($addr, $symbol) = @$entry;
		my $offset = index($bytes, $addr);
		if ($offset != -1) {
			# print, not printf: the file and symbol names are
			# data and may contain '%'.
			print "$file: $symbol @ $offset\n";
		}
	}
}
493
# Scans one file for potential kernel addresses.
#
# Unreadable files are ignored.  Files that fail the -T (text) test are
# handed to parse_binary() when symbol addresses were loaded, except under
# a few known binary locations that are skipped.  Text files are read line
# by line and each suspicious line is printed as "<file>: <line>".
sub parse_file
{
	my ($file) = @_;

	if (! -R $file) {
		return;
	}

	if (! -T $file) {
		if ($file =~ m|^/sys/kernel/btf/| or
		    $file =~ m|^/sys/devices/pci| or
		    $file =~ m|^/sys/firmware/efi/efivars/| or
		    $file =~ m|^/proc/bus/pci/|) {
			return;
		}
		if (scalar @kallsyms > 0) {
			parse_binary($file);
		}
		return;
	}

	open my $fh, "<", $file or return;
	while (my $line = <$fh>) {
		chomp $line;
		if (may_leak_address($file, $line)) {
			# print, not printf: the line is file content and may
			# contain '%', which printf would treat as a directive
			# (garbling the output, or dying on "%n").
			print "$file: $line\n";
		}
	}
	close $fh;
}
524
# Checks if the actual path name is leaking a kernel address, and prints a
# "Path name may contain address: <path>" line when it may.
sub check_path_for_leaks
{
	my ($path) = @_;

	if (may_leak_address($path, $path)) {
		# print, not printf: a '%' in the path must be shown as is.
		print "Path name may contain address: $path\n";
	}
}
534
# Recursively walk directory tree.
#
# Takes the list of directories to start from.  Sub-directories are queued
# and processed in turn; symbolic links, every /proc/PID except /proc/1 and
# everything rejected by skip() are left out.  Each remaining path name is
# checked for leaks, and each non-directory is parsed under a timeout.
sub walk
{
	my @dirs = @_;

	# defined(): a directory name that is false as a string must not end
	# the walk early.
	while (defined(my $pwd = shift @dirs)) {
		# Lexical handle instead of the global bareword DIR.
		opendir(my $dh, $pwd) or next;
		my @files = readdir($dh);
		closedir($dh);

		foreach my $file (@files) {
			next if ($file eq '.' or $file eq '..');

			my $path = "$pwd/$file";
			next if (-l $path);

			# skip /proc/PID except /proc/1
			next if (($path =~ m{^/proc/[0-9]+$}) &&
				 ($path !~ m{^/proc/1$}));

			next if (skip($path));

			check_path_for_leaks($path);

			if (-d $path) {
				push @dirs, $path;
				next;
			}

			dprint("parsing: $path\n");
			timed_parse_file($path);
		}
	}
}
569
# Reports on a raw results file written by an earlier scan.
#
# Without a squash option (or with --raw) the file is dumped as is.
# Otherwise the total number of results is printed, followed by the dmesg
# lines (unless suppressed) and the requested squashed summaries.
sub format_output
{
	my ($file) = @_;

	my $summary_wanted = ($squash_by_path or $squash_by_filename);

	# Default is to show raw results.
	if ($raw or !$summary_wanted) {
		dump_raw_output($file);
		return;
	}

	my ($total, $dmesg, $paths, $files) = parse_raw_file($file);

	printf "\nTotal number of results from scan (incl dmesg): %d\n", $total;

	print_dmesg($dmesg) unless ($suppress_dmesg);
	squash_by($files, 'filename') if ($squash_by_filename);
	squash_by($paths, 'path') if ($squash_by_path);
}
596
# Copies the raw results file to the selected output handle, leaving out
# the lines that start with "dmesg:" when --suppress-dmesg is in effect.
# Dies when the file cannot be opened.
sub dump_raw_output
{
	my ($file) = @_;

	open (my $fh, '<', $file) or die "$0: $file: $!\n";
	while (my $line = <$fh>) {
		next if ($suppress_dmesg and substr($line, 0, 6) eq "dmesg:");
		print $line;
	}
	close $fh;
}
612
# Reads a raw results file and sorts its lines into groups.
#
# Returns a four element list:
#   - the total number of lines read (dmesg lines included);
#   - a reference to the array of lines starting with "dmesg:";
#   - a reference to a hash of results keyed by path;
#   - a reference to a hash of results keyed by filename.
#
# Dies when the file cannot be opened.
sub parse_raw_file
{
	my ($file) = @_;

	my $total = 0;          # Total number of lines parsed.
	my @dmesg;              # dmesg output.
	my %files;              # Unique filenames containing leaks.
	my %paths;              # Unique paths containing leaks.

	open (my $fh, '<', $file) or die "$0: $file: $!\n";
	while (my $line = <$fh>) {
		$total++;

		if ("dmesg:" eq substr($line, 0, 6)) {
			push @dmesg, $line;
			next;
		}

		cache_path(\%paths, $line);
		cache_filename(\%files, $line);
	}
	# Close explicitly, as dump_raw_output() does.
	close $fh;

	return $total, \@dmesg, \%paths, \%files;
}
637
# Prints the dmesg section of the summary report.
#
# $dmesg: reference to an array of raw result lines.  Each line is printed
# from just after its first ': '; an empty array prints "<no results>".
sub print_dmesg
{
	my ($dmesg) = @_;

	print "\ndmesg output:\n";

	unless (@$dmesg) {
		print "<no results>\n";
		return;
	}

	foreach my $entry (@$dmesg) {
		# skip ': '
		my $start = index($entry, ': ') + 2;
		print substr($entry, $start);
	}
}
655
# Prints one summary line per key of a results hash.
#
# $ref:  reference to a hash mapping a key to an array of result lines.
# $desc: what the keys are ('path' or 'filename'); used in the heading.
#
# Each line shows the number of results for the key, the key itself and
# the first stored result as an example.  An empty hash prints
# "<no results>".
sub squash_by
{
	my ($ref, $desc) = @_;

	print "\nResults squashed by $desc (excl dmesg). ";
	print "Displaying [<number of results> <$desc>], <example result>\n";

	unless (%$ref) {
		print "<no results>\n";
		return;
	}

	foreach my $key (keys %$ref) {
		my $lines = $ref->{$key};
		printf "[%d %s] %s", scalar(@$lines), $key, $lines->[0];
	}
}
674
# Files a raw result line under its path.
#
# $paths: reference to the hash of results keyed by path.
# $line:  raw result line of the form "<path>: <result>".
#
# Lines without a ': ' separator carry no path and are ignored.
sub cache_path
{
	my ($paths, $line) = @_;

	my $index = index($line, ': ');

	# index() returns -1 when the separator is missing; using that as a
	# length or offset would cut the line at the wrong places.
	return if ($index == -1);

	my $path = substr($line, 0, $index);

	$index += 2;            # skip ': '
	add_to_cache($paths, $path, substr($line, $index));
}
685
# Files a raw result line under the last component of its path.
#
# $files: reference to the hash of results keyed by filename.
# $line:  raw result line of the form "<path>: <result>".
#
# Lines without a ': ' separator carry no path and are ignored.
sub cache_filename
{
	my ($files, $line) = @_;

	my $index = index($line, ': ');

	# index() returns -1 when the separator is missing; using that as a
	# length or offset would cut the line at the wrong places.
	return if ($index == -1);

	my $path = substr($line, 0, $index);
	my $filename = basename($path);

	$index += 2;            # skip ': '
	add_to_cache($files, $filename, substr($line, $index));
}
697
# Appends $value to the list stored under $key in the hash that $cache
# refers to.  The list is created on first use.
sub add_to_cache
{
	my ($cache, $key, $value) = @_;

	# push creates the array for a new key by itself; no explicit
	# initialisation is needed.
	push @{ $cache->{$key} }, $value;
}
707