scripts/python/parallel-perf.py

2 # SPDX-License-Identifier: GPL-2.0
4 # Run a perf script command multiple times in parallel, using perf script
5 # options --cpu and --time so that each job processes a different chunk
20 glb_prog_name = "parallel-perf.py"
52 		sh_cmd = [ shlex.quote(x) for x in self.cmd ]
88 				return [ f"Non-empty error file {self.stderr_name}" ]
126 	for w in worklist:
128 	for w in worklist:
139 		x = "0" * (10 - len(x)) + x
140 	return x[:len(x) - 9] + "." + x[-9:]
150 	max_len = len(str(cpus[-1]))
151 	cpu_dir_fmt = f"cpu-%.{max_len}u"
154 	for cpu in cpus:
157 			cpu_option = f"--cpu={cpu}"
162 		tr_dir_fmt = "time-range"
166 			tr_dir_fmt += f"-{pos}"
172 		tr_dir_fmt += f"-%.{max_len}u"
175 		for r in time_ranges:
180 				time_option = "--time=" + NanoSecsToSecsStr(r[0]) + "," + NanoSecsToSecsStr(r[1])
243 	for w in worklist:
247 		for e in errorlist:
257 		for w in worklist:
263 …process.Popen([perf, "script", "--header-only", "--input", file_name], stdout=subprocess.PIPE).std…
268 	for line in lines:
269 		if ":" in line and line[0] == "#":
271 			name = line[1:pos-1].strip()
273 			if name in result:
278 					if name not in result:
285 	if hdr_fld not in hdr_dict:
293 	def Init(self, opt_element=-1, value_element=-1, opt_pos=-1, value_pos=-1, error=None):
305 		pos = -1
306 		for opt in args:
308 			if m and opt[:2] == f"-{short_name}":
313 						self.Init(error = f"-{short_name} option missing value")
317 			if opt[:n] == f"--{long_name}":
322 						self.Init(error = f"--{long_name} option missing value")
326 					self.Init(error = f"--{long_name} option expected '='")
328 			if m and opt[:1] == "-" and opt[:2] != "--" and short_name in opt:
330 				if "-" in opt[1:]:
331 					hpos = opt[1:].index("-")
338 						self.Init(error = f"-{short_name} option missing value")
353 		if self.opt_element == -1:
392 	# Use double-quick sampling to determine trace data density
393 	times_cmd = ["perf", "script", "--ns", "--input", file_name, "--itrace=qqi"]
395 		times_cmd.append(f"--cpu={cpu_str}")
397 		times_cmd.append(f"--time={time_str}")
399 	cnts_cmd.append("-Fcpu")
400 	times_cmd.append("-Fcpu,time")
415 	cpu_time_range.remaining -= 1
416 	cpu_time_range.interval_remaining -= 1
421 		time = TimeVal(line[1][:-1], 0)
423 		time_ranges[cpu_time_range.tr_pos][1] = time - 1
439 	pat = re.compile(r"\s*\[[0-9]+\]")
444 			line = line.decode("utf-8")
449 					cpu = int(line[0][1:-1])
497 	nr_cpus = cpus[-1] + 1 if per_cpu else 1
499 		nr_cpus = cpus[-1] + 1
500 		cpu_time_ranges = [ CPUTimeRange(cpu) for cpu in range(nr_cpus) ]
503 		cpu_time_ranges = [ CPUTimeRange(-1) ]
515 	for cpu_time_range in cpu_time_ranges:
550 	max_time = time_ranges[-1][1]
552 	for cpu_time_range in cpu_time_ranges:
576 	for cpu_time_range in cpu_time_ranges:
580 	return [cpu_time_ranges[cpu].time_ranges for cpu in cpus]
587 	duration = int((end - start + 1) / n)
591 	for i in range(n):
592 		time_ranges.append([start, start + duration - 1])
594 	time_ranges[-1][1] = end
598 	return r[1] - r[0] + 1
602 	for r in time_ranges:
608 	for r in time_ranges:
630 		if new_tr[i - 1][1] + 1 == new_tr[i][0]:
631 			new_tr[i][0] = new_tr[i - 1][0]
632 			del new_tr[i - 1]
640 	if time_ranges[-1][1] >= max_time:
641 		time_ranges[-1][1] = None
644 …e_str}'\nCheck also 'time of first sample' and 'time of last sample' in perf script --header-only")
648 	for i in range(n):
651 		if i != 0 and start <= time_ranges[i - 1][1]:
672 …"perf command bad cpu option: '{cpu_str}'\nCheck also 'nrcpus avail' in perf script --header-only")
678 	for r in time_str.split():
693 		return [-1]
695 	for r in cpu_str.split(","):
696 		a = r.split("-")
717 		for arg_name in vars(a):
725 			raise Exception("Bad options (negative values): try -h option for help")
757 				self.cpus = [ x for x in range(nr_cpus) ]
762 			self.cpus = [-1]
781 		for tr in self.split_time_ranges_for_each_cpu:
782 			# Re-combined time ranges should be the same
791 		for time_ranges in self.split_time_ranges_for_each_cpu:
798 		if "--per-thread" in self.cmd_line.split():
803 		# --no-per-cpu option takes precedence
808 		# Default to per-cpu for Intel PT data that was recorded per-cpu,
825 		# Prefer open-ended time range to starting / ending with min_time / max_time resp.
832 			for w in self.worklist:
850 Run a perf script command multiple times in parallel, using perf script options
851 --cpu and --time so that each job processes a different chunk of the data.
855 Follow the options by '--' and then the perf script command e.g.
857 	$ perf record -a -- sleep 10
858 	$ parallel-perf.py --nr=4 -- perf script --ns
860 	$ tree parallel-perf-output/
861 	parallel-perf-output/
862 	├── time-range-0
865 	├── time-range-1
868 	├── time-range-2
871 	└── time-range-3
874 	$ find parallel-perf-output -name cmd.txt | sort | xargs grep -H .
875 	parallel-perf-output/time-range-0/cmd.txt:perf script --time=,9466.504461499 --ns
876 	parallel-perf-output/time-range-1/cmd.txt:perf script --time=9466.504461500,9469.005396999 --ns
877 	parallel-perf-output/time-range-2/cmd.txt:perf script --time=9469.005397000,9471.506332499 --ns
878 	parallel-perf-output/time-range-3/cmd.txt:perf script --time=9471.506332500, --ns
881 --dlfilter and --script, so that the benefit of running parallel jobs
884 If option --pipe-to is used, standard output is first piped through that
888 Final standard output is redirected to files named out.txt in separate
890 written to files named err.txt. In addition, files named cmd.txt contain the
894 If any job exits with a non-zero exit code, then all jobs are killed and no
895 more are started. A message is printed if any job results in a non-empty
898 There is a separate output subdirectory for each time range. If the --per-cpu
899 option is used, these are further grouped under cpu-n subdirectories, e.g.
901 	$ parallel-perf.py --per-cpu --nr=2 -- perf script --ns --cpu=0,1
903 	$ tree parallel-perf-output
904 	parallel-perf-output/
905 	├── cpu-0
906 	│   ├── time-range-0
909 	│   └── time-range-1
912 	└── cpu-1
913 	    ├── time-range-0
916 	    └── time-range-1
919 	$ find parallel-perf-output -name cmd.txt | sort | xargs grep -H .
920 	parallel-perf-output/cpu-0/time-range-0/cmd.txt:perf script --cpu=0 --time=,9469.005396999 --ns
921 	parallel-perf-output/cpu-0/time-range-1/cmd.txt:perf script --cpu=0 --time=9469.005397000, --ns
922 	parallel-perf-output/cpu-1/time-range-0/cmd.txt:perf script --cpu=1 --time=,9469.005396999 --ns
923 	parallel-perf-output/cpu-1/time-range-1/cmd.txt:perf script --cpu=1 --time=9469.005397000, --ns
925 Subdivisions of time range, and cpus if the --per-cpu option is used, are
926 expressed by the --time and --cpu perf script options respectively. If the
927 supplied perf script command has a --time option, then that time range is
929 'time of last sample' is used (refer perf script --header-only). Similarly, the
930 supplied perf script command may provide a --cpu option, and only those CPUs
933 To prevent time intervals becoming too small, the --min-interval option can
938 time range will be subdivided in order to produce subdivisions that contain
940 double-quick (--itrace=qqi) samples, and choosing time ranges that encompass
941 approximately the same number of samples. In that case, time ranges may not be
942 the same for each CPU processed. For Intel PT, --per-cpu is the default, but
943 that can be overridden by --no-per-cpu. Note, for Intel PT, double-quick
944 decoding produces 1 sample for each PSB synchronization packet, which in turn
946 perf Intel PT documentation). The minimum number of double-quick samples that
947 will define a time range can be set by the --min_size option, which defaults to
950 …ap.add_argument("-o", "--output-dir", default="parallel-perf-output", help="output directory (defa…
951 …ap.add_argument("-j", "--jobs", type=int, default=0, help="maximum number of jobs to run in parall…
952 …ap.add_argument("-n", "--nr", type=int, default=0, help="number of time subdivisions (default is t…
953 …ap.add_argument("-i", "--interval", type=float, default=0, help="subdivide the time range using th…
954 …ap.add_argument("-c", "--per-cpu", action="store_true", help="process data for each CPU in paralle…
955 …ap.add_argument("-m", "--min-interval", type=float, default=glb_min_interval, help=f"minimum inter…
956 	ap.add_argument("-p", "--pipe-to", help="command to pipe output to (optional)")
957 …ap.add_argument("-N", "--no-per-cpu", action="store_true", help="do not process data for each CPU …
958 …ap.add_argument("-b", "--min_size", type=int, default=glb_min_samples, help="minimum data size (fo…
959 …ap.add_argument("-D", "--dry-run", action="store_true", help="do not run any jobs, just show the p…
960 …ap.add_argument("-q", "--quiet", action="store_true", help="do not print any messages except error…
961 	ap.add_argument("-v", "--verbose", action="store_true", help="print more messages")
962 	ap.add_argument("-d", "--debug", action="store_true", help="print debugging messages")
965 		split_pos = cmd_line.index("--")
979 			raise Exception("Command line must contain '--' before perf command")