// SPDX-License-Identifier: GPL-2.0
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "evsel_config.h"
#include "parse-events.h"
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <api/fs/fs.h>
#include <subcmd/parse-options.h>
#include <perf/cpumap.h>
#include "cloexec.h"
#include "util/perf_api_probe.h"
#include "record.h"
#include "../perf-sys.h"
#include "topdown.h"
#include "map_symbol.h"
#include "mem-events.h"

/*
 * evsel__config_leader_sampling() uses special rules for leader sampling.
 * However, if the leader is an AUX area event, then assume the event to sample
 * is the next event.
 */
static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist)
{
	struct evsel *leader = evsel__leader(evsel);

	if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader) ||
	    is_mem_loads_aux_event(leader)) {
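		/* Return the first group member that is not the leader itself. */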
		evlist__for_each_entry(evlist, evsel) {
			if (evsel__leader(evsel) == leader && evsel != evsel__leader(evsel))
				return evsel;
		}
	}

	return leader;
}

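/*
 * Build a bitmask of the EVSEL__CONFIG_TERM_* types attached to this evsel,
 * so callers can test for terms such as 'freq' or 'period' with a single
 * mask comparison.
 */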
static u64 evsel__config_term_mask(struct evsel *evsel)
{
	struct evsel_config_term *term;
	struct list_head *config_terms = &evsel->config_terms;
	u64 term_types = 0;

	list_for_each_entry(term, config_terms, list) {
		term_types |= 1 << term->type;
	}
	return term_types;
}

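/*
 * With leader sampling (e.g. '{cycles,instructions}:S'), only one group
 * member takes samples; the others are read via PERF_SAMPLE_READ on that
 * member's samples. Adjust the non-sampling members accordingly.
 */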
static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	struct evsel *leader = evsel__leader(evsel);
	struct evsel *read_sampler;
	u64 term_types, freq_mask;

	if (!leader->sample_read)
		return;

	read_sampler = evsel__read_sampler(evsel, evlist);

	if (evsel == read_sampler)
		return;

	term_types = evsel__config_term_mask(evsel);
	/*
	 * Disable sampling for all group members except those with explicit
	 * config terms or the leader. In the case of an AUX area event, the 2nd
	 * event in the group is the one that 'leads' the sampling.
	 */
	freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD);
	if ((term_types & freq_mask) == 0) {
		attr->freq           = 0;
		attr->sample_freq    = 0;
		attr->sample_period  = 0;
	}
	if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0)
		attr->write_backward = 0;

	/*
	 * We don't get samples for slave events; they are synthesized when
	 * delivering the group leader sample. Set the slave event's
	 * sample_type to match the sampler's to ease reporting.
	 * An AUX area event also has sample_type requirements, so also include
	 * the sample type bits from the leader's sample_type to cover that
	 * case.
	 */
	attr->sample_type = read_sampler->core.attr.sample_type |
			    leader->core.attr.sample_type;
}

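/*
 * Apply the record options to every event in the evlist and resolve
 * evlist-wide decisions: leader sampling, and whether sample IDs (and the
 * PERF_SAMPLE_IDENTIFIER layout) are needed to tell events apart.
 */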
void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain)
{
	struct evsel *evsel;
	bool use_sample_identifier = false;
	bool use_comm_exec;
	bool sample_id = opts->sample_id;

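	/*
	 * A first map entry with cpu == -1 means no CPUs were requested,
	 * i.e. per-thread monitoring, in which case event inheritance to
	 * child processes is disabled.
	 */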
	if (perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0).cpu < 0)
		opts->no_inherit = true;

	use_comm_exec = perf_can_comm_exec();

	evlist__for_each_entry(evlist, evsel) {
		evsel__config(evsel, opts, callchain);
		if (evsel->tracking && use_comm_exec)
			evsel->core.attr.comm_exec = 1;
	}

	/* Configure leader sampling here now that the sample type is known */
	evlist__for_each_entry(evlist, evsel)
		evsel__config_leader_sampling(evsel, evlist);

	if (opts->full_auxtrace || opts->sample_identifier) {
		/*
		 * Need to be able to synthesize and parse selected events with
		 * arbitrary sample types, which requires always being able to
		 * match the id.
		 */
		use_sample_identifier = perf_can_sample_identifier();
		sample_id = true;
	} else if (evlist->core.nr_entries > 1) {
		struct evsel *first = evlist__first(evlist);

		evlist__for_each_entry(evlist, evsel) {
			if (evsel->core.attr.sample_type == first->core.attr.sample_type)
				continue;
			use_sample_identifier = perf_can_sample_identifier();
			break;
		}
		sample_id = true;
	}

	if (sample_id) {
		evlist__for_each_entry(evlist, evsel)
			evsel__set_sample_id(evsel, use_sample_identifier);
	}

	evlist__set_id_pos(evlist);
}

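/* Read the system-wide sampling rate limit from sysctl. */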
static int get_max_rate(unsigned int *rate)
{
	return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
}

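/*
 * Reconcile the user's frequency (-F/--freq) and period (-c/--count)
 * settings into a single sampling configuration, and clamp the frequency
 * to the current kernel/perf_event_max_sample_rate limit.
 */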
static int record_opts__config_freq(struct record_opts *opts)
{
	bool user_freq = opts->user_freq != UINT_MAX;
	bool user_interval = opts->user_interval != ULLONG_MAX;
	unsigned int max_rate;

	if (user_interval && user_freq) {
		pr_err("cannot set frequency and period at the same time\n");
		return -1;
	}

	if (user_interval)
		opts->default_interval = opts->user_interval;
	if (user_freq)
		opts->freq = opts->user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (opts->default_interval)
		opts->freq = 0;
	else if (opts->freq) {
		opts->default_interval = opts->freq;
	} else {
		pr_err("frequency and count are zero, aborting\n");
		return -1;
	}

	if (get_max_rate(&max_rate))
		return 0;

	/*
	 * User specified frequency is over current maximum.
	 */
	if (user_freq && (max_rate < opts->freq)) {
		if (opts->strict_freq) {
			pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
			       "       Please use -F freq option with a lower value or consider\n"
			       "       tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
			       max_rate);
			return -1;
		} else {
			pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
				   "         The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
				   "         The kernel will lower it when perf's interrupts take too long.\n"
				   "         Use --strict-freq to disable this throttling, refusing to record.\n",
				   max_rate, opts->freq, max_rate);

			opts->freq = max_rate;
		}
	}

	/*
	 * Default frequency is over current maximum.
	 */
	if (max_rate < opts->freq) {
		pr_warning("Lowering default frequency rate from %u to %u.\n"
			   "Please consider tweaking "
			   "/proc/sys/kernel/perf_event_max_sample_rate.\n",
			   opts->freq, max_rate);
		opts->freq = max_rate;
	}

	return 0;
}

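/* Validate and finalize the record options; currently just the frequency. */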
int record_opts__config(struct record_opts *opts)
{
	return record_opts__config_freq(opts);
}

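/*
 * Probe whether the event described by 'str' (e.g. "cycles:P") can actually
 * be opened, by parsing it into a throwaway evlist and issuing a trial
 * perf_event_open() that is closed again immediately.
 */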
bool evlist__can_select_event(struct evlist *evlist, const char *str)
{
	struct evlist *temp_evlist;
	struct evsel *evsel;
	int err, fd;
	struct perf_cpu cpu = { .cpu = 0 };
	bool ret = false;
	pid_t pid = -1;

	temp_evlist = evlist__new();
	if (!temp_evlist)
		return false;

	err = parse_event(temp_evlist, str);
	if (err)
		goto out_delete;

	evsel = evlist__last(temp_evlist);

	if (!evlist || perf_cpu_map__is_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
		struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();

		if (cpus)
			cpu = perf_cpu_map__cpu(cpus, 0);

		perf_cpu_map__put(cpus);
	} else {
		cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0);
	}

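	/*
	 * A system-wide probe (pid == -1) may be refused with EACCES under
	 * restrictive perf_event_paranoid settings; retry once per-process.
	 */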
	while (1) {
		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1,
					 perf_event_open_cloexec_flag());
		if (fd < 0) {
			if (pid == -1 && errno == EACCES) {
				pid = 0;
				continue;
			}
			goto out_delete;
		}
		break;
	}
	close(fd);
	ret = true;

out_delete:
	evlist__delete(temp_evlist);
	return ret;
}

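/*
 * parse-options callback for the -F/--freq option: accepts either a number
 * or "max", which expands to the current perf_event_max_sample_rate limit.
 */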
int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
{
	unsigned int freq;
	struct record_opts *opts = opt->value;

	if (!str)
		return -EINVAL;

	if (strcasecmp(str, "max") == 0) {
		if (get_max_rate(&freq)) {
			pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
			return -1;
		}
		pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq);
	} else {
		freq = atoi(str);
	}

	opts->user_freq = freq;
	return 0;
}