1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
4   *
5   * Parts came from builtin-{top,stat,record}.c, see those files for further
6   * copyright notes.
7   */
8  
9  #include <byteswap.h>
10  #include <errno.h>
11  #include <inttypes.h>
12  #include <linux/bitops.h>
13  #include <api/io.h>
14  #include <api/fs/fs.h>
15  #include <api/fs/tracing_path.h>
16  #include <linux/hw_breakpoint.h>
17  #include <linux/perf_event.h>
18  #include <linux/compiler.h>
19  #include <linux/err.h>
20  #include <linux/zalloc.h>
21  #include <sys/ioctl.h>
22  #include <sys/resource.h>
23  #include <sys/types.h>
24  #include <dirent.h>
25  #include <stdlib.h>
26  #include <perf/evsel.h>
27  #include "asm/bug.h"
28  #include "bpf_counter.h"
29  #include "callchain.h"
30  #include "cgroup.h"
31  #include "counts.h"
32  #include "event.h"
33  #include "evsel.h"
34  #include "time-utils.h"
35  #include "util/env.h"
36  #include "util/evsel_config.h"
37  #include "util/evsel_fprintf.h"
38  #include "evlist.h"
39  #include <perf/cpumap.h>
40  #include "thread_map.h"
41  #include "target.h"
42  #include "perf_regs.h"
43  #include "record.h"
44  #include "debug.h"
45  #include "trace-event.h"
46  #include "stat.h"
47  #include "string2.h"
48  #include "memswap.h"
49  #include "util.h"
50  #include "util/hashmap.h"
51  #include "off_cpu.h"
52  #include "pmu.h"
53  #include "pmus.h"
54  #include "rlimit.h"
55  #include "../perf-sys.h"
56  #include "util/parse-branch-options.h"
57  #include "util/bpf-filter.h"
58  #include "util/hist.h"
59  #include <internal/xyarray.h>
60  #include <internal/lib.h>
61  #include <internal/threadmap.h>
62  #include "util/intel-tpebs.h"
63  
64  #include <linux/ctype.h>
65  
66  #ifdef HAVE_LIBTRACEEVENT
67  #include <traceevent/event-parse.h>
68  #endif
69  
70  struct perf_missing_features perf_missing_features;
71  
72  static clockid_t clockid;
73  
74  static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = {
75  	NULL,
76  	"duration_time",
77  	"user_time",
78  	"system_time",
79  };
80  
81  const char *perf_tool_event__to_str(enum perf_tool_event ev)
82  {
83  	if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX)
84  		return perf_tool_event__tool_names[ev];
85  
86  	return NULL;
87  }
88  
89  enum perf_tool_event perf_tool_event__from_str(const char *str)
90  {
91  	int i;
92  
93  	perf_tool_event__for_each_event(i) {
94  		if (!strcmp(str, perf_tool_event__tool_names[i]))
95  			return i;
96  	}
97  	return PERF_TOOL_NONE;
98  }
99  
100  
101  static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
102  {
103  	return 0;
104  }
105  
106  void __weak test_attr__ready(void) { }
107  
108  static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
109  {
110  }
111  
112  static struct {
113  	size_t	size;
114  	int	(*init)(struct evsel *evsel);
115  	void	(*fini)(struct evsel *evsel);
116  } perf_evsel__object = {
117  	.size = sizeof(struct evsel),
118  	.init = evsel__no_extra_init,
119  	.fini = evsel__no_extra_fini,
120  };
121  
122  int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel),
123  			 void (*fini)(struct evsel *evsel))
124  {
125  
126  	if (object_size == 0)
127  		goto set_methods;
128  
129  	if (perf_evsel__object.size > object_size)
130  		return -EINVAL;
131  
132  	perf_evsel__object.size = object_size;
133  
134  set_methods:
135  	if (init != NULL)
136  		perf_evsel__object.init = init;
137  
138  	if (fini != NULL)
139  		perf_evsel__object.fini = fini;
140  
141  	return 0;
142  }
143  
144  #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
145  
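/*
 * Count the bits set in the fixed-size part of sample_type: each set bit
 * contributes one u64 to every sample record.
 */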
146  int __evsel__sample_size(u64 sample_type)
147  {
148  	u64 mask = sample_type & PERF_SAMPLE_MASK;
149  	int size = 0;
150  	int i;
151  
152  	for (i = 0; i < 64; i++) {
153  		if (mask & (1ULL << i))
154  			size++;
155  	}
156  
157  	size *= sizeof(u64);
158  
159  	return size;
160  }
161  
162  /**
163   * __perf_evsel__calc_id_pos - calculate id_pos.
164   * @sample_type: sample type
165   *
166   * This function returns the position of the event id (PERF_SAMPLE_ID or
167   * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
168   * perf_record_sample.
169   */
170  static int __perf_evsel__calc_id_pos(u64 sample_type)
171  {
172  	int idx = 0;
173  
174  	if (sample_type & PERF_SAMPLE_IDENTIFIER)
175  		return 0;
176  
177  	if (!(sample_type & PERF_SAMPLE_ID))
178  		return -1;
179  
180  	if (sample_type & PERF_SAMPLE_IP)
181  		idx += 1;
182  
183  	if (sample_type & PERF_SAMPLE_TID)
184  		idx += 1;
185  
186  	if (sample_type & PERF_SAMPLE_TIME)
187  		idx += 1;
188  
189  	if (sample_type & PERF_SAMPLE_ADDR)
190  		idx += 1;
191  
192  	return idx;
193  }
194  
195  /**
196   * __perf_evsel__calc_is_pos - calculate is_pos.
197   * @sample_type: sample type
198   *
199   * This function returns the position (counting backwards) of the event id
200   * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
201   * sample_id_all is used there is an id sample appended to non-sample events.
202   */
203  static int __perf_evsel__calc_is_pos(u64 sample_type)
204  {
205  	int idx = 1;
206  
207  	if (sample_type & PERF_SAMPLE_IDENTIFIER)
208  		return 1;
209  
210  	if (!(sample_type & PERF_SAMPLE_ID))
211  		return -1;
212  
213  	if (sample_type & PERF_SAMPLE_CPU)
214  		idx += 1;
215  
216  	if (sample_type & PERF_SAMPLE_STREAM_ID)
217  		idx += 1;
218  
219  	return idx;
220  }
221  
222  void evsel__calc_id_pos(struct evsel *evsel)
223  {
224  	evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
225  	evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
226  }
227  
228  void __evsel__set_sample_bit(struct evsel *evsel,
229  				  enum perf_event_sample_format bit)
230  {
231  	if (!(evsel->core.attr.sample_type & bit)) {
232  		evsel->core.attr.sample_type |= bit;
233  		evsel->sample_size += sizeof(u64);
234  		evsel__calc_id_pos(evsel);
235  	}
236  }
237  
238  void __evsel__reset_sample_bit(struct evsel *evsel,
239  				    enum perf_event_sample_format bit)
240  {
241  	if (evsel->core.attr.sample_type & bit) {
242  		evsel->core.attr.sample_type &= ~bit;
243  		evsel->sample_size -= sizeof(u64);
244  		evsel__calc_id_pos(evsel);
245  	}
246  }
247  
248  void evsel__set_sample_id(struct evsel *evsel,
249  			       bool can_sample_identifier)
250  {
251  	if (can_sample_identifier) {
252  		evsel__reset_sample_bit(evsel, ID);
253  		evsel__set_sample_bit(evsel, IDENTIFIER);
254  	} else {
255  		evsel__set_sample_bit(evsel, ID);
256  	}
257  	evsel->core.attr.read_format |= PERF_FORMAT_ID;
258  }
259  
260  /**
261   * evsel__is_function_event - Return whether given evsel is a function
262   * trace event
263   *
264   * @evsel - evsel selector to be tested
265   *
266   * Return %true if event is function trace event
267   */
268  bool evsel__is_function_event(struct evsel *evsel)
269  {
270  #define FUNCTION_EVENT "ftrace:function"
271  
272  	return evsel->name &&
273  	       !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));
274  
275  #undef FUNCTION_EVENT
276  }
277  
278  void evsel__init(struct evsel *evsel,
279  		 struct perf_event_attr *attr, int idx)
280  {
281  	perf_evsel__init(&evsel->core, attr, idx);
282  	evsel->tracking	   = !idx;
283  	evsel->unit	   = strdup("");
284  	evsel->scale	   = 1.0;
285  	evsel->max_events  = ULONG_MAX;
286  	evsel->evlist	   = NULL;
287  	evsel->bpf_obj	   = NULL;
288  	evsel->bpf_fd	   = -1;
289  	INIT_LIST_HEAD(&evsel->config_terms);
290  	INIT_LIST_HEAD(&evsel->bpf_counter_list);
291  	INIT_LIST_HEAD(&evsel->bpf_filters);
292  	perf_evsel__object.init(evsel);
293  	evsel->sample_size = __evsel__sample_size(attr->sample_type);
294  	evsel__calc_id_pos(evsel);
295  	evsel->cmdline_group_boundary = false;
296  	evsel->metric_events = NULL;
297  	evsel->per_pkg_mask  = NULL;
298  	evsel->collect_stat  = false;
299  	evsel->pmu_name      = NULL;
300  	evsel->group_pmu_name = NULL;
301  	evsel->skippable     = false;
302  }
303  
304  struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
305  {
306  	struct evsel *evsel = zalloc(perf_evsel__object.size);
307  
308  	if (!evsel)
309  		return NULL;
310  	evsel__init(evsel, attr, idx);
311  
312  	if (evsel__is_bpf_output(evsel) && !attr->sample_type) {
313  		evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
314  					    PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD);
315  		evsel->core.attr.sample_period = 1;
316  	}
317  
318  	if (evsel__is_clock(evsel)) {
319  		free((char *)evsel->unit);
320  		evsel->unit = strdup("msec");
321  		evsel->scale = 1e-6;
322  	}
323  
324  	return evsel;
325  }
326  
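/*
 * Deep-copy a list of evsel_config_term entries from @src to @dst,
 * duplicating any string values the terms own.
 */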
327  int copy_config_terms(struct list_head *dst, struct list_head *src)
328  {
329  	struct evsel_config_term *pos, *tmp;
330  
331  	list_for_each_entry(pos, src, list) {
332  		tmp = malloc(sizeof(*tmp));
333  		if (tmp == NULL)
334  			return -ENOMEM;
335  
336  		*tmp = *pos;
337  		if (tmp->free_str) {
338  			tmp->val.str = strdup(pos->val.str);
339  			if (tmp->val.str == NULL) {
340  				free(tmp);
341  				return -ENOMEM;
342  			}
343  		}
344  		list_add_tail(&tmp->list, dst);
345  	}
346  	return 0;
347  }
348  
349  static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
350  {
351  	return copy_config_terms(&dst->config_terms, &src->config_terms);
352  }
353  
354  /**
355   * evsel__clone - create a new evsel copied from @orig
356   * @orig: original evsel
357   *
358   * The assumption is that @orig is not configured nor opened yet.
359   * So we only care about the attributes that can be set while it's parsed.
360   */
361  struct evsel *evsel__clone(struct evsel *orig)
362  {
363  	struct evsel *evsel;
364  
365  	BUG_ON(orig->core.fd);
366  	BUG_ON(orig->counts);
367  	BUG_ON(orig->priv);
368  	BUG_ON(orig->per_pkg_mask);
369  
370  	/* cannot handle BPF objects for now */
371  	if (orig->bpf_obj)
372  		return NULL;
373  
374  	evsel = evsel__new(&orig->core.attr);
375  	if (evsel == NULL)
376  		return NULL;
377  
378  	evsel->core.cpus = perf_cpu_map__get(orig->core.cpus);
379  	evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus);
380  	evsel->core.threads = perf_thread_map__get(orig->core.threads);
381  	evsel->core.nr_members = orig->core.nr_members;
382  	evsel->core.system_wide = orig->core.system_wide;
383  	evsel->core.requires_cpu = orig->core.requires_cpu;
384  	evsel->core.is_pmu_core = orig->core.is_pmu_core;
385  
386  	if (orig->name) {
387  		evsel->name = strdup(orig->name);
388  		if (evsel->name == NULL)
389  			goto out_err;
390  	}
391  	if (orig->group_name) {
392  		evsel->group_name = strdup(orig->group_name);
393  		if (evsel->group_name == NULL)
394  			goto out_err;
395  	}
396  	if (orig->pmu_name) {
397  		evsel->pmu_name = strdup(orig->pmu_name);
398  		if (evsel->pmu_name == NULL)
399  			goto out_err;
400  	}
401  	if (orig->group_pmu_name) {
402  		evsel->group_pmu_name = strdup(orig->group_pmu_name);
403  		if (evsel->group_pmu_name == NULL)
404  			goto out_err;
405  	}
406  	if (orig->filter) {
407  		evsel->filter = strdup(orig->filter);
408  		if (evsel->filter == NULL)
409  			goto out_err;
410  	}
411  	if (orig->metric_id) {
412  		evsel->metric_id = strdup(orig->metric_id);
413  		if (evsel->metric_id == NULL)
414  			goto out_err;
415  	}
416  	evsel->cgrp = cgroup__get(orig->cgrp);
417  #ifdef HAVE_LIBTRACEEVENT
418  	evsel->tp_format = orig->tp_format;
419  #endif
420  	evsel->handler = orig->handler;
421  	evsel->core.leader = orig->core.leader;
422  
423  	evsel->max_events = orig->max_events;
424  	evsel->tool_event = orig->tool_event;
425  	free((char *)evsel->unit);
426  	evsel->unit = strdup(orig->unit);
427  	if (evsel->unit == NULL)
428  		goto out_err;
429  
430  	evsel->scale = orig->scale;
431  	evsel->snapshot = orig->snapshot;
432  	evsel->per_pkg = orig->per_pkg;
433  	evsel->percore = orig->percore;
434  	evsel->precise_max = orig->precise_max;
435  	evsel->is_libpfm_event = orig->is_libpfm_event;
436  
437  	evsel->exclude_GH = orig->exclude_GH;
438  	evsel->sample_read = orig->sample_read;
439  	evsel->auto_merge_stats = orig->auto_merge_stats;
440  	evsel->collect_stat = orig->collect_stat;
441  	evsel->weak_group = orig->weak_group;
442  	evsel->use_config_name = orig->use_config_name;
443  	evsel->pmu = orig->pmu;
444  
445  	if (evsel__copy_config_terms(evsel, orig) < 0)
446  		goto out_err;
447  
448  	return evsel;
449  
450  out_err:
451  	evsel__delete(evsel);
452  	return NULL;
453  }
454  
455  /*
456   * Returns pointer with encoded error via <linux/err.h> interface.
457   */
458  #ifdef HAVE_LIBTRACEEVENT
459  struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format)
460  {
461  	struct evsel *evsel = zalloc(perf_evsel__object.size);
462  	int err = -ENOMEM;
463  
464  	if (evsel == NULL) {
465  		goto out_err;
466  	} else {
467  		struct perf_event_attr attr = {
468  			.type	       = PERF_TYPE_TRACEPOINT,
469  			.sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
470  					  PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
471  		};
472  
473  		if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
474  			goto out_free;
475  
476  		event_attr_init(&attr);
477  
478  		if (format) {
479  			evsel->tp_format = trace_event__tp_format(sys, name);
480  			if (IS_ERR(evsel->tp_format)) {
481  				err = PTR_ERR(evsel->tp_format);
482  				goto out_free;
483  			}
484  			attr.config = evsel->tp_format->id;
485  		} else {
486  			attr.config = (__u64) -1;
487  		}
488  
489  
490  		attr.sample_period = 1;
491  		evsel__init(evsel, &attr, idx);
492  	}
493  
494  	return evsel;
495  
496  out_free:
497  	zfree(&evsel->name);
498  	free(evsel);
499  out_err:
500  	return ERR_PTR(err);
501  }
502  #endif
503  
504  const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
505  	"cycles",
506  	"instructions",
507  	"cache-references",
508  	"cache-misses",
509  	"branches",
510  	"branch-misses",
511  	"bus-cycles",
512  	"stalled-cycles-frontend",
513  	"stalled-cycles-backend",
514  	"ref-cycles",
515  };
516  
517  char *evsel__bpf_counter_events;
518  
519  bool evsel__match_bpf_counter_events(const char *name)
520  {
521  	int name_len;
522  	bool match;
523  	char *ptr;
524  
525  	if (!evsel__bpf_counter_events)
526  		return false;
527  
528  	ptr = strstr(evsel__bpf_counter_events, name);
529  	name_len = strlen(name);
530  
531  	/* check name matches a full token in evsel__bpf_counter_events */
532  	match = (ptr != NULL) &&
533  		((ptr == evsel__bpf_counter_events) || (*(ptr - 1) == ',')) &&
534  		((*(ptr + name_len) == ',') || (*(ptr + name_len) == '\0'));
535  
536  	return match;
537  }
538  
539  static const char *__evsel__hw_name(u64 config)
540  {
541  	if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config])
542  		return evsel__hw_names[config];
543  
544  	return "unknown-hardware";
545  }
546  
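/*
 * Append the modifier suffix implied by the exclude and precise_ip bits in
 * the event's attr (e.g. ":u", ":kppp", ":G") to the name already in @bf.
 */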
547  static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
548  {
549  	int colon = 0, r = 0;
550  	struct perf_event_attr *attr = &evsel->core.attr;
551  	bool exclude_guest_default = false;
552  
553  #define MOD_PRINT(context, mod)	do {					\
554  		if (!attr->exclude_##context) {				\
555  			if (!colon) colon = ++r;			\
556  			r += scnprintf(bf + r, size - r, "%c", mod);	\
557  		} } while(0)
558  
559  	if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
560  		MOD_PRINT(kernel, 'k');
561  		MOD_PRINT(user, 'u');
562  		MOD_PRINT(hv, 'h');
563  		exclude_guest_default = true;
564  	}
565  
566  	if (attr->precise_ip) {
567  		if (!colon)
568  			colon = ++r;
569  		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
570  		exclude_guest_default = true;
571  	}
572  
573  	if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
574  		MOD_PRINT(host, 'H');
575  		MOD_PRINT(guest, 'G');
576  	}
577  #undef MOD_PRINT
578  	if (colon)
579  		bf[colon - 1] = ':';
580  	return r;
581  }
582  
583  int __weak arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
584  {
585  	return scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
586  }
587  
588  static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
589  {
590  	int r = arch_evsel__hw_name(evsel, bf, size);
591  	return r + evsel__add_modifiers(evsel, bf + r, size - r);
592  }
593  
594  const char *const evsel__sw_names[PERF_COUNT_SW_MAX] = {
595  	"cpu-clock",
596  	"task-clock",
597  	"page-faults",
598  	"context-switches",
599  	"cpu-migrations",
600  	"minor-faults",
601  	"major-faults",
602  	"alignment-faults",
603  	"emulation-faults",
604  	"dummy",
605  };
606  
607  static const char *__evsel__sw_name(u64 config)
608  {
609  	if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config])
610  		return evsel__sw_names[config];
611  	return "unknown-software";
612  }
613  
614  static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
615  {
616  	int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
617  	return r + evsel__add_modifiers(evsel, bf + r, size - r);
618  }
619  
620  static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size)
621  {
622  	return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev));
623  }
624  
625  static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
626  {
627  	int r;
628  
629  	r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
630  
631  	if (type & HW_BREAKPOINT_R)
632  		r += scnprintf(bf + r, size - r, "r");
633  
634  	if (type & HW_BREAKPOINT_W)
635  		r += scnprintf(bf + r, size - r, "w");
636  
637  	if (type & HW_BREAKPOINT_X)
638  		r += scnprintf(bf + r, size - r, "x");
639  
640  	return r;
641  }
642  
643  static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
644  {
645  	struct perf_event_attr *attr = &evsel->core.attr;
646  	int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
647  	return r + evsel__add_modifiers(evsel, bf + r, size - r);
648  }
649  
650  const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
651   { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
652   { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
653   { "LLC",	"L2",							},
654   { "dTLB",	"d-tlb",	"Data-TLB",				},
655   { "iTLB",	"i-tlb",	"Instruction-TLB",			},
656   { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
657   { "node",								},
658  };
659  
660  const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
661   { "load",	"loads",	"read",					},
662   { "store",	"stores",	"write",				},
663   { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
664  };
665  
666  const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
667   { "refs",	"Reference",	"ops",		"access",		},
668   { "misses",	"miss",							},
669  };
670  
671  #define C(x)		PERF_COUNT_HW_CACHE_##x
672  #define CACHE_READ	(1 << C(OP_READ))
673  #define CACHE_WRITE	(1 << C(OP_WRITE))
674  #define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
675  #define COP(x)		(1 << x)
676  
677  /*
678   * cache operation stat
679   * L1I : Read and prefetch only
680   * ITLB and BPU : Read-only
681   */
682  static const unsigned long evsel__hw_cache_stat[C(MAX)] = {
683   [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
684   [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
685   [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
686   [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
687   [C(ITLB)]	= (CACHE_READ),
688   [C(BPU)]	= (CACHE_READ),
689   [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
690  };
691  
692  bool evsel__is_cache_op_valid(u8 type, u8 op)
693  {
694  	if (evsel__hw_cache_stat[type] & COP(op))
695  		return true;	/* valid */
696  	else
697  		return false;	/* invalid */
698  }
699  
700  int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
701  {
702  	if (result) {
703  		return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0],
704  				 evsel__hw_cache_op[op][0],
705  				 evsel__hw_cache_result[result][0]);
706  	}
707  
708  	return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0],
709  			 evsel__hw_cache_op[op][1]);
710  }
711  
712  static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
713  {
714  	u8 op, result, type = (config >>  0) & 0xff;
715  	const char *err = "unknown-ext-hardware-cache-type";
716  
717  	if (type >= PERF_COUNT_HW_CACHE_MAX)
718  		goto out_err;
719  
720  	op = (config >>  8) & 0xff;
721  	err = "unknown-ext-hardware-cache-op";
722  	if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
723  		goto out_err;
724  
725  	result = (config >> 16) & 0xff;
726  	err = "unknown-ext-hardware-cache-result";
727  	if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
728  		goto out_err;
729  
730  	err = "invalid-cache";
731  	if (!evsel__is_cache_op_valid(type, op))
732  		goto out_err;
733  
734  	return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
735  out_err:
736  	return scnprintf(bf, size, "%s", err);
737  }
738  
739  static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
740  {
741  	int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
742  	return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
743  }
744  
745  static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
746  {
747  	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
748  	return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
749  }
750  
751  const char *evsel__name(struct evsel *evsel)
752  {
753  	char bf[128];
754  
755  	if (!evsel)
756  		goto out_unknown;
757  
758  	if (evsel->name)
759  		return evsel->name;
760  
761  	switch (evsel->core.attr.type) {
762  	case PERF_TYPE_RAW:
763  		evsel__raw_name(evsel, bf, sizeof(bf));
764  		break;
765  
766  	case PERF_TYPE_HARDWARE:
767  		evsel__hw_name(evsel, bf, sizeof(bf));
768  		break;
769  
770  	case PERF_TYPE_HW_CACHE:
771  		evsel__hw_cache_name(evsel, bf, sizeof(bf));
772  		break;
773  
774  	case PERF_TYPE_SOFTWARE:
775  		if (evsel__is_tool(evsel))
776  			evsel__tool_name(evsel__tool_event(evsel), bf, sizeof(bf));
777  		else
778  			evsel__sw_name(evsel, bf, sizeof(bf));
779  		break;
780  
781  	case PERF_TYPE_TRACEPOINT:
782  		scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
783  		break;
784  
785  	case PERF_TYPE_BREAKPOINT:
786  		evsel__bp_name(evsel, bf, sizeof(bf));
787  		break;
788  
789  	default:
790  		scnprintf(bf, sizeof(bf), "unknown attr type: %d",
791  			  evsel->core.attr.type);
792  		break;
793  	}
794  
795  	evsel->name = strdup(bf);
796  
797  	if (evsel->name)
798  		return evsel->name;
799  out_unknown:
800  	return "unknown";
801  }
802  
803  bool evsel__name_is(struct evsel *evsel, const char *name)
804  {
805  	return !strcmp(evsel__name(evsel), name);
806  }
807  
808  const char *evsel__metric_id(const struct evsel *evsel)
809  {
810  	if (evsel->metric_id)
811  		return evsel->metric_id;
812  
813  	if (evsel__is_tool(evsel))
814  		return perf_tool_event__to_str(evsel__tool_event(evsel));
815  
816  	return "unknown";
817  }
818  
819  const char *evsel__group_name(struct evsel *evsel)
820  {
821  	return evsel->group_name ?: "anon group";
822  }
823  
824  /*
825   * Returns the group details for the specified leader,
826   * with the following rules.
827   *
828   *  For record -e '{cycles,instructions}'
829   *    'anon group { cycles:u, instructions:u }'
830   *
831   *  For record -e 'cycles,instructions' and report --group
832   *    'cycles:u, instructions:u'
833   */
834  int evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
835  {
836  	int ret = 0;
837  	bool first = true;
838  	struct evsel *pos;
839  	const char *group_name = evsel__group_name(evsel);
840  
841  	if (!evsel->forced_leader)
842  		ret = scnprintf(buf, size, "%s { ", group_name);
843  
844  	for_each_group_evsel(pos, evsel) {
845  		if (symbol_conf.skip_empty &&
846  		    evsel__hists(pos)->stats.nr_samples == 0)
847  			continue;
848  
849  		ret += scnprintf(buf + ret, size - ret, "%s%s",
850  				 first ? "" : ", ", evsel__name(pos));
851  		first = false;
852  	}
853  
854  	if (!evsel->forced_leader)
855  		ret += scnprintf(buf + ret, size - ret, " }");
856  
857  	return ret;
858  }
859  
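/*
 * Translate the requested callchain collection method (frame pointers, LBR
 * call stack or DWARF) into the corresponding sample_type, branch_sample_type
 * and user register/stack dump settings in the event's attr.
 */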
860  static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
861  				      struct callchain_param *param)
862  {
863  	bool function = evsel__is_function_event(evsel);
864  	struct perf_event_attr *attr = &evsel->core.attr;
865  	const char *arch = perf_env__arch(evsel__env(evsel));
866  
867  	evsel__set_sample_bit(evsel, CALLCHAIN);
868  
869  	attr->sample_max_stack = param->max_stack;
870  
871  	if (opts->kernel_callchains)
872  		attr->exclude_callchain_user = 1;
873  	if (opts->user_callchains)
874  		attr->exclude_callchain_kernel = 1;
875  	if (param->record_mode == CALLCHAIN_LBR) {
876  		if (!opts->branch_stack) {
877  			if (attr->exclude_user) {
878  				pr_warning("LBR callstack option is only available "
879  					   "to get user callchain information. "
880  					   "Falling back to framepointers.\n");
881  			} else {
882  				evsel__set_sample_bit(evsel, BRANCH_STACK);
883  				attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
884  							PERF_SAMPLE_BRANCH_CALL_STACK |
885  							PERF_SAMPLE_BRANCH_NO_CYCLES |
886  							PERF_SAMPLE_BRANCH_NO_FLAGS |
887  							PERF_SAMPLE_BRANCH_HW_INDEX;
888  			}
889  		} else
890  			 pr_warning("Cannot use LBR callstack with branch stack. "
891  				    "Falling back to framepointers.\n");
892  	}
893  
894  	if (param->record_mode == CALLCHAIN_DWARF) {
895  		if (!function) {
896  			evsel__set_sample_bit(evsel, REGS_USER);
897  			evsel__set_sample_bit(evsel, STACK_USER);
898  			if (opts->sample_user_regs &&
899  			    DWARF_MINIMAL_REGS(arch) != arch__user_reg_mask()) {
900  				attr->sample_regs_user |= DWARF_MINIMAL_REGS(arch);
901  				pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
902  					   "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
903  					   "so the minimal registers set (IP, SP) is explicitly forced.\n");
904  			} else {
905  				attr->sample_regs_user |= arch__user_reg_mask();
906  			}
907  			attr->sample_stack_user = param->dump_size;
908  			attr->exclude_callchain_user = 1;
909  		} else {
910  			pr_info("Cannot use DWARF unwind for function trace event,"
911  				" falling back to framepointers.\n");
912  		}
913  	}
914  
915  	if (function) {
916  		pr_info("Disabling user space callchains for function trace event.\n");
917  		attr->exclude_callchain_user = 1;
918  	}
919  }
920  
921  void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
922  			     struct callchain_param *param)
923  {
924  	if (param->enabled)
925  		return __evsel__config_callchain(evsel, opts, param);
926  }
927  
928  static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
929  {
930  	struct perf_event_attr *attr = &evsel->core.attr;
931  
932  	evsel__reset_sample_bit(evsel, CALLCHAIN);
933  	if (param->record_mode == CALLCHAIN_LBR) {
934  		evsel__reset_sample_bit(evsel, BRANCH_STACK);
935  		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
936  					      PERF_SAMPLE_BRANCH_CALL_STACK |
937  					      PERF_SAMPLE_BRANCH_HW_INDEX);
938  	}
939  	if (param->record_mode == CALLCHAIN_DWARF) {
940  		evsel__reset_sample_bit(evsel, REGS_USER);
941  		evsel__reset_sample_bit(evsel, STACK_USER);
942  	}
943  }
944  
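/*
 * Apply per-event config terms (the term=value pairs given inside an event
 * specification) on top of the global record options; per-event settings
 * take precedence over the corresponding global ones.
 */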
945  static void evsel__apply_config_terms(struct evsel *evsel,
946  				      struct record_opts *opts, bool track)
947  {
948  	struct evsel_config_term *term;
949  	struct list_head *config_terms = &evsel->config_terms;
950  	struct perf_event_attr *attr = &evsel->core.attr;
951  	/* callgraph default */
952  	struct callchain_param param = {
953  		.record_mode = callchain_param.record_mode,
954  	};
955  	u32 dump_size = 0;
956  	int max_stack = 0;
957  	const char *callgraph_buf = NULL;
958  
959  	list_for_each_entry(term, config_terms, list) {
960  		switch (term->type) {
961  		case EVSEL__CONFIG_TERM_PERIOD:
962  			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
963  				attr->sample_period = term->val.period;
964  				attr->freq = 0;
965  				evsel__reset_sample_bit(evsel, PERIOD);
966  			}
967  			break;
968  		case EVSEL__CONFIG_TERM_FREQ:
969  			if (!(term->weak && opts->user_freq != UINT_MAX)) {
970  				attr->sample_freq = term->val.freq;
971  				attr->freq = 1;
972  				evsel__set_sample_bit(evsel, PERIOD);
973  			}
974  			break;
975  		case EVSEL__CONFIG_TERM_TIME:
976  			if (term->val.time)
977  				evsel__set_sample_bit(evsel, TIME);
978  			else
979  				evsel__reset_sample_bit(evsel, TIME);
980  			break;
981  		case EVSEL__CONFIG_TERM_CALLGRAPH:
982  			callgraph_buf = term->val.str;
983  			break;
984  		case EVSEL__CONFIG_TERM_BRANCH:
985  			if (term->val.str && strcmp(term->val.str, "no")) {
986  				evsel__set_sample_bit(evsel, BRANCH_STACK);
987  				parse_branch_str(term->val.str,
988  						 &attr->branch_sample_type);
989  			} else
990  				evsel__reset_sample_bit(evsel, BRANCH_STACK);
991  			break;
992  		case EVSEL__CONFIG_TERM_STACK_USER:
993  			dump_size = term->val.stack_user;
994  			break;
995  		case EVSEL__CONFIG_TERM_MAX_STACK:
996  			max_stack = term->val.max_stack;
997  			break;
998  		case EVSEL__CONFIG_TERM_MAX_EVENTS:
999  			evsel->max_events = term->val.max_events;
1000  			break;
1001  		case EVSEL__CONFIG_TERM_INHERIT:
1002  			/*
1003  			 * attr->inherit should have already been set by
1004  			 * evsel__config. If user explicitly set
1005  			 * inherit using config terms, override global
1006  			 * opt->no_inherit setting.
1007  			 */
1008  			attr->inherit = term->val.inherit ? 1 : 0;
1009  			break;
1010  		case EVSEL__CONFIG_TERM_OVERWRITE:
1011  			attr->write_backward = term->val.overwrite ? 1 : 0;
1012  			break;
1013  		case EVSEL__CONFIG_TERM_DRV_CFG:
1014  			break;
1015  		case EVSEL__CONFIG_TERM_PERCORE:
1016  			break;
1017  		case EVSEL__CONFIG_TERM_AUX_OUTPUT:
1018  			attr->aux_output = term->val.aux_output ? 1 : 0;
1019  			break;
1020  		case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
1021  			/* Already applied by auxtrace */
1022  			break;
1023  		case EVSEL__CONFIG_TERM_CFG_CHG:
1024  			break;
1025  		default:
1026  			break;
1027  		}
1028  	}
1029  
1030  	/* User explicitly set per-event callgraph, clear the old setting and reset. */
1031  	if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
1032  		bool sample_address = false;
1033  
1034  		if (max_stack) {
1035  			param.max_stack = max_stack;
1036  			if (callgraph_buf == NULL)
1037  				callgraph_buf = "fp";
1038  		}
1039  
1040  		/* parse callgraph parameters */
1041  		if (callgraph_buf != NULL) {
1042  			if (!strcmp(callgraph_buf, "no")) {
1043  				param.enabled = false;
1044  				param.record_mode = CALLCHAIN_NONE;
1045  			} else {
1046  				param.enabled = true;
1047  				if (parse_callchain_record(callgraph_buf, &param)) {
1048  					pr_err("per-event callgraph setting for %s failed. "
1049  					       "Apply callgraph global setting for it\n",
1050  					       evsel->name);
1051  					return;
1052  				}
1053  				if (param.record_mode == CALLCHAIN_DWARF)
1054  					sample_address = true;
1055  			}
1056  		}
1057  		if (dump_size > 0) {
1058  			dump_size = round_up(dump_size, sizeof(u64));
1059  			param.dump_size = dump_size;
1060  		}
1061  
1062  		/* If global callgraph set, clear it */
1063  		if (callchain_param.enabled)
1064  			evsel__reset_callgraph(evsel, &callchain_param);
1065  
1066  		/* set perf-event callgraph */
1067  		if (param.enabled) {
1068  			if (sample_address) {
1069  				evsel__set_sample_bit(evsel, ADDR);
1070  				evsel__set_sample_bit(evsel, DATA_SRC);
1071  				evsel->core.attr.mmap_data = track;
1072  			}
1073  			evsel__config_callchain(evsel, opts, &param);
1074  		}
1075  	}
1076  }
1077  
1078  struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
1079  {
1080  	struct evsel_config_term *term, *found_term = NULL;
1081  
1082  	list_for_each_entry(term, &evsel->config_terms, list) {
1083  		if (term->type == type)
1084  			found_term = term;
1085  	}
1086  
1087  	return found_term;
1088  }
1089  
1090  void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
1091  {
1092  	evsel__set_sample_bit(evsel, WEIGHT);
1093  }
1094  
1095  void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused,
1096  				    struct perf_event_attr *attr __maybe_unused)
1097  {
1098  }
1099  
1100  static void evsel__set_default_freq_period(struct record_opts *opts,
1101  					   struct perf_event_attr *attr)
1102  {
1103  	if (opts->freq) {
1104  		attr->freq = 1;
1105  		attr->sample_freq = opts->freq;
1106  	} else {
1107  		attr->sample_period = opts->default_interval;
1108  	}
1109  }
1110  
1111  static bool evsel__is_offcpu_event(struct evsel *evsel)
1112  {
1113  	return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT);
1114  }
1115  
1116  /*
1117   * The enable_on_exec/disabled value strategy:
1118   *
1119   *  1) For any type of traced program:
1120   *    - all independent events and group leaders are disabled
1121   *    - all group members are enabled
1122   *
1123   *     Group members are ruled by group leaders. They need to
1124   *     be enabled, because the group scheduling relies on that.
1125   *
1126   *  2) For traced programs executed by perf:
1127   *     - all independent events and group leaders have
1128   *       enable_on_exec set
1129   *     - we don't specifically enable or disable any event during
1130   *       the record command
1131   *
1132   *     Independent events and group leaders are initially disabled
1133   *     and get enabled by exec. Group members are ruled by group
1134   *     leaders as stated in 1).
1135   *
1136   *  3) For traced programs attached by perf (pid/tid):
1137   *     - we specifically enable or disable all events during
1138   *       the record command
1139   *
1140   *     When attaching events to an already running traced process we
1141   *     enable/disable events specifically, as there's no
1142   *     initial traced exec call.
1143   */
1144  void evsel__config(struct evsel *evsel, struct record_opts *opts,
1145  		   struct callchain_param *callchain)
1146  {
1147  	struct evsel *leader = evsel__leader(evsel);
1148  	struct perf_event_attr *attr = &evsel->core.attr;
1149  	int track = evsel->tracking;
1150  	bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
1151  
1152  	attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
1153  	attr->inherit	    = !opts->no_inherit;
1154  	attr->write_backward = opts->overwrite ? 1 : 0;
1155  	attr->read_format   = PERF_FORMAT_LOST;
1156  
1157  	evsel__set_sample_bit(evsel, IP);
1158  	evsel__set_sample_bit(evsel, TID);
1159  
1160  	if (evsel->sample_read) {
1161  		evsel__set_sample_bit(evsel, READ);
1162  
1163  		/*
1164  		 * We need ID even in the case of a single event, because
1165  		 * PERF_SAMPLE_READ processes ID-specific data.
1166  		 */
1167  		evsel__set_sample_id(evsel, false);
1168  
1169  		/*
1170  		 * Apply the group read format only if we belong to a group
1171  		 * with more than one member.
1172  		 */
1173  		if (leader->core.nr_members > 1) {
1174  			attr->read_format |= PERF_FORMAT_GROUP;
1175  			attr->inherit = 0;
1176  		}
1177  	}
1178  
1179  	/*
1180  	 * We default some events to have a default interval. But keep
1181  	 * it a weak assumption overridable by the user.
1182  	 */
1183  	if ((evsel->is_libpfm_event && !attr->sample_period) ||
1184  	    (!evsel->is_libpfm_event && (!attr->sample_period ||
1185  					 opts->user_freq != UINT_MAX ||
1186  					 opts->user_interval != ULLONG_MAX)))
1187  		evsel__set_default_freq_period(opts, attr);
1188  
1189  	/*
1190  	 * If attr->freq was set (here or earlier), ask for period
1191  	 * to be sampled.
1192  	 */
1193  	if (attr->freq)
1194  		evsel__set_sample_bit(evsel, PERIOD);
1195  
1196  	if (opts->no_samples)
1197  		attr->sample_freq = 0;
1198  
1199  	if (opts->inherit_stat) {
1200  		evsel->core.attr.read_format |=
1201  			PERF_FORMAT_TOTAL_TIME_ENABLED |
1202  			PERF_FORMAT_TOTAL_TIME_RUNNING |
1203  			PERF_FORMAT_ID;
1204  		attr->inherit_stat = 1;
1205  	}
1206  
1207  	if (opts->sample_address) {
1208  		evsel__set_sample_bit(evsel, ADDR);
1209  		attr->mmap_data = track;
1210  	}
1211  
1212  	/*
1213  	 * We don't allow user space callchains for the function trace
1214  	 * event, due to issues with page faults while tracing the page
1215  	 * fault handler and its overall tricky nature.
1216  	 */
1217  	if (evsel__is_function_event(evsel))
1218  		evsel->core.attr.exclude_callchain_user = 1;
1219  
1220  	if (callchain && callchain->enabled && !evsel->no_aux_samples)
1221  		evsel__config_callchain(evsel, opts, callchain);
1222  
1223  	if (opts->sample_intr_regs && !evsel->no_aux_samples &&
1224  	    !evsel__is_dummy_event(evsel)) {
1225  		attr->sample_regs_intr = opts->sample_intr_regs;
1226  		evsel__set_sample_bit(evsel, REGS_INTR);
1227  	}
1228  
1229  	if (opts->sample_user_regs && !evsel->no_aux_samples &&
1230  	    !evsel__is_dummy_event(evsel)) {
1231  		attr->sample_regs_user |= opts->sample_user_regs;
1232  		evsel__set_sample_bit(evsel, REGS_USER);
1233  	}
1234  
1235  	if (target__has_cpu(&opts->target) || opts->sample_cpu)
1236  		evsel__set_sample_bit(evsel, CPU);
1237  
1238  	/*
1239  	 * When the user explicitly disabled time don't force it here.
1240  	 */
1241  	if (opts->sample_time &&
1242  	    (!perf_missing_features.sample_id_all &&
1243  	    (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
1244  	     opts->sample_time_set)))
1245  		evsel__set_sample_bit(evsel, TIME);
1246  
1247  	if (opts->raw_samples && !evsel->no_aux_samples) {
1248  		evsel__set_sample_bit(evsel, TIME);
1249  		evsel__set_sample_bit(evsel, RAW);
1250  		evsel__set_sample_bit(evsel, CPU);
1251  	}
1252  
1253  	if (opts->sample_address)
1254  		evsel__set_sample_bit(evsel, DATA_SRC);
1255  
1256  	if (opts->sample_phys_addr)
1257  		evsel__set_sample_bit(evsel, PHYS_ADDR);
1258  
1259  	if (opts->no_buffering) {
1260  		attr->watermark = 0;
1261  		attr->wakeup_events = 1;
1262  	}
1263  	if (opts->branch_stack && !evsel->no_aux_samples) {
1264  		evsel__set_sample_bit(evsel, BRANCH_STACK);
1265  		attr->branch_sample_type = opts->branch_stack;
1266  	}
1267  
1268  	if (opts->sample_weight)
1269  		arch_evsel__set_sample_weight(evsel);
1270  
1271  	attr->task     = track;
1272  	attr->mmap     = track;
1273  	attr->mmap2    = track && !perf_missing_features.mmap2;
1274  	attr->comm     = track;
1275  	attr->build_id = track && opts->build_id;
1276  
1277  	/*
1278  	 * ksymbol is tracked separately with text poke because it needs to be
1279  	 * system wide and enabled immediately.
1280  	 */
1281  	if (!opts->text_poke)
1282  		attr->ksymbol = track && !perf_missing_features.ksymbol;
1283  	attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
1284  
1285  	if (opts->record_namespaces)
1286  		attr->namespaces  = track;
1287  
1288  	if (opts->record_cgroup) {
1289  		attr->cgroup = track && !perf_missing_features.cgroup;
1290  		evsel__set_sample_bit(evsel, CGROUP);
1291  	}
1292  
1293  	if (opts->sample_data_page_size)
1294  		evsel__set_sample_bit(evsel, DATA_PAGE_SIZE);
1295  
1296  	if (opts->sample_code_page_size)
1297  		evsel__set_sample_bit(evsel, CODE_PAGE_SIZE);
1298  
1299  	if (opts->record_switch_events)
1300  		attr->context_switch = track;
1301  
1302  	if (opts->sample_transaction)
1303  		evsel__set_sample_bit(evsel, TRANSACTION);
1304  
1305  	if (opts->running_time) {
1306  		evsel->core.attr.read_format |=
1307  			PERF_FORMAT_TOTAL_TIME_ENABLED |
1308  			PERF_FORMAT_TOTAL_TIME_RUNNING;
1309  	}
1310  
1311  	/*
1312  	 * XXX see the function comment above
1313  	 *
1314  	 * Disabling only independent events or group leaders,
1315  	 * keeping group members enabled.
1316  	 */
1317  	if (evsel__is_group_leader(evsel))
1318  		attr->disabled = 1;
1319  
1320  	/*
1321  	 * Setting enable_on_exec for independent events and
1322  	 * group leaders for traced programs executed by perf.
1323  	 */
1324  	if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
1325  	    !opts->target.initial_delay)
1326  		attr->enable_on_exec = 1;
1327  
1328  	if (evsel->immediate) {
1329  		attr->disabled = 0;
1330  		attr->enable_on_exec = 0;
1331  	}
1332  
1333  	clockid = opts->clockid;
1334  	if (opts->use_clockid) {
1335  		attr->use_clockid = 1;
1336  		attr->clockid = opts->clockid;
1337  	}
1338  
1339  	if (evsel->precise_max)
1340  		attr->precise_ip = 3;
1341  
1342  	if (opts->all_user) {
1343  		attr->exclude_kernel = 1;
1344  		attr->exclude_user   = 0;
1345  	}
1346  
1347  	if (opts->all_kernel) {
1348  		attr->exclude_kernel = 0;
1349  		attr->exclude_user   = 1;
1350  	}
1351  
1352  	if (evsel->core.own_cpus || evsel->unit)
1353  		evsel->core.attr.read_format |= PERF_FORMAT_ID;
1354  
1355  	/*
1356  	 * Apply event specific term settings,
1357  	 * it overloads any global configuration.
1358  	 */
1359  	evsel__apply_config_terms(evsel, opts, track);
1360  
1361  	evsel->ignore_missing_thread = opts->ignore_missing_thread;
1362  
1363  	/* The --period option takes the precedence. */
1364  	if (opts->period_set) {
1365  		if (opts->period)
1366  			evsel__set_sample_bit(evsel, PERIOD);
1367  		else
1368  			evsel__reset_sample_bit(evsel, PERIOD);
1369  	}
1370  
1371  	/*
1372  	 * A dummy event never triggers any actual counter and therefore
1373  	 * cannot be used with branch_stack.
1374  	 *
1375  	 * For initial_delay, a dummy event is added implicitly.
1376  	 * The software dummy event will error out with -EOPNOTSUPP
1377  	 * if the BRANCH_STACK bit is set.
1378  	 */
1379  	if (evsel__is_dummy_event(evsel))
1380  		evsel__reset_sample_bit(evsel, BRANCH_STACK);
1381  
1382  	if (evsel__is_offcpu_event(evsel))
1383  		evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES;
1384  
1385  	arch__post_evsel_config(evsel, attr);
1386  }
1387  
1388  int evsel__set_filter(struct evsel *evsel, const char *filter)
1389  {
1390  	char *new_filter = strdup(filter);
1391  
1392  	if (new_filter != NULL) {
1393  		free(evsel->filter);
1394  		evsel->filter = new_filter;
1395  		return 0;
1396  	}
1397  
1398  	return -1;
1399  }
1400  
1401  static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter)
1402  {
1403  	char *new_filter;
1404  
1405  	if (evsel->filter == NULL)
1406  		return evsel__set_filter(evsel, filter);
1407  
1408  	if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
1409  		free(evsel->filter);
1410  		evsel->filter = new_filter;
1411  		return 0;
1412  	}
1413  
1414  	return -1;
1415  }
1416  
1417  int evsel__append_tp_filter(struct evsel *evsel, const char *filter)
1418  {
1419  	return evsel__append_filter(evsel, "(%s) && (%s)", filter);
1420  }
1421  
1422  int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
1423  {
1424  	return evsel__append_filter(evsel, "%s,%s", filter);
1425  }
1426  
1427  /* Caller has to clear disabled after going through all CPUs. */
1428  int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx)
1429  {
1430  	return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
1431  }
1432  
1433  int evsel__enable(struct evsel *evsel)
1434  {
1435  	int err = perf_evsel__enable(&evsel->core);
1436  
1437  	if (!err)
1438  		evsel->disabled = false;
1439  	return err;
1440  }
1441  
1442  /* Caller has to set disabled after going through all CPUs. */
1443  int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx)
1444  {
1445  	return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx);
1446  }
1447  
1448  int evsel__disable(struct evsel *evsel)
1449  {
1450  	int err = perf_evsel__disable(&evsel->core);
1451  	/*
1452  	 * We mark it disabled here so that tools that disable an event can
1453  	 * ignore events after they disable it. I.e. the ring buffer may have
1454  	 * already a few more events queued up before the kernel got the stop
1455  	 * request.
1456  	 */
1457  	if (!err)
1458  		evsel->disabled = true;
1459  
1460  	return err;
1461  }
1462  
1463  void free_config_terms(struct list_head *config_terms)
1464  {
1465  	struct evsel_config_term *term, *h;
1466  
1467  	list_for_each_entry_safe(term, h, config_terms, list) {
1468  		list_del_init(&term->list);
1469  		if (term->free_str)
1470  			zfree(&term->val.str);
1471  		free(term);
1472  	}
1473  }
1474  
1475  static void evsel__free_config_terms(struct evsel *evsel)
1476  {
1477  	free_config_terms(&evsel->config_terms);
1478  }
1479  
1480  void evsel__exit(struct evsel *evsel)
1481  {
1482  	assert(list_empty(&evsel->core.node));
1483  	assert(evsel->evlist == NULL);
1484  	bpf_counter__destroy(evsel);
1485  	perf_bpf_filter__destroy(evsel);
1486  	evsel__free_counts(evsel);
1487  	perf_evsel__free_fd(&evsel->core);
1488  	perf_evsel__free_id(&evsel->core);
1489  	evsel__free_config_terms(evsel);
1490  	cgroup__put(evsel->cgrp);
1491  	perf_cpu_map__put(evsel->core.cpus);
1492  	perf_cpu_map__put(evsel->core.own_cpus);
1493  	perf_thread_map__put(evsel->core.threads);
1494  	zfree(&evsel->group_name);
1495  	zfree(&evsel->name);
1496  	zfree(&evsel->filter);
1497  	zfree(&evsel->pmu_name);
1498  	zfree(&evsel->group_pmu_name);
1499  	zfree(&evsel->unit);
1500  	zfree(&evsel->metric_id);
1501  	evsel__zero_per_pkg(evsel);
1502  	hashmap__free(evsel->per_pkg_mask);
1503  	evsel->per_pkg_mask = NULL;
1504  	zfree(&evsel->metric_events);
1505  	perf_evsel__object.fini(evsel);
1506  	if (evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME ||
1507  	    evsel__tool_event(evsel) == PERF_TOOL_USER_TIME)
1508  		xyarray__delete(evsel->start_times);
1509  }
1510  
1511  void evsel__delete(struct evsel *evsel)
1512  {
1513  	if (!evsel)
1514  		return;
1515  
1516  	evsel__exit(evsel);
1517  	free(evsel);
1518  }
1519  
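/*
 * If previous raw counts were saved (e.g. in interval mode), turn the freshly
 * read @count into a delta against them and remember the new raw values.
 */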
1520  void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
1521  			   struct perf_counts_values *count)
1522  {
1523  	struct perf_counts_values tmp;
1524  
1525  	if (!evsel->prev_raw_counts)
1526  		return;
1527  
1528  	tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
1529  	*perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
1530  
1531  	count->val = count->val - tmp.val;
1532  	count->ena = count->ena - tmp.ena;
1533  	count->run = count->run - tmp.run;
1534  }
1535  
1536  static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
1537  {
1538  	struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread);
1539  
1540  	return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
1541  }
1542  
1543  static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
1544  {
1545  	return tpebs_set_evsel(evsel, cpu_map_idx, thread);
1546  }
1547  
1548  static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
1549  			     u64 val, u64 ena, u64 run, u64 lost)
1550  {
1551  	struct perf_counts_values *count;
1552  
1553  	count = perf_counts(counter->counts, cpu_map_idx, thread);
1554  
1555  	if (counter->retire_lat) {
1556  		evsel__read_retire_lat(counter, cpu_map_idx, thread);
1557  		perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
1558  		return;
1559  	}
1560  
1561  	count->val    = val;
1562  	count->ena    = ena;
1563  	count->run    = run;
1564  	count->lost   = lost;
1565  
1566  	perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
1567  }
1568  
1569  static bool evsel__group_has_tpebs(struct evsel *leader)
1570  {
1571  	struct evsel *evsel;
1572  
1573  	for_each_group_evsel(evsel, leader) {
1574  		if (evsel__is_retire_lat(evsel))
1575  			return true;
1576  	}
1577  	return false;
1578  }
1579  
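/*
 * Number of group members the kernel will actually report in a group read,
 * i.e. the group size minus any retire_lat (TPEBS) members.
 */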
1580  static u64 evsel__group_read_nr_members(struct evsel *leader)
1581  {
1582  	u64 nr = leader->core.nr_members;
1583  	struct evsel *evsel;
1584  
1585  	for_each_group_evsel(evsel, leader) {
1586  		if (evsel__is_retire_lat(evsel))
1587  			nr--;
1588  	}
1589  	return nr;
1590  }
1591  
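/*
 * Size of the buffer needed for a PERF_FORMAT_GROUP read of this group.
 * When the group contains retire_lat (TPEBS) members, which are not read
 * from the kernel, compute the size by hand with those members excluded.
 */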
1592  static u64 evsel__group_read_size(struct evsel *leader)
1593  {
1594  	u64 read_format = leader->core.attr.read_format;
1595  	int entry = sizeof(u64); /* value */
1596  	int size = 0;
1597  	int nr = 1;
1598  
1599  	if (!evsel__group_has_tpebs(leader))
1600  		return perf_evsel__read_size(&leader->core);
1601  
1602  	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1603  		size += sizeof(u64);
1604  
1605  	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1606  		size += sizeof(u64);
1607  
1608  	if (read_format & PERF_FORMAT_ID)
1609  		entry += sizeof(u64);
1610  
1611  	if (read_format & PERF_FORMAT_LOST)
1612  		entry += sizeof(u64);
1613  
1614  	if (read_format & PERF_FORMAT_GROUP) {
1615  		nr = evsel__group_read_nr_members(leader);
1616  		size += sizeof(u64);
1617  	}
1618  
1619  	size += entry * nr;
1620  	return size;
1621  }
1622  
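/*
 * Walk a PERF_FORMAT_GROUP read buffer and store each member's value
 * (plus enabled/running times and lost count) into its evsel's counts.
 */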
1623  static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
1624  {
1625  	u64 read_format = leader->core.attr.read_format;
1626  	struct sample_read_value *v;
1627  	u64 nr, ena = 0, run = 0, lost = 0;
1628  
1629  	nr = *data++;
1630  
1631  	if (nr != evsel__group_read_nr_members(leader))
1632  		return -EINVAL;
1633  
1634  	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1635  		ena = *data++;
1636  
1637  	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1638  		run = *data++;
1639  
1640  	v = (void *)data;
1641  	sample_read_group__for_each(v, nr, read_format) {
1642  		struct evsel *counter;
1643  
1644  		counter = evlist__id2evsel(leader->evlist, v->id);
1645  		if (!counter)
1646  			return -EINVAL;
1647  
1648  		if (read_format & PERF_FORMAT_LOST)
1649  			lost = v->lost;
1650  
1651  		evsel__set_count(counter, cpu_map_idx, thread, v->value, ena, run, lost);
1652  	}
1653  
1654  	return 0;
1655  }
1656  
1657  static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
1658  {
1659  	struct perf_stat_evsel *ps = leader->stats;
1660  	u64 read_format = leader->core.attr.read_format;
1661  	int size = evsel__group_read_size(leader);
1662  	u64 *data = ps->group_data;
1663  
1664  	if (!(read_format & PERF_FORMAT_ID))
1665  		return -EINVAL;
1666  
1667  	if (!evsel__is_group_leader(leader))
1668  		return -EINVAL;
1669  
1670  	if (!data) {
1671  		data = zalloc(size);
1672  		if (!data)
1673  			return -ENOMEM;
1674  
1675  		ps->group_data = data;
1676  	}
1677  
1678  	if (FD(leader, cpu_map_idx, thread) < 0)
1679  		return -EINVAL;
1680  
1681  	if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0)
1682  		return -errno;
1683  
1684  	return evsel__process_group_data(leader, cpu_map_idx, thread, data);
1685  }
1686  
1687  static bool read_until_char(struct io *io, char e)
1688  {
1689  	int c;
1690  
1691  	do {
1692  		c = io__get_char(io);
1693  		if (c == -1)
1694  			return false;
1695  	} while (c != e);
1696  	return true;
1697  }
1698  
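/*
 * Read the @field'th space-separated value on the "cpuN" line of /proc/stat
 * (fd must be an open file descriptor for it); e.g. field 1 is user time and
 * field 3 is system time, both in clock ticks.
 */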
1699  static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
1700  {
1701  	char buf[256];
1702  	struct io io;
1703  	int i;
1704  
1705  	io__init(&io, fd, buf, sizeof(buf));
1706  
1707  	/* Skip lines to relevant CPU. */
1708  	for (i = -1; i < cpu.cpu; i++) {
1709  		if (!read_until_char(&io, '\n'))
1710  			return -EINVAL;
1711  	}
1712  	/* Skip to "cpu". */
1713  	if (io__get_char(&io) != 'c') return -EINVAL;
1714  	if (io__get_char(&io) != 'p') return -EINVAL;
1715  	if (io__get_char(&io) != 'u') return -EINVAL;
1716  
1717  	/* Skip N of cpuN. */
1718  	if (!read_until_char(&io, ' '))
1719  		return -EINVAL;
1720  
1721  	i = 1;
1722  	while (true) {
1723  		if (io__get_dec(&io, val) != ' ')
1724  			break;
1725  		if (field == i)
1726  			return 0;
1727  		i++;
1728  	}
1729  	return -EINVAL;
1730  }
1731  
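/*
 * Read the @field-th value from an already opened /proc/<pid>/stat file
 * descriptor. Only numeric fields can be returned; the comm (2) and
 * state (3) fields are strings and yield -EINVAL. Fields 14 and 15 are
 * utime and stime in clock ticks.
 */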
1732  static int read_pid_stat_field(int fd, int field, __u64 *val)
1733  {
1734  	char buf[256];
1735  	struct io io;
1736  	int c, i;
1737  
1738  	io__init(&io, fd, buf, sizeof(buf));
1739  	if (io__get_dec(&io, val) != ' ')
1740  		return -EINVAL;
1741  	if (field == 1)
1742  		return 0;
1743  
1744  	/* Skip comm. */
1745  	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
1746  		return -EINVAL;
1747  	if (field == 2)
1748  		return -EINVAL; /* String can't be returned. */
1749  
1750  	/* Skip state */
1751  	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
1752  		return -EINVAL;
1753  	if (field == 3)
1754  		return -EINVAL; /* String can't be returned. */
1755  
1756  	/* Loop over numeric fields. */
1757  	if (io__get_char(&io) != ' ')
1758  		return -EINVAL;
1759  
1760  	i = 4;
1761  	while (true) {
1762  		c = io__get_dec(&io, val);
1763  		if (c == -1)
1764  			return -EINVAL;
1765  		if (c == -2) {
1766  			/* Assume a negative value was read */
1767  			c = io__get_dec(&io, val);
1768  			*val *= -1;
1769  		}
1770  		if (c != ' ')
1771  			return -EINVAL;
1772  		if (field == i)
1773  			return 0;
1774  		i++;
1775  	}
1776  	return -EINVAL;
1777  }
1778  
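/*
 * Read a tool event (duration_time, user_time or system_time): the value is
 * the delta between the start time sampled when the event was opened and the
 * current reading, taken from rdclock() or /proc and converted to nanoseconds.
 */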
1779  static int evsel__read_tool(struct evsel *evsel, int cpu_map_idx, int thread)
1780  {
1781  	__u64 *start_time, cur_time, delta_start;
1782  	int fd, err = 0;
1783  	struct perf_counts_values *count;
1784  	bool adjust = false;
1785  
1786  	count = perf_counts(evsel->counts, cpu_map_idx, thread);
1787  
1788  	switch (evsel__tool_event(evsel)) {
1789  	case PERF_TOOL_DURATION_TIME:
1790  		/*
1791  		 * Pretend duration_time is only on the first CPU and thread, or
1792  		 * else aggregation will scale duration_time by the number of
1793  		 * CPUs/threads.
1794  		 */
1795  		start_time = &evsel->start_time;
1796  		if (cpu_map_idx == 0 && thread == 0)
1797  			cur_time = rdclock();
1798  		else
1799  			cur_time = *start_time;
1800  		break;
1801  	case PERF_TOOL_USER_TIME:
1802  	case PERF_TOOL_SYSTEM_TIME: {
1803  		bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME;
1804  
1805  		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
1806  		fd = FD(evsel, cpu_map_idx, thread);
1807  		lseek(fd, 0, SEEK_SET);
1808  		if (evsel->pid_stat) {
1809  			/* The event exists solely on 1 CPU. */
1810  			if (cpu_map_idx == 0)
1811  				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
1812  			else
1813  				cur_time = 0;
1814  		} else {
1815  			/* The event is for all threads. */
1816  			if (thread == 0) {
1817  				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
1818  									cpu_map_idx);
1819  
1820  				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
1821  			} else {
1822  				cur_time = 0;
1823  			}
1824  		}
1825  		adjust = true;
1826  		break;
1827  	}
1828  	case PERF_TOOL_NONE:
1829  	case PERF_TOOL_MAX:
1830  	default:
1831  		err = -EINVAL;
1832  	}
1833  	if (err)
1834  		return err;
1835  
1836  	delta_start = cur_time - *start_time;
1837  	if (adjust) {
1838  		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);
1839  
1840  		delta_start *= 1000000000 / ticks_per_sec;
1841  	}
1842  	count->val    = delta_start;
1843  	count->ena    = count->run = delta_start;
1844  	count->lost   = 0;
1845  	return 0;
1846  }
1847  
1848  int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
1849  {
1850  	if (evsel__is_tool(evsel))
1851  		return evsel__read_tool(evsel, cpu_map_idx, thread);
1852  
1853  	if (evsel__is_retire_lat(evsel))
1854  		return evsel__read_retire_lat(evsel, cpu_map_idx, thread);
1855  
1856  	if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
1857  		return evsel__read_group(evsel, cpu_map_idx, thread);
1858  
1859  	return evsel__read_one(evsel, cpu_map_idx, thread);
1860  }
1861  
1862  int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale)
1863  {
1864  	struct perf_counts_values count;
1865  	size_t nv = scale ? 3 : 1;
1866  
1867  	if (FD(evsel, cpu_map_idx, thread) < 0)
1868  		return -EINVAL;
1869  
1870  	if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0)
1871  		return -ENOMEM;
1872  
1873  	if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0)
1874  		return -errno;
1875  
1876  	evsel__compute_deltas(evsel, cpu_map_idx, thread, &count);
1877  	perf_counts_values__scale(&count, scale, NULL);
1878  	*perf_counts(evsel->counts, cpu_map_idx, thread) = count;
1879  	return 0;
1880  }
1881  
1882  static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other,
1883  				  int cpu_map_idx)
1884  {
1885  	struct perf_cpu cpu;
1886  
1887  	cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
1888  	return perf_cpu_map__idx(other->core.cpus, cpu);
1889  }
1890  
1891  static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx)
1892  {
1893  	struct evsel *leader = evsel__leader(evsel);
1894  
1895  	if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) ||
1896  	    (!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) {
1897  		return evsel__match_other_cpu(evsel, leader, cpu_map_idx);
1898  	}
1899  
1900  	return cpu_map_idx;
1901  }
1902  
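/*
 * Return the already opened fd of the group leader to pass as group_fd to
 * sys_perf_event_open(): -1 when @evsel is itself a leader, -2 when the
 * leader was skippable and has no fd.
 */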
1903  static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
1904  {
1905  	struct evsel *leader = evsel__leader(evsel);
1906  	int fd;
1907  
1908  	if (evsel__is_group_leader(evsel))
1909  		return -1;
1910  
1911  	/*
1912  	 * The leader must already be processed/opened;
1913  	 * if not, it's a bug.
1914  	 */
1915  	BUG_ON(!leader->core.fd);
1916  
1917  	cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx);
1918  	if (cpu_map_idx == -1)
1919  		return -1;
1920  
1921  	fd = FD(leader, cpu_map_idx, thread);
1922  	BUG_ON(fd == -1 && !leader->skippable);
1923  
1924  	/*
1925  	 * When the leader has been skipped, return -2 to distinguish it from
1926  	 * the no-group-leader case.
1927  	 */
1928  	return fd == -1 ? -2 : fd;
1929  }
1930  
1931  static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx)
1932  {
1933  	for (int cpu = 0; cpu < nr_cpus; cpu++)
1934  		for (int thread = thread_idx; thread < nr_threads - 1; thread++)
1935  			FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
1936  }
1937  
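/*
 * Remove the fd entries of @thread_idx from every evsel opened so far (for
 * @evsel itself, only the CPUs already opened), so the thread can be dropped
 * from the thread map without leaving stale fds behind.
 */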
1938  static int update_fds(struct evsel *evsel,
1939  		      int nr_cpus, int cpu_map_idx,
1940  		      int nr_threads, int thread_idx)
1941  {
1942  	struct evsel *pos;
1943  
1944  	if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads)
1945  		return -EINVAL;
1946  
1947  	evlist__for_each_entry(evsel->evlist, pos) {
1948  		nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx;
1949  
1950  		evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
1951  
1952  		/*
1953  		 * Since the fds for the next evsel have not been created yet,
1954  		 * there is no need to iterate over the whole event list.
1955  		 */
1956  		if (pos == evsel)
1957  			break;
1958  	}
1959  	return 0;
1960  }
1961  
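/*
 * A thread may have exited between the time the thread map was created and
 * sys_perf_event_open() returning -ESRCH. If allowed, drop the thread from
 * the map and its fds so the open can be retried without it.
 */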
1962  static bool evsel__ignore_missing_thread(struct evsel *evsel,
1963  					 int nr_cpus, int cpu_map_idx,
1964  					 struct perf_thread_map *threads,
1965  					 int thread, int err)
1966  {
1967  	pid_t ignore_pid = perf_thread_map__pid(threads, thread);
1968  
1969  	if (!evsel->ignore_missing_thread)
1970  		return false;
1971  
1972  	/* The system wide setup does not work with threads. */
1973  	if (evsel->core.system_wide)
1974  		return false;
1975  
1976  	/* -ESRCH is the perf event syscall errno for PIDs that were not found. */
1977  	if (err != -ESRCH)
1978  		return false;
1979  
1980  	/* If there's only one thread, let it fail. */
1981  	if (threads->nr == 1)
1982  		return false;
1983  
1984  	/*
1985  	 * Remove the fd for the missing thread first,
1986  	 * because thread_map__remove() will decrease threads->nr.
1987  	 */
1988  	if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread))
1989  		return false;
1990  
1991  	if (thread_map__remove(threads, thread))
1992  		return false;
1993  
1994  	pr_warning("WARNING: Ignored open failure for pid %d\n",
1995  		   ignore_pid);
1996  	return true;
1997  }
1998  
1999  static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
2000  				void *priv __maybe_unused)
2001  {
2002  	return fprintf(fp, "  %-32s %s\n", name, val);
2003  }
2004  
2005  static void display_attr(struct perf_event_attr *attr)
2006  {
2007  	if (verbose >= 2 || debug_peo_args) {
2008  		fprintf(stderr, "%.60s\n", graph_dotted_line);
2009  		fprintf(stderr, "perf_event_attr:\n");
2010  		perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
2011  		fprintf(stderr, "%.60s\n", graph_dotted_line);
2012  	}
2013  }
2014  
2015  bool evsel__precise_ip_fallback(struct evsel *evsel)
2016  {
2017  	/* Do not try less precise if not requested. */
2018  	if (!evsel->precise_max)
2019  		return false;
2020  
2021  	/*
2022  	 * We tried all the precise_ip values, and it's
2023  	 * still failing, so leave it to standard fallback.
2024  	 */
2025  	if (!evsel->core.attr.precise_ip) {
2026  		evsel->core.attr.precise_ip = evsel->precise_ip_original;
2027  		return false;
2028  	}
2029  
2030  	if (!evsel->precise_ip_original)
2031  		evsel->precise_ip_original = evsel->core.attr.precise_ip;
2032  
2033  	evsel->core.attr.precise_ip--;
2034  	pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
2035  	display_attr(&evsel->core.attr);
2036  	return true;
2037  }
2038  
2039  static struct perf_cpu_map *empty_cpu_map;
2040  static struct perf_thread_map *empty_thread_map;
2041  
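/*
 * Allocate the per-CPU/per-thread fd array (and the start_times array for the
 * user_time/system_time tool events), substituting dummy "any CPU" and
 * "current thread" maps when none are given, and set up the open flags.
 */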
2042  static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
2043  		struct perf_thread_map *threads)
2044  {
2045  	int nthreads = perf_thread_map__nr(threads);
2046  
2047  	if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
2048  	    (perf_missing_features.aux_output     && evsel->core.attr.aux_output))
2049  		return -EINVAL;
2050  
2051  	if (cpus == NULL) {
2052  		if (empty_cpu_map == NULL) {
2053  			empty_cpu_map = perf_cpu_map__new_any_cpu();
2054  			if (empty_cpu_map == NULL)
2055  				return -ENOMEM;
2056  		}
2057  
2058  		cpus = empty_cpu_map;
2059  	}
2060  
2061  	if (threads == NULL) {
2062  		if (empty_thread_map == NULL) {
2063  			empty_thread_map = thread_map__new_by_tid(-1);
2064  			if (empty_thread_map == NULL)
2065  				return -ENOMEM;
2066  		}
2067  
2068  		threads = empty_thread_map;
2069  	}
2070  
2071  	if (evsel->core.fd == NULL &&
2072  	    perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
2073  		return -ENOMEM;
2074  
2075  	if ((evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME ||
2076  	     evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) &&
2077  	    !evsel->start_times) {
2078  		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), nthreads, sizeof(__u64));
2079  		if (!evsel->start_times)
2080  			return -ENOMEM;
2081  	}
2082  
2083  	evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
2084  	if (evsel->cgrp)
2085  		evsel->open_flags |= PERF_FLAG_PID_CGROUP;
2086  
2087  	return 0;
2088  }
2089  
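/*
 * Clear attribute bits and open flags for features the running kernel has
 * been found (or is known) not to support, so that a subsequent
 * sys_perf_event_open() does not fail because of them.
 */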
2090  static void evsel__disable_missing_features(struct evsel *evsel)
2091  {
2092  	if (perf_missing_features.branch_counters)
2093  		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;
2094  	if (perf_missing_features.read_lost)
2095  		evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
2096  	if (perf_missing_features.weight_struct) {
2097  		evsel__set_sample_bit(evsel, WEIGHT);
2098  		evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
2099  	}
2100  	if (perf_missing_features.clockid_wrong)
2101  		evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
2102  	if (perf_missing_features.clockid) {
2103  		evsel->core.attr.use_clockid = 0;
2104  		evsel->core.attr.clockid = 0;
2105  	}
2106  	if (perf_missing_features.cloexec)
2107  		evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
2108  	if (perf_missing_features.mmap2)
2109  		evsel->core.attr.mmap2 = 0;
2110  	if (evsel->pmu && evsel->pmu->missing_features.exclude_guest)
2111  		evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
2112  	if (perf_missing_features.lbr_flags)
2113  		evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
2114  				     PERF_SAMPLE_BRANCH_NO_CYCLES);
2115  	if (perf_missing_features.group_read && evsel->core.attr.inherit)
2116  		evsel->core.attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
2117  	if (perf_missing_features.ksymbol)
2118  		evsel->core.attr.ksymbol = 0;
2119  	if (perf_missing_features.bpf)
2120  		evsel->core.attr.bpf_event = 0;
2121  	if (perf_missing_features.branch_hw_idx)
2122  		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
2123  	if (perf_missing_features.sample_id_all)
2124  		evsel->core.attr.sample_id_all = 0;
2125  }
2126  
2127  int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
2128  			struct perf_thread_map *threads)
2129  {
2130  	int err;
2131  
2132  	err = __evsel__prepare_open(evsel, cpus, threads);
2133  	if (err)
2134  		return err;
2135  
2136  	evsel__disable_missing_features(evsel);
2137  
2138  	return err;
2139  }
2140  
2141  bool evsel__detect_missing_features(struct evsel *evsel)
2142  {
2143  	/*
2144  	 * Must probe features in the order they were added to the
2145  	 * perf_event_attr interface.
2146  	 */
2147  	if (!perf_missing_features.branch_counters &&
2148  	    (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) {
2149  		perf_missing_features.branch_counters = true;
2150  		pr_debug2("switching off branch counters support\n");
2151  		return true;
2152  	} else if (!perf_missing_features.read_lost &&
2153  	    (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
2154  		perf_missing_features.read_lost = true;
2155  		pr_debug2("switching off PERF_FORMAT_LOST support\n");
2156  		return true;
2157  	} else if (!perf_missing_features.weight_struct &&
2158  	    (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
2159  		perf_missing_features.weight_struct = true;
2160  		pr_debug2("switching off weight struct support\n");
2161  		return true;
2162  	} else if (!perf_missing_features.code_page_size &&
2163  	    (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) {
2164  		perf_missing_features.code_page_size = true;
2165  		pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n");
2166  		return false;
2167  	} else if (!perf_missing_features.data_page_size &&
2168  	    (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) {
2169  		perf_missing_features.data_page_size = true;
2170  		pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n");
2171  		return false;
2172  	} else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
2173  		perf_missing_features.cgroup = true;
2174  		pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
2175  		return false;
2176  	} else if (!perf_missing_features.branch_hw_idx &&
2177  	    (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
2178  		perf_missing_features.branch_hw_idx = true;
2179  		pr_debug2("switching off branch HW index support\n");
2180  		return true;
2181  	} else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
2182  		perf_missing_features.aux_output = true;
2183  		pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
2184  		return false;
2185  	} else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) {
2186  		perf_missing_features.bpf = true;
2187  		pr_debug2_peo("switching off bpf_event\n");
2188  		return true;
2189  	} else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) {
2190  		perf_missing_features.ksymbol = true;
2191  		pr_debug2_peo("switching off ksymbol\n");
2192  		return true;
2193  	} else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) {
2194  		perf_missing_features.write_backward = true;
2195  		pr_debug2_peo("switching off write_backward\n");
2196  		return false;
2197  	} else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) {
2198  		perf_missing_features.clockid_wrong = true;
2199  		pr_debug2_peo("switching off clockid\n");
2200  		return true;
2201  	} else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) {
2202  		perf_missing_features.clockid = true;
2203  		pr_debug2_peo("switching off use_clockid\n");
2204  		return true;
2205  	} else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) {
2206  		perf_missing_features.cloexec = true;
2207  		pr_debug2_peo("switching off cloexec flag\n");
2208  		return true;
2209  	} else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) {
2210  		perf_missing_features.mmap2 = true;
2211  		pr_debug2_peo("switching off mmap2\n");
2212  		return true;
2213  	} else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) {
2214  		if (evsel->pmu == NULL)
2215  			evsel->pmu = evsel__find_pmu(evsel);
2216  
2217  		if (evsel->pmu)
2218  			evsel->pmu->missing_features.exclude_guest = true;
2219  		else {
2220  			/* We cannot find the PMU, so disable the attrs now. */
2221  			evsel->core.attr.exclude_host = false;
2222  			evsel->core.attr.exclude_guest = false;
2223  		}
2224  
2225  		if (evsel->exclude_GH) {
2226  			pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n");
2227  			return false;
2228  		}
2229  		if (!perf_missing_features.exclude_guest) {
2230  			perf_missing_features.exclude_guest = true;
2231  			pr_debug2_peo("switching off exclude_guest, exclude_host\n");
2232  		}
2233  		return true;
2234  	} else if (!perf_missing_features.sample_id_all) {
2235  		perf_missing_features.sample_id_all = true;
2236  		pr_debug2_peo("switching off sample_id_all\n");
2237  		return true;
2238  	} else if (!perf_missing_features.lbr_flags &&
2239  			(evsel->core.attr.branch_sample_type &
2240  			 (PERF_SAMPLE_BRANCH_NO_CYCLES |
2241  			  PERF_SAMPLE_BRANCH_NO_FLAGS))) {
2242  		perf_missing_features.lbr_flags = true;
2243  		pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");
2244  		return true;
2245  	} else if (!perf_missing_features.group_read &&
2246  		    evsel->core.attr.inherit &&
2247  		   (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
2248  		   evsel__is_group_leader(evsel)) {
2249  		perf_missing_features.group_read = true;
2250  		pr_debug2_peo("switching off group read\n");
2251  		return true;
2252  	} else {
2253  		return false;
2254  	}
2255  }
2256  
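/*
 * Open @evsel on the CPU map indices [start_cpu_map_idx, end_cpu_map_idx) and
 * all threads, retrying after disabling unsupported features, lowering
 * precise_ip, raising the fd limit or dropping missing threads as needed.
 */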
2257  static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
2258  		struct perf_thread_map *threads,
2259  		int start_cpu_map_idx, int end_cpu_map_idx)
2260  {
2261  	int idx, thread, nthreads;
2262  	int pid = -1, err, old_errno;
2263  	enum rlimit_action set_rlimit = NO_CHANGE;
2264  
2265  	if (evsel__tool_event(evsel) == PERF_TOOL_DURATION_TIME) {
2266  		if (evsel->core.attr.sample_period) /* no sampling */
2267  			return -EINVAL;
2268  		evsel->start_time = rdclock();
2269  		return 0;
2270  	}
2271  
2272  	if (evsel__is_retire_lat(evsel))
2273  		return tpebs_start(evsel->evlist);
2274  
2275  	err = __evsel__prepare_open(evsel, cpus, threads);
2276  	if (err)
2277  		return err;
2278  
2279  	if (cpus == NULL)
2280  		cpus = empty_cpu_map;
2281  
2282  	if (threads == NULL)
2283  		threads = empty_thread_map;
2284  
2285  	nthreads = perf_thread_map__nr(threads);
2286  
2287  	if (evsel->cgrp)
2288  		pid = evsel->cgrp->fd;
2289  
2290  fallback_missing_features:
2291  	evsel__disable_missing_features(evsel);
2292  
2293  	pr_debug3("Opening: %s\n", evsel__name(evsel));
2294  	display_attr(&evsel->core.attr);
2295  
2296  	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
2297  
2298  		for (thread = 0; thread < nthreads; thread++) {
2299  			int fd, group_fd;
2300  retry_open:
2301  			if (thread >= nthreads)
2302  				break;
2303  
2304  			if (!evsel->cgrp && !evsel->core.system_wide)
2305  				pid = perf_thread_map__pid(threads, thread);
2306  
2307  			if (evsel__tool_event(evsel) == PERF_TOOL_USER_TIME ||
2308  			    evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME) {
2309  				bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME;
2310  				__u64 *start_time = NULL;
2311  
2312  				if (evsel->core.attr.sample_period) {
2313  					/* no sampling */
2314  					err = -EINVAL;
2315  					goto out_close;
2316  				}
2317  				if (pid > -1) {
2318  					char buf[64];
2319  
2320  					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
2321  					fd = open(buf, O_RDONLY);
2322  					evsel->pid_stat = true;
2323  				} else {
2324  					fd = open("/proc/stat", O_RDONLY);
2325  				}
2326  				FD(evsel, idx, thread) = fd;
2327  				if (fd < 0) {
2328  					err = -errno;
2329  					goto out_close;
2330  				}
2331  				start_time = xyarray__entry(evsel->start_times, idx, thread);
2332  				if (pid > -1) {
2333  					err = read_pid_stat_field(fd, system ? 15 : 14,
2334  								  start_time);
2335  				} else {
2336  					struct perf_cpu cpu;
2337  
2338  					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
2339  					err = read_stat_field(fd, cpu, system ? 3 : 1,
2340  							      start_time);
2341  				}
2342  				if (err)
2343  					goto out_close;
2344  				continue;
2345  			}
2346  
2347  			group_fd = get_group_fd(evsel, idx, thread);
2348  
2349  			if (group_fd == -2) {
2350  				pr_debug("broken group leader for %s\n", evsel->name);
2351  				err = -EINVAL;
2352  				goto out_close;
2353  			}
2354  
2355  			test_attr__ready();
2356  
2357  			/* Debug message used by test scripts */
2358  			pr_debug2_peo("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
2359  				pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
2360  
2361  			fd = sys_perf_event_open(&evsel->core.attr, pid,
2362  						perf_cpu_map__cpu(cpus, idx).cpu,
2363  						group_fd, evsel->open_flags);
2364  
2365  			FD(evsel, idx, thread) = fd;
2366  
2367  			if (fd < 0) {
2368  				err = -errno;
2369  
2370  				pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
2371  					  err);
2372  				goto try_fallback;
2373  			}
2374  
2375  			bpf_counter__install_pe(evsel, idx, fd);
2376  
2377  			if (unlikely(test_attr__enabled)) {
2378  				test_attr__open(&evsel->core.attr, pid,
2379  						perf_cpu_map__cpu(cpus, idx),
2380  						fd, group_fd, evsel->open_flags);
2381  			}
2382  
2383  			/* Debug message used by test scripts */
2384  			pr_debug2_peo(" = %d\n", fd);
2385  
2386  			if (evsel->bpf_fd >= 0) {
2387  				int evt_fd = fd;
2388  				int bpf_fd = evsel->bpf_fd;
2389  
2390  				err = ioctl(evt_fd,
2391  					    PERF_EVENT_IOC_SET_BPF,
2392  					    bpf_fd);
2393  				if (err && errno != EEXIST) {
2394  					pr_err("failed to attach bpf fd %d: %s\n",
2395  					       bpf_fd, strerror(errno));
2396  					err = -EINVAL;
2397  					goto out_close;
2398  				}
2399  			}
2400  
2401  			set_rlimit = NO_CHANGE;
2402  
2403  			/*
2404  			 * If we succeeded but had to kill clockid, fail and
2405  			 * have evsel__open_strerror() print us a nice error.
2406  			 */
2407  			if (perf_missing_features.clockid ||
2408  			    perf_missing_features.clockid_wrong) {
2409  				err = -EINVAL;
2410  				goto out_close;
2411  			}
2412  		}
2413  	}
2414  
2415  	return 0;
2416  
2417  try_fallback:
2418  	if (evsel__precise_ip_fallback(evsel))
2419  		goto retry_open;
2420  
2421  	if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
2422  					 idx, threads, thread, err)) {
2423  		/* We just removed 1 thread, so lower the upper nthreads limit. */
2424  		nthreads--;
2425  
2426  		/* ... and pretend that nothing has happened. */
2427  		err = 0;
2428  		goto retry_open;
2429  	}
2430  	/*
2431  	 * perf stat needs between 5 and 22 fds per CPU. When we run out
2432  	 * of them, try to increase the limits.
2433  	 */
2434  	if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit))
2435  		goto retry_open;
2436  
2437  	if (err != -EINVAL || idx > 0 || thread > 0)
2438  		goto out_close;
2439  
2440  	if (evsel__detect_missing_features(evsel))
2441  		goto fallback_missing_features;
2442  out_close:
2443  	if (err)
2444  		threads->err_thread = thread;
2445  
2446  	old_errno = errno;
2447  	do {
2448  		while (--thread >= 0) {
2449  			if (FD(evsel, idx, thread) >= 0)
2450  				close(FD(evsel, idx, thread));
2451  			FD(evsel, idx, thread) = -1;
2452  		}
2453  		thread = nthreads;
2454  	} while (--idx >= 0);
2455  	errno = old_errno;
2456  	return err;
2457  }
2458  
2459  int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
2460  		struct perf_thread_map *threads)
2461  {
2462  	return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus));
2463  }
2464  
2465  void evsel__close(struct evsel *evsel)
2466  {
2467  	if (evsel__is_retire_lat(evsel))
2468  		tpebs_delete();
2469  	perf_evsel__close(&evsel->core);
2470  	perf_evsel__free_id(&evsel->core);
2471  }
2472  
2473  int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
2474  {
2475  	if (cpu_map_idx == -1)
2476  		return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus));
2477  
2478  	return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1);
2479  }
2480  
2481  int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
2482  {
2483  	return evsel__open(evsel, NULL, threads);
2484  }
2485  
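/*
 * Parse the sample_id_all trailer appended to non-sample events, walking the
 * id sample fields backwards from the end of the event.
 */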
2486  static int perf_evsel__parse_id_sample(const struct evsel *evsel,
2487  				       const union perf_event *event,
2488  				       struct perf_sample *sample)
2489  {
2490  	u64 type = evsel->core.attr.sample_type;
2491  	const __u64 *array = event->sample.array;
2492  	bool swapped = evsel->needs_swap;
2493  	union u64_swap u;
2494  
2495  	array += ((event->header.size -
2496  		   sizeof(event->header)) / sizeof(u64)) - 1;
2497  
2498  	if (type & PERF_SAMPLE_IDENTIFIER) {
2499  		sample->id = *array;
2500  		array--;
2501  	}
2502  
2503  	if (type & PERF_SAMPLE_CPU) {
2504  		u.val64 = *array;
2505  		if (swapped) {
2506  			/* undo swap of u64, then swap on individual u32s */
2507  			u.val64 = bswap_64(u.val64);
2508  			u.val32[0] = bswap_32(u.val32[0]);
2509  		}
2510  
2511  		sample->cpu = u.val32[0];
2512  		array--;
2513  	}
2514  
2515  	if (type & PERF_SAMPLE_STREAM_ID) {
2516  		sample->stream_id = *array;
2517  		array--;
2518  	}
2519  
2520  	if (type & PERF_SAMPLE_ID) {
2521  		sample->id = *array;
2522  		array--;
2523  	}
2524  
2525  	if (type & PERF_SAMPLE_TIME) {
2526  		sample->time = *array;
2527  		array--;
2528  	}
2529  
2530  	if (type & PERF_SAMPLE_TID) {
2531  		u.val64 = *array;
2532  		if (swapped) {
2533  			/* undo swap of u64, then swap on individual u32s */
2534  			u.val64 = bswap_64(u.val64);
2535  			u.val32[0] = bswap_32(u.val32[0]);
2536  			u.val32[1] = bswap_32(u.val32[1]);
2537  		}
2538  
2539  		sample->pid = u.val32[0];
2540  		sample->tid = u.val32[1];
2541  		array--;
2542  	}
2543  
2544  	return 0;
2545  }
2546  
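/*
 * Return true when a field of @size bytes at @offset would run past the end
 * of the event (@endp) or exceed the event's @max_size.
 */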
2547  static inline bool overflow(const void *endp, u16 max_size, const void *offset,
2548  			    u64 size)
2549  {
2550  	return size > max_size || offset + size > endp;
2551  }
2552  
2553  #define OVERFLOW_CHECK(offset, size, max_size)				\
2554  	do {								\
2555  		if (overflow(endp, (max_size), (offset), (size)))	\
2556  			return -EFAULT;					\
2557  	} while (0)
2558  
2559  #define OVERFLOW_CHECK_u64(offset) \
2560  	OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
2561  
2562  static int
2563  perf_event__check_size(union perf_event *event, unsigned int sample_size)
2564  {
2565  	/*
2566  	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
2567  	 * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to
2568  	 * check the format does not go past the end of the event.
2569  	 */
2570  	if (sample_size + sizeof(event->header) > event->header.size)
2571  		return -EFAULT;
2572  
2573  	return 0;
2574  }
2575  
2576  void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
2577  					  const __u64 *array,
2578  					  u64 type __maybe_unused)
2579  {
2580  	data->weight = *array;
2581  }
2582  
2583  u64 evsel__bitfield_swap_branch_flags(u64 value)
2584  {
2585  	u64 new_val = 0;
2586  
2587  	/*
2588  	 * branch_flags
2589  	 * union {
2590  	 * 	u64 values;
2591  	 * 	struct {
2592  	 * 		mispred:1	//target mispredicted
2593  	 * 		predicted:1	//target predicted
2594  	 * 		in_tx:1		//in transaction
2595  	 * 		abort:1		//transaction abort
2596  	 * 		cycles:16	//cycle count to last branch
2597  	 * 		type:4		//branch type
2598  	 * 		spec:2		//branch speculation info
2599  	 * 		new_type:4	//additional branch type
2600  	 * 		priv:3		//privilege level
2601  	 * 		reserved:31
2602  	 * 	}
2603  	 * }
2604  	 *
2605  	 * Avoid calling bswap64() on the entire branch_flags value,
2606  	 * as it has variable bit-field sizes. Instead the
2607  	 * bitfield_swap() macro takes each bit-field position/size and
2608  	 * swaps it based on the host endianness.
2609  	 */
2610  	if (host_is_bigendian()) {
2611  		new_val = bitfield_swap(value, 0, 1);
2612  		new_val |= bitfield_swap(value, 1, 1);
2613  		new_val |= bitfield_swap(value, 2, 1);
2614  		new_val |= bitfield_swap(value, 3, 1);
2615  		new_val |= bitfield_swap(value, 4, 16);
2616  		new_val |= bitfield_swap(value, 20, 4);
2617  		new_val |= bitfield_swap(value, 24, 2);
2618  		new_val |= bitfield_swap(value, 26, 4);
2619  		new_val |= bitfield_swap(value, 30, 3);
2620  		new_val |= bitfield_swap(value, 33, 31);
2621  	} else {
2622  		new_val = bitfield_swap(value, 63, 1);
2623  		new_val |= bitfield_swap(value, 62, 1);
2624  		new_val |= bitfield_swap(value, 61, 1);
2625  		new_val |= bitfield_swap(value, 60, 1);
2626  		new_val |= bitfield_swap(value, 44, 16);
2627  		new_val |= bitfield_swap(value, 40, 4);
2628  		new_val |= bitfield_swap(value, 38, 2);
2629  		new_val |= bitfield_swap(value, 34, 4);
2630  		new_val |= bitfield_swap(value, 31, 3);
2631  		new_val |= bitfield_swap(value, 0, 31);
2632  	}
2633  
2634  	return new_val;
2635  }
2636  
2637  static inline bool evsel__has_branch_counters(const struct evsel *evsel)
2638  {
2639  	struct evsel *leader = evsel__leader(evsel);
2640  
2641  	/* The branch counters feature only supports grouped events. */
2642  	if (!leader || !evsel->evlist)
2643  		return false;
2644  
2645  	if (evsel->evlist->nr_br_cntr < 0)
2646  		evlist__update_br_cntr(evsel->evlist);
2647  
2648  	if (leader->br_cntr_nr > 0)
2649  		return true;
2650  
2651  	return false;
2652  }
2653  
2654  int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
2655  			struct perf_sample *data)
2656  {
2657  	u64 type = evsel->core.attr.sample_type;
2658  	bool swapped = evsel->needs_swap;
2659  	const __u64 *array;
2660  	u16 max_size = event->header.size;
2661  	const void *endp = (void *)event + max_size;
2662  	u64 sz;
2663  
2664  	/*
2665  	 * used for cross-endian analysis. See git commit 65014ab3
2666  	 * for why this goofiness is needed.
2667  	 */
2668  	union u64_swap u;
2669  
2670  	memset(data, 0, sizeof(*data));
2671  	data->cpu = data->pid = data->tid = -1;
2672  	data->stream_id = data->id = data->time = -1ULL;
2673  	data->period = evsel->core.attr.sample_period;
2674  	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2675  	data->misc    = event->header.misc;
2676  	data->data_src = PERF_MEM_DATA_SRC_NONE;
2677  	data->vcpu = -1;
2678  
2679  	if (event->header.type != PERF_RECORD_SAMPLE) {
2680  		if (!evsel->core.attr.sample_id_all)
2681  			return 0;
2682  		return perf_evsel__parse_id_sample(evsel, event, data);
2683  	}
2684  
2685  	array = event->sample.array;
2686  
2687  	if (perf_event__check_size(event, evsel->sample_size))
2688  		return -EFAULT;
2689  
2690  	if (type & PERF_SAMPLE_IDENTIFIER) {
2691  		data->id = *array;
2692  		array++;
2693  	}
2694  
2695  	if (type & PERF_SAMPLE_IP) {
2696  		data->ip = *array;
2697  		array++;
2698  	}
2699  
2700  	if (type & PERF_SAMPLE_TID) {
2701  		u.val64 = *array;
2702  		if (swapped) {
2703  			/* undo swap of u64, then swap on individual u32s */
2704  			u.val64 = bswap_64(u.val64);
2705  			u.val32[0] = bswap_32(u.val32[0]);
2706  			u.val32[1] = bswap_32(u.val32[1]);
2707  		}
2708  
2709  		data->pid = u.val32[0];
2710  		data->tid = u.val32[1];
2711  		array++;
2712  	}
2713  
2714  	if (type & PERF_SAMPLE_TIME) {
2715  		data->time = *array;
2716  		array++;
2717  	}
2718  
2719  	if (type & PERF_SAMPLE_ADDR) {
2720  		data->addr = *array;
2721  		array++;
2722  	}
2723  
2724  	if (type & PERF_SAMPLE_ID) {
2725  		data->id = *array;
2726  		array++;
2727  	}
2728  
2729  	if (type & PERF_SAMPLE_STREAM_ID) {
2730  		data->stream_id = *array;
2731  		array++;
2732  	}
2733  
2734  	if (type & PERF_SAMPLE_CPU) {
2735  
2736  		u.val64 = *array;
2737  		if (swapped) {
2738  			/* undo swap of u64, then swap on individual u32s */
2739  			u.val64 = bswap_64(u.val64);
2740  			u.val32[0] = bswap_32(u.val32[0]);
2741  		}
2742  
2743  		data->cpu = u.val32[0];
2744  		array++;
2745  	}
2746  
2747  	if (type & PERF_SAMPLE_PERIOD) {
2748  		data->period = *array;
2749  		array++;
2750  	}
2751  
2752  	if (type & PERF_SAMPLE_READ) {
2753  		u64 read_format = evsel->core.attr.read_format;
2754  
2755  		OVERFLOW_CHECK_u64(array);
2756  		if (read_format & PERF_FORMAT_GROUP)
2757  			data->read.group.nr = *array;
2758  		else
2759  			data->read.one.value = *array;
2760  
2761  		array++;
2762  
2763  		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2764  			OVERFLOW_CHECK_u64(array);
2765  			data->read.time_enabled = *array;
2766  			array++;
2767  		}
2768  
2769  		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2770  			OVERFLOW_CHECK_u64(array);
2771  			data->read.time_running = *array;
2772  			array++;
2773  		}
2774  
2775  		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
2776  		if (read_format & PERF_FORMAT_GROUP) {
2777  			const u64 max_group_nr = UINT64_MAX /
2778  					sizeof(struct sample_read_value);
2779  
2780  			if (data->read.group.nr > max_group_nr)
2781  				return -EFAULT;
2782  
2783  			sz = data->read.group.nr * sample_read_value_size(read_format);
2784  			OVERFLOW_CHECK(array, sz, max_size);
2785  			data->read.group.values =
2786  					(struct sample_read_value *)array;
2787  			array = (void *)array + sz;
2788  		} else {
2789  			OVERFLOW_CHECK_u64(array);
2790  			data->read.one.id = *array;
2791  			array++;
2792  
2793  			if (read_format & PERF_FORMAT_LOST) {
2794  				OVERFLOW_CHECK_u64(array);
2795  				data->read.one.lost = *array;
2796  				array++;
2797  			}
2798  		}
2799  	}
2800  
2801  	if (type & PERF_SAMPLE_CALLCHAIN) {
2802  		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
2803  
2804  		OVERFLOW_CHECK_u64(array);
2805  		data->callchain = (struct ip_callchain *)array++;
2806  		if (data->callchain->nr > max_callchain_nr)
2807  			return -EFAULT;
2808  		sz = data->callchain->nr * sizeof(u64);
2809  		OVERFLOW_CHECK(array, sz, max_size);
2810  		array = (void *)array + sz;
2811  	}
2812  
2813  	if (type & PERF_SAMPLE_RAW) {
2814  		OVERFLOW_CHECK_u64(array);
2815  		u.val64 = *array;
2816  
2817  		/*
2818  		 * Undo swap of u64, then swap on individual u32s,
2819  		 * get the size of the raw area and undo all of the
2820  		 * swap. The pevent interface handles endianness by
2821  		 * itself.
2822  		 */
2823  		if (swapped) {
2824  			u.val64 = bswap_64(u.val64);
2825  			u.val32[0] = bswap_32(u.val32[0]);
2826  			u.val32[1] = bswap_32(u.val32[1]);
2827  		}
2828  		data->raw_size = u.val32[0];
2829  
2830  		/*
2831  		 * The raw data is aligned on 64bits including the
2832  		 * The raw data is aligned on 64 bits including the
2833  		 */
2834  		if (swapped)
2835  			mem_bswap_64((void *) array, data->raw_size);
2836  
2837  		array = (void *)array + sizeof(u32);
2838  
2839  		OVERFLOW_CHECK(array, data->raw_size, max_size);
2840  		data->raw_data = (void *)array;
2841  		array = (void *)array + data->raw_size;
2842  	}
2843  
2844  	if (type & PERF_SAMPLE_BRANCH_STACK) {
2845  		const u64 max_branch_nr = UINT64_MAX /
2846  					  sizeof(struct branch_entry);
2847  		struct branch_entry *e;
2848  		unsigned int i;
2849  
2850  		OVERFLOW_CHECK_u64(array);
2851  		data->branch_stack = (struct branch_stack *)array++;
2852  
2853  		if (data->branch_stack->nr > max_branch_nr)
2854  			return -EFAULT;
2855  
2856  		sz = data->branch_stack->nr * sizeof(struct branch_entry);
2857  		if (evsel__has_branch_hw_idx(evsel)) {
2858  			sz += sizeof(u64);
2859  			e = &data->branch_stack->entries[0];
2860  		} else {
2861  			data->no_hw_idx = true;
2862  			/*
2863  			 * If PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
2864  			 * only nr and entries[] will be output by the kernel.
2865  			 */
2866  			e = (struct branch_entry *)&data->branch_stack->hw_idx;
2867  		}
2868  
2869  		if (swapped) {
2870  			/*
2871  			 * struct branch_flags does not have an endian-
2872  			 * specific bit-field definition, and bswap
2873  			 * will not resolve the issue, since these
2874  			 * are bit fields.
2875  			 *
2876  			 * evsel__bitfield_swap_branch_flags() uses the
2877  			 * bitfield_swap() macro to swap the bit positions
2878  			 * based on the host endianness.
2879  			 */
2880  			for (i = 0; i < data->branch_stack->nr; i++, e++)
2881  				e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
2882  		}
2883  
2884  		OVERFLOW_CHECK(array, sz, max_size);
2885  		array = (void *)array + sz;
2886  
2887  		if (evsel__has_branch_counters(evsel)) {
2888  			data->branch_stack_cntr = (u64 *)array;
2889  			sz = data->branch_stack->nr * sizeof(u64);
2890  
2891  			OVERFLOW_CHECK(array, sz, max_size);
2892  			array = (void *)array + sz;
2893  		}
2894  	}
2895  
2896  	if (type & PERF_SAMPLE_REGS_USER) {
2897  		OVERFLOW_CHECK_u64(array);
2898  		data->user_regs.abi = *array;
2899  		array++;
2900  
2901  		if (data->user_regs.abi) {
2902  			u64 mask = evsel->core.attr.sample_regs_user;
2903  
2904  			sz = hweight64(mask) * sizeof(u64);
2905  			OVERFLOW_CHECK(array, sz, max_size);
2906  			data->user_regs.mask = mask;
2907  			data->user_regs.regs = (u64 *)array;
2908  			array = (void *)array + sz;
2909  		}
2910  	}
2911  
2912  	if (type & PERF_SAMPLE_STACK_USER) {
2913  		OVERFLOW_CHECK_u64(array);
2914  		sz = *array++;
2915  
2916  		data->user_stack.offset = ((char *)(array - 1)
2917  					  - (char *) event);
2918  
2919  		if (!sz) {
2920  			data->user_stack.size = 0;
2921  		} else {
2922  			OVERFLOW_CHECK(array, sz, max_size);
2923  			data->user_stack.data = (char *)array;
2924  			array = (void *)array + sz;
2925  			OVERFLOW_CHECK_u64(array);
2926  			data->user_stack.size = *array++;
2927  			if (WARN_ONCE(data->user_stack.size > sz,
2928  				      "user stack dump failure\n"))
2929  				return -EFAULT;
2930  		}
2931  	}
2932  
2933  	if (type & PERF_SAMPLE_WEIGHT_TYPE) {
2934  		OVERFLOW_CHECK_u64(array);
2935  		arch_perf_parse_sample_weight(data, array, type);
2936  		array++;
2937  	}
2938  
2939  	if (type & PERF_SAMPLE_DATA_SRC) {
2940  		OVERFLOW_CHECK_u64(array);
2941  		data->data_src = *array;
2942  		array++;
2943  	}
2944  
2945  	if (type & PERF_SAMPLE_TRANSACTION) {
2946  		OVERFLOW_CHECK_u64(array);
2947  		data->transaction = *array;
2948  		array++;
2949  	}
2950  
2951  	data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
2952  	if (type & PERF_SAMPLE_REGS_INTR) {
2953  		OVERFLOW_CHECK_u64(array);
2954  		data->intr_regs.abi = *array;
2955  		array++;
2956  
2957  		if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
2958  			u64 mask = evsel->core.attr.sample_regs_intr;
2959  
2960  			sz = hweight64(mask) * sizeof(u64);
2961  			OVERFLOW_CHECK(array, sz, max_size);
2962  			data->intr_regs.mask = mask;
2963  			data->intr_regs.regs = (u64 *)array;
2964  			array = (void *)array + sz;
2965  		}
2966  	}
2967  
2968  	data->phys_addr = 0;
2969  	if (type & PERF_SAMPLE_PHYS_ADDR) {
2970  		data->phys_addr = *array;
2971  		array++;
2972  	}
2973  
2974  	data->cgroup = 0;
2975  	if (type & PERF_SAMPLE_CGROUP) {
2976  		data->cgroup = *array;
2977  		array++;
2978  	}
2979  
2980  	data->data_page_size = 0;
2981  	if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
2982  		data->data_page_size = *array;
2983  		array++;
2984  	}
2985  
2986  	data->code_page_size = 0;
2987  	if (type & PERF_SAMPLE_CODE_PAGE_SIZE) {
2988  		data->code_page_size = *array;
2989  		array++;
2990  	}
2991  
2992  	if (type & PERF_SAMPLE_AUX) {
2993  		OVERFLOW_CHECK_u64(array);
2994  		sz = *array++;
2995  
2996  		OVERFLOW_CHECK(array, sz, max_size);
2997  		/* Undo swap of data */
2998  		if (swapped)
2999  			mem_bswap_64((char *)array, sz);
3000  		data->aux_sample.size = sz;
3001  		data->aux_sample.data = (char *)array;
3002  		array = (void *)array + sz;
3003  	}
3004  
3005  	return 0;
3006  }
3007  
3008  int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
3009  				  u64 *timestamp)
3010  {
3011  	u64 type = evsel->core.attr.sample_type;
3012  	const __u64 *array;
3013  
3014  	if (!(type & PERF_SAMPLE_TIME))
3015  		return -1;
3016  
3017  	if (event->header.type != PERF_RECORD_SAMPLE) {
3018  		struct perf_sample data = {
3019  			.time = -1ULL,
3020  		};
3021  
3022  		if (!evsel->core.attr.sample_id_all)
3023  			return -1;
3024  		if (perf_evsel__parse_id_sample(evsel, event, &data))
3025  			return -1;
3026  
3027  		*timestamp = data.time;
3028  		return 0;
3029  	}
3030  
3031  	array = event->sample.array;
3032  
3033  	if (perf_event__check_size(event, evsel->sample_size))
3034  		return -EFAULT;
3035  
3036  	if (type & PERF_SAMPLE_IDENTIFIER)
3037  		array++;
3038  
3039  	if (type & PERF_SAMPLE_IP)
3040  		array++;
3041  
3042  	if (type & PERF_SAMPLE_TID)
3043  		array++;
3044  
3045  	if (type & PERF_SAMPLE_TIME)
3046  		*timestamp = *array;
3047  
3048  	return 0;
3049  }
3050  
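/*
 * Size of the sample_id_all trailer that the kernel appends to non-sample
 * events for this evsel's sample_type.
 */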
3051  u16 evsel__id_hdr_size(const struct evsel *evsel)
3052  {
3053  	u64 sample_type = evsel->core.attr.sample_type;
3054  	u16 size = 0;
3055  
3056  	if (sample_type & PERF_SAMPLE_TID)
3057  		size += sizeof(u64);
3058  
3059  	if (sample_type & PERF_SAMPLE_TIME)
3060  		size += sizeof(u64);
3061  
3062  	if (sample_type & PERF_SAMPLE_ID)
3063  		size += sizeof(u64);
3064  
3065  	if (sample_type & PERF_SAMPLE_STREAM_ID)
3066  		size += sizeof(u64);
3067  
3068  	if (sample_type & PERF_SAMPLE_CPU)
3069  		size += sizeof(u64);
3070  
3071  	if (sample_type & PERF_SAMPLE_IDENTIFIER)
3072  		size += sizeof(u64);
3073  
3074  	return size;
3075  }
3076  
3077  #ifdef HAVE_LIBTRACEEVENT
3078  struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
3079  {
3080  	return tep_find_field(evsel->tp_format, name);
3081  }
3082  
3083  struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name)
3084  {
3085  	return tep_find_common_field(evsel->tp_format, name);
3086  }
3087  
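/*
 * Return a pointer into the sample's raw tracepoint data for field @name,
 * resolving dynamic (and relative) field offsets.
 */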
3088  void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
3089  {
3090  	struct tep_format_field *field = evsel__field(evsel, name);
3091  	int offset;
3092  
3093  	if (!field)
3094  		return NULL;
3095  
3096  	offset = field->offset;
3097  
3098  	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
3099  		offset = *(int *)(sample->raw_data + field->offset);
3100  		offset &= 0xffff;
3101  		if (tep_field_is_relative(field->flags))
3102  			offset += field->offset + field->size;
3103  	}
3104  
3105  	return sample->raw_data + offset;
3106  }
3107  
3108  u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample,
3109  			 bool needs_swap)
3110  {
3111  	u64 value;
3112  	void *ptr = sample->raw_data + field->offset;
3113  
3114  	switch (field->size) {
3115  	case 1:
3116  		return *(u8 *)ptr;
3117  	case 2:
3118  		value = *(u16 *)ptr;
3119  		break;
3120  	case 4:
3121  		value = *(u32 *)ptr;
3122  		break;
3123  	case 8:
3124  		memcpy(&value, ptr, sizeof(u64));
3125  		break;
3126  	default:
3127  		return 0;
3128  	}
3129  
3130  	if (!needs_swap)
3131  		return value;
3132  
3133  	switch (field->size) {
3134  	case 2:
3135  		return bswap_16(value);
3136  	case 4:
3137  		return bswap_32(value);
3138  	case 8:
3139  		return bswap_64(value);
3140  	default:
3141  		return 0;
3142  	}
3143  
3144  	return 0;
3145  }
3146  
3147  u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name)
3148  {
3149  	struct tep_format_field *field = evsel__field(evsel, name);
3150  
3151  	return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
3152  }
3153  
3154  u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const char *name)
3155  {
3156  	struct tep_format_field *field = evsel__common_field(evsel, name);
3157  
3158  	return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
3159  }
3160  
3161  char evsel__taskstate(struct evsel *evsel, struct perf_sample *sample, const char *name)
3162  {
3163  	static struct tep_format_field *prev_state_field;
3164  	static const char *states;
3165  	struct tep_format_field *field;
3166  	unsigned long long val;
3167  	unsigned int bit;
3168  	char state = '?'; /* '?' denotes unknown task state */
3169  
3170  	field = evsel__field(evsel, name);
3171  
3172  	if (!field)
3173  		return state;
3174  
3175  	if (!states || field != prev_state_field) {
3176  		states = parse_task_states(field);
3177  		if (!states)
3178  			return state;
3179  		prev_state_field = field;
3180  	}
3181  
3182  	/*
3183  	 * Note: since the kernel exposes TASK_REPORT_MAX to userspace
3184  	 * to denote the 'preempted' state, we might as well report
3185  	 * 'R' for this case, which makes sense to users as well.
3186  	 *
3187  	 * We can change this if we have a good reason in the future.
3188  	 */
3189  	val = evsel__intval(evsel, sample, name);
3190  	bit = val ? ffs(val) : 0;
3191  	state = (!bit || bit > strlen(states)) ? 'R' : states[bit-1];
3192  	return state;
3193  }
3194  #endif
3195  
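/*
 * Try a transparent fallback when opening @evsel failed: cycles falls back to
 * the cpu-clock/task-clock software event, and EACCES with
 * perf_event_paranoid > 1 falls back to excluding kernel and hypervisor
 * samples. Returns true if a fallback was applied, with @msg describing it.
 */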
3196  bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
3197  		     char *msg, size_t msgsize)
3198  {
3199  	int paranoid;
3200  
3201  	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
3202  	    evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
3203  	    evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
3204  		/*
3205  		 * If it's cycles then fall back to the hrtimer based cpu-clock sw
3206  		 * counter, which is always available even without PMU support.
3207  		 *
3208  		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
3209  		 * b0a873e).
3210  		 */
3211  		evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
3212  		evsel->core.attr.config = target__has_cpu(target)
3213  			? PERF_COUNT_SW_CPU_CLOCK
3214  			: PERF_COUNT_SW_TASK_CLOCK;
3215  		scnprintf(msg, msgsize,
3216  			"The cycles event is not supported, trying to fall back to %s",
3217  			target__has_cpu(target) ? "cpu-clock" : "task-clock");
3218  
3219  		zfree(&evsel->name);
3220  		return true;
3221  	} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
3222  		   (paranoid = perf_event_paranoid()) > 1) {
3223  		const char *name = evsel__name(evsel);
3224  		char *new_name;
3225  		const char *sep = ":";
3226  
3227  		/* If the event has exclude_user set then don't exclude the kernel. */
3228  		if (evsel->core.attr.exclude_user)
3229  			return false;
3230  
3231  		/* Is the separator already in the name? */
3232  		if (strchr(name, '/') ||
3233  		    (strchr(name, ':') && !evsel->is_libpfm_event))
3234  			sep = "";
3235  
3236  		if (asprintf(&new_name, "%s%su", name, sep) < 0)
3237  			return false;
3238  
3239  		free(evsel->name);
3240  		evsel->name = new_name;
3241  		scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying "
3242  			  "to fall back to excluding kernel and hypervisor "
3243  			  "samples", paranoid);
3244  		evsel->core.attr.exclude_kernel = 1;
3245  		evsel->core.attr.exclude_hv     = 1;
3246  
3247  		return true;
3248  	}
3249  
3250  	return false;
3251  }
3252  
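/*
 * Return true if a running process has a comm that starts with @name, by
 * scanning /proc/<pid>/comm.
 */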
3253  static bool find_process(const char *name)
3254  {
3255  	size_t len = strlen(name);
3256  	DIR *dir;
3257  	struct dirent *d;
3258  	int ret = -1;
3259  
3260  	dir = opendir(procfs__mountpoint());
3261  	if (!dir)
3262  		return false;
3263  
3264  	/* Walk through the directory. */
3265  	while (ret && (d = readdir(dir)) != NULL) {
3266  		char path[PATH_MAX];
3267  		char *data;
3268  		size_t size;
3269  
3270  		if ((d->d_type != DT_DIR) ||
3271  		     !strcmp(".", d->d_name) ||
3272  		     !strcmp("..", d->d_name))
3273  			continue;
3274  
3275  		scnprintf(path, sizeof(path), "%s/%s/comm",
3276  			  procfs__mountpoint(), d->d_name);
3277  
3278  		if (filename__read_str(path, &data, &size))
3279  			continue;
3280  
3281  		ret = strncmp(name, data, len);
3282  		free(data);
3283  	}
3284  
3285  	closedir(dir);
3286  	return ret ? false : true;
3287  }
3288  
3289  int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
3290  				     char *msg __maybe_unused,
3291  				     size_t size __maybe_unused)
3292  {
3293  	return 0;
3294  }
3295  
3296  int evsel__open_strerror(struct evsel *evsel, struct target *target,
3297  			 int err, char *msg, size_t size)
3298  {
3299  	char sbuf[STRERR_BUFSIZE];
3300  	int printed = 0, enforced = 0;
3301  	int ret;
3302  
3303  	switch (err) {
3304  	case EPERM:
3305  	case EACCES:
3306  		printed += scnprintf(msg + printed, size - printed,
3307  			"Access to performance monitoring and observability operations is limited.\n");
3308  
3309  		if (!sysfs__read_int("fs/selinux/enforce", &enforced)) {
3310  			if (enforced) {
3311  				printed += scnprintf(msg + printed, size - printed,
3312  					"Enforced MAC policy settings (SELinux) can limit access to performance\n"
3313  					"monitoring and observability operations. Inspect system audit records for\n"
3314  					"more perf_event access control information and for adjusting the policy.\n");
3315  			}
3316  		}
3317  
3318  		if (err == EPERM)
3319  			printed += scnprintf(msg, size,
3320  				"No permission to enable %s event.\n\n", evsel__name(evsel));
3321  
3322  		return scnprintf(msg + printed, size - printed,
3323  		 "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
3324  		 "access to performance monitoring and observability operations for processes\n"
3325  		 "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
3326  		 "More information can be found at 'Perf events and tool security' document:\n"
3327  		 "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n"
3328  		 "perf_event_paranoid setting is %d:\n"
3329  		 "  -1: Allow use of (almost) all events by all users\n"
3330  		 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
3331  		 ">= 0: Disallow raw and ftrace function tracepoint access\n"
3332  		 ">= 1: Disallow CPU event access\n"
3333  		 ">= 2: Disallow kernel profiling\n"
3334  		 "To make the adjusted perf_event_paranoid setting permanent preserve it\n"
3335  		 "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)",
3336  		 perf_event_paranoid());
3337  	case ENOENT:
3338  		return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel));
3339  	case EMFILE:
3340  		return scnprintf(msg, size, "%s",
3341  			 "Too many events are opened.\n"
3342  			 "Probably the maximum number of open file descriptors has been reached.\n"
3343  			 "Hint: Try again after reducing the number of events.\n"
3344  			 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
3345  	case ENOMEM:
3346  		if (evsel__has_callchain(evsel) &&
3347  		    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
3348  			return scnprintf(msg, size,
3349  					 "Not enough memory to setup event with callchain.\n"
3350  					 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
3351  					 "Hint: Current value: %d", sysctl__max_stack());
3352  		break;
3353  	case ENODEV:
3354  		if (target->cpu_list)
3355  			return scnprintf(msg, size, "%s",
3356  	 "No such device - did you specify an out-of-range profile CPU?");
3357  		break;
	case EOPNOTSUPP:
		if (evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
			return scnprintf(msg, size,
	"%s: PMU Hardware or event type doesn't support branch stack sampling.",
					 evsel__name(evsel));
		if (evsel->core.attr.aux_output)
			return scnprintf(msg, size,
	"%s: PMU Hardware doesn't support 'aux_output' feature",
					 evsel__name(evsel));
		if (evsel->core.attr.sample_period != 0)
			return scnprintf(msg, size,
	"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
					 evsel__name(evsel));
		if (evsel->core.attr.precise_ip)
			return scnprintf(msg, size, "%s",
	"'precise' request may not be supported. Try removing 'p' modifier.");
#if defined(__i386__) || defined(__x86_64__)
		if (evsel->core.attr.type == PERF_TYPE_HARDWARE)
			return scnprintf(msg, size, "%s",
	"No hardware sampling interrupt available.\n");
#endif
		break;
	case EBUSY:
		if (find_process("oprofiled"))
			return scnprintf(msg, size,
	"The PMU counters are busy/taken by another profiler.\n"
	"We found the oprofile daemon running; please stop it and try again.");
		break;
	case EINVAL:
		if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
			return scnprintf(msg, size, "Asking for the code page size isn't supported by this kernel.");
		if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size)
			return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel.");
		if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
			return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
		if (perf_missing_features.clockid)
			return scnprintf(msg, size, "clockid feature not supported.");
		if (perf_missing_features.clockid_wrong)
			return scnprintf(msg, size, "wrong clockid (%d).", clockid);
		if (perf_missing_features.aux_output)
			return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
		if (!target__has_cpu(target))
			return scnprintf(msg, size,
	"Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
					evsel__name(evsel));

		break;
	case ENODATA:
		return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
				 "Please add an auxiliary event in front of the load latency event.");
	default:
		break;
	}

	ret = arch_evsel__open_strerror(evsel, msg, size);
	if (ret)
		return ret;

	return scnprintf(msg, size,
	"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
	"/bin/dmesg | grep -i perf may provide additional information.\n",
			 err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));
}
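
/*
 * Illustrative usage sketch (not part of this file): a caller that fails to
 * open an event typically turns errno into a user-facing message with
 * evsel__open_strerror(), roughly:
 *
 *	char msg[BUFSIZ];
 *
 *	if (evsel__open(evsel, cpus, threads) < 0) {
 *		evsel__open_strerror(evsel, &opts->target, errno, msg, sizeof(msg));
 *		ui__error("%s\n", msg);
 *	}
 *
 * Here 'opts', 'cpus' and 'threads' stand for assumed caller context; the
 * actual call sites live in the builtin commands.
 */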

struct perf_env *evsel__env(struct evsel *evsel)
{
	if (evsel && evsel->evlist && evsel->evlist->env)
		return evsel->evlist->env;
	return &perf_env;
}

static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
{
	int cpu_map_idx, thread;

	if (evsel__is_retire_lat(evsel))
		return 0;

	for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
		for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
		     thread++) {
			int fd = FD(evsel, cpu_map_idx, thread);

			if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
						   cpu_map_idx, thread, fd) < 0)
				return -1;
		}
	}

	return 0;
}

int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
{
	struct perf_cpu_map *cpus = evsel->core.cpus;
	struct perf_thread_map *threads = evsel->core.threads;

	if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr))
		return -ENOMEM;

	return store_evsel_ids(evsel, evlist);
}
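
/*
 * Illustrative usage sketch (assumed caller context, not part of this file):
 * tools that write a perf.data header allocate and record sample IDs for
 * every event before emitting the header, roughly:
 *
 *	evlist__for_each_entry(evlist, evsel) {
 *		if (evsel__store_ids(evsel, evlist))
 *			return -1;
 *	}
 */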

void evsel__zero_per_pkg(struct evsel *evsel)
{
	struct hashmap_entry *cur;
	size_t bkt;

	if (evsel->per_pkg_mask) {
		hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
			zfree(&cur->pkey);

		hashmap__clear(evsel->per_pkg_mask);
	}
}
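
/*
 * Note (descriptive, based on how per_pkg_mask is used by the stat code):
 * events marked per-package should only be counted once per package, so the
 * evsel keeps a hashmap of packages already seen.  The stat code is expected
 * to call evsel__zero_per_pkg() before a new aggregation pass so that stale
 * entries from a previous read do not suppress fresh counts.
 */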

/**
 * evsel__is_hybrid - does the evsel have a known PMU that is hybrid. Note, this
 *                    will be false on hybrid systems for hardware and legacy
 *                    cache events.
 */
bool evsel__is_hybrid(const struct evsel *evsel)
{
	if (perf_pmus__num_core_pmus() == 1)
		return false;

	return evsel->core.is_pmu_core;
}
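
/*
 * Example (illustrative): on a hybrid system exposing, say, cpu_core and
 * cpu_atom PMUs, an event opened as "cpu_core/cycles/" is considered hybrid,
 * while a plain legacy "cycles" event is not, since it has no known core PMU
 * (see the note above).
 */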

struct evsel *evsel__leader(const struct evsel *evsel)
{
	return container_of(evsel->core.leader, struct evsel, core);
}

bool evsel__has_leader(struct evsel *evsel, struct evsel *leader)
{
	return evsel->core.leader == &leader->core;
}

bool evsel__is_leader(struct evsel *evsel)
{
	return evsel__has_leader(evsel, evsel);
}

void evsel__set_leader(struct evsel *evsel, struct evsel *leader)
{
	evsel->core.leader = &leader->core;
}
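
/*
 * Illustrative example (derived from the helpers above): for a group spec
 * like "{cycles,instructions}", 'cycles' is the group leader, so:
 *
 *	evsel__is_leader(cycles)                 == true
 *	evsel__leader(instructions)              == cycles
 *	evsel__has_leader(instructions, cycles)  == true
 *
 * An ungrouped event is its own leader: evsel__is_leader() returns true and
 * evsel__leader() returns the event itself.
 */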

int evsel__source_count(const struct evsel *evsel)
{
	struct evsel *pos;
	int count = 0;

	evlist__for_each_entry(evsel->evlist, pos) {
		if (pos->metric_leader == evsel)
			count++;
	}
	return count;
}
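
/*
 * Note (descriptive): evsel__source_count() reports how many events in the
 * same evlist point at this evsel as their metric_leader, i.e. how many
 * metric users share this event's counts when duplicate events are merged
 * across metrics.
 */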

bool __weak arch_evsel__must_be_in_group(const struct evsel *evsel __maybe_unused)
{
	return false;
}

/*
 * Remove an event from a given group (leader).
 * Some events, e.g., perf metrics Topdown events,
 * must always be grouped; such events are ignored
 * and left in their group.
 */
void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader)
{
	if (!arch_evsel__must_be_in_group(evsel) && evsel != leader) {
		evsel__set_leader(evsel, evsel);
		evsel->core.nr_members = 0;
		leader->core.nr_members--;
	}
}

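/*
 * Illustrative usage sketch (assumed caller context, not part of this file):
 * when a weak group fails to open, the evlist code walks the group members
 * and detaches those that are allowed to stand alone, roughly:
 *
 *	for_each_group_evsel(pos, leader)
 *		evsel__remove_from_group(pos, leader);
 *
 * Members for which arch_evsel__must_be_in_group() returns true are kept in
 * the group.
 */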