// SPDX-License-Identifier: GPL-2.0
#include <sys/mman.h>
#include <inttypes.h>
#include <asm/bug.h>
#include <errno.h>
#include <string.h>
#include <linux/ring_buffer.h>
#include <linux/perf_event.h>
#include <perf/mmap.h>
#include <perf/event.h>
#include <perf/evsel.h>
#include <internal/mmap.h>
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/stringify.h>
#include "internal.h"

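/*
 * Set up a zero-initialized mmap descriptor: mark it as having no file
 * descriptor yet, record the overwrite mode and unmap callback, and chain
 * it after 'prev' so the maps can be walked as a singly linked list.
 */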
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
		     bool overwrite, libperf_unmap_cb_t unmap_cb)
{
	/* Assume fields were zero initialized. */
	map->fd = -1;
	map->overwrite = overwrite;
	map->unmap_cb  = unmap_cb;
	refcount_set(&map->refcnt, 0);
	if (prev)
		prev->next = map;
}

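/*
 * Total length of the mapping: 'mask + 1' bytes of data pages plus one
 * extra page for the struct perf_event_mmap_page control header.
 */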
size_t perf_mmap__mmap_len(struct perf_mmap *map)
{
	return map->mask + 1 + page_size;
}

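/*
 * Map the ring buffer for the given perf event fd. The caller provides the
 * protection flags and size mask through 'mp'; on failure map->base is left
 * NULL so a later perf_mmap__munmap() is harmless.
 */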
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
		    int fd, struct perf_cpu cpu)
{
	map->prev = 0;
	map->mask = mp->mask;
	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
			 MAP_SHARED, fd, 0);
	if (map->base == MAP_FAILED) {
		map->base = NULL;
		return -1;
	}

	map->fd  = fd;
	map->cpu = cpu;
	return 0;
}

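/*
 * Tear the map down: free the temporary event copy buffer, unmap the ring
 * buffer pages if they were mapped, reset the refcount and finally let the
 * owner clean up via the unmap callback, if one was registered.
 */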
void perf_mmap__munmap(struct perf_mmap *map)
{
	if (!map)
		return;

	zfree(&map->event_copy);
	map->event_copy_sz = 0;
	if (map->base) {
		munmap(map->base, perf_mmap__mmap_len(map));
		map->base = NULL;
		map->fd = -1;
		refcount_set(&map->refcnt, 0);
	}
	if (map->unmap_cb)
		map->unmap_cb(map);
}

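/*
 * Reference counting: users take a reference while the map is in use and
 * drop it when done; the final perf_mmap__put() unmaps the ring buffer.
 */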
void perf_mmap__get(struct perf_mmap *map)
{
	refcount_inc(&map->refcnt);
}

void perf_mmap__put(struct perf_mmap *map)
{
	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);

	if (refcount_dec_and_test(&map->refcnt))
		perf_mmap__munmap(map);
}

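/*
 * data_head is written by the kernel and data_tail by user space; the
 * ring_buffer_read_head()/ring_buffer_write_tail() helpers provide the
 * memory ordering required by the perf mmap protocol.
 */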
static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
	ring_buffer_write_tail(md->base, tail);
}

u64 perf_mmap__read_head(struct perf_mmap *map)
{
	return ring_buffer_read_head(map->base);
}

static bool perf_mmap__empty(struct perf_mmap *map)
{
	struct perf_event_mmap_page *pc = map->base;

	return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
}

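/*
 * Tell the kernel that everything up to map->prev has been consumed by
 * publishing it as the new data_tail (non-overwrite mode only). If this is
 * the last reference and the buffer is empty, release the map.
 */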
void perf_mmap__consume(struct perf_mmap *map)
{
	if (!map->overwrite) {
		u64 old = map->prev;

		perf_mmap__write_tail(map, old);
	}

	if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
		perf_mmap__put(map);
}

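/*
 * For a full backward (overwrite) ring buffer, walk event headers forward
 * from 'start' until the records wrap or a zero-sized header is hit, and
 * report the resulting valid [start, end) range that can still be read.
 */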
static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = *start;
	int size = mask + 1;

	pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
	pheader = (struct perf_event_header *)(buf + (*start & mask));
	while (true) {
		if (evt_head - *start >= (unsigned int)size) {
			pr_debug("Finished reading overwrite ring buffer: rewind\n");
			if (evt_head - *start > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading overwrite ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}

/*
 * Report the start and end of the available data in ringbuffer
 */
static int __perf_mmap__read_init(struct perf_mmap *md)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;

	md->start = md->overwrite ? head : old;
	md->end = md->overwrite ? old : head;

	if ((md->end - md->start) < md->flush)
		return -EAGAIN;

	size = md->end - md->start;
	if (size > (unsigned long)(md->mask) + 1) {
		if (!md->overwrite) {
			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

			md->prev = head;
			perf_mmap__consume(md);
			return -EAGAIN;
		}

		/*
		 * Backward ring buffer is full. We still have a chance to read
		 * most of data from it.
		 */
		if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
			return -EINVAL;
	}

	return 0;
}

int perf_mmap__read_init(struct perf_mmap *map)
{
	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return -ENOENT;

	return __perf_mmap__read_init(map);
}

/*
 * Mandatory for overwrite mode
 * The direction of overwrite mode is backward.
 * The last perf_mmap__read() will set tail to map->prev.
 * Need to correct the map->prev to head which is the end of next read.
 */
void perf_mmap__read_done(struct perf_mmap *map)
{
	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return;

	map->prev = perf_mmap__read_head(map);
}

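/*
 * Fetch one event between *startp and 'end'. An event that wraps around the
 * end of the ring buffer is reassembled into map->event_copy (grown on
 * demand) so callers always see a contiguous record.
 */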
/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
					 u64 *startp, u64 end)
{
	unsigned char *data = map->base + page_size;
	union perf_event *event = NULL;
	int diff = end - *startp;

	if (diff >= (int)sizeof(event->header)) {
		size_t size;

		event = (union perf_event *)&data[*startp & map->mask];
		size = event->header.size;

		if (size < sizeof(event->header) || diff < (int)size)
			return NULL;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
			unsigned int offset = *startp;
			unsigned int len = size, cpy;
			void *dst = map->event_copy;

			if (size > map->event_copy_sz) {
				dst = realloc(map->event_copy, size);
				if (!dst)
					return NULL;
				map->event_copy = dst;
				map->event_copy_sz = size;
			}

			do {
				cpy = min(map->mask + 1 - (offset & map->mask), len);
				memcpy(dst, &data[offset & map->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *)map->event_copy;
		}

		*startp += size;
	}

	return event;
}

/*
 * Read events from the ring buffer one by one.
 * Return one event for each call.
 *
 * Usage:
 * perf_mmap__read_init()
 * while(event = perf_mmap__read_event()) {
 *	//process the event
 *	perf_mmap__consume()
 * }
 * perf_mmap__read_done()
 */
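/*
 * Example (sketch only; process_event() stands in for caller code):
 *
 *	if (perf_mmap__read_init(map) == 0) {
 *		union perf_event *event;
 *
 *		while ((event = perf_mmap__read_event(map)) != NULL) {
 *			process_event(event);
 *			perf_mmap__consume(map);
 *		}
 *		perf_mmap__read_done(map);
 *	}
 */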
union perf_event *perf_mmap__read_event(struct perf_mmap *map)
{
	union perf_event *event;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return NULL;

	/* non-overwrite doesn't pause the ringbuffer */
	if (!map->overwrite)
		map->end = perf_mmap__read_head(map);

	event = perf_mmap__read(map, &map->start, map->end);

	if (!map->overwrite)
		map->prev = map->start;

	return event;
}

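/*
 * Architecture-specific helpers for perf_mmap__read_self(): read a hardware
 * counter directly from user space and read a timestamp from the same clock
 * the kernel uses for the time_* fields in perf_event_mmap_page.
 */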
#if defined(__i386__) || defined(__x86_64__)
static u64 read_perf_counter(unsigned int counter)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

	return low | ((u64)high) << 32;
}

static u64 read_timestamp(void)
{
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));

	return low | ((u64)high) << 32;
}
#elif defined(__aarch64__)
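/*
 * On arm64 the cycle counter, event counters and the generic timer are all
 * read with MRS; read_sysreg() wraps the inline asm for a named register.
 */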
#define read_sysreg(r) ({						\
	u64 __val;							\
	asm volatile("mrs %0, " __stringify(r) : "=r" (__val));		\
	__val;								\
})

static u64 read_pmccntr(void)
{
	return read_sysreg(pmccntr_el0);
}

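/*
 * MRS needs the register name as a literal, so generate one tiny accessor
 * per PMEVCNTR<n>_EL0 counter instead of indexing it at run time.
 */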
#define PMEVCNTR_READ(idx)					\
	static u64 read_pmevcntr_##idx(void) {			\
		return read_sysreg(pmevcntr##idx##_el0);	\
	}

PMEVCNTR_READ(0);
PMEVCNTR_READ(1);
PMEVCNTR_READ(2);
PMEVCNTR_READ(3);
PMEVCNTR_READ(4);
PMEVCNTR_READ(5);
PMEVCNTR_READ(6);
PMEVCNTR_READ(7);
PMEVCNTR_READ(8);
PMEVCNTR_READ(9);
PMEVCNTR_READ(10);
PMEVCNTR_READ(11);
PMEVCNTR_READ(12);
PMEVCNTR_READ(13);
PMEVCNTR_READ(14);
PMEVCNTR_READ(15);
PMEVCNTR_READ(16);
PMEVCNTR_READ(17);
PMEVCNTR_READ(18);
PMEVCNTR_READ(19);
PMEVCNTR_READ(20);
PMEVCNTR_READ(21);
PMEVCNTR_READ(22);
PMEVCNTR_READ(23);
PMEVCNTR_READ(24);
PMEVCNTR_READ(25);
PMEVCNTR_READ(26);
PMEVCNTR_READ(27);
PMEVCNTR_READ(28);
PMEVCNTR_READ(29);
PMEVCNTR_READ(30);

/*
 * Read a value direct from PMEVCNTR<idx>
 */
static u64 read_perf_counter(unsigned int counter)
{
	static u64 (* const read_f[])(void) = {
		read_pmevcntr_0,
		read_pmevcntr_1,
		read_pmevcntr_2,
		read_pmevcntr_3,
		read_pmevcntr_4,
		read_pmevcntr_5,
		read_pmevcntr_6,
		read_pmevcntr_7,
		read_pmevcntr_8,
		read_pmevcntr_9,
		read_pmevcntr_10,
		read_pmevcntr_11,
		read_pmevcntr_12,
		read_pmevcntr_13,
		read_pmevcntr_14,
		read_pmevcntr_15,
		read_pmevcntr_16,
		read_pmevcntr_17,
		read_pmevcntr_18,
		read_pmevcntr_19,
		read_pmevcntr_20,
		read_pmevcntr_21,
		read_pmevcntr_22,
		read_pmevcntr_23,
		read_pmevcntr_24,
		read_pmevcntr_25,
		read_pmevcntr_26,
		read_pmevcntr_27,
		read_pmevcntr_28,
		read_pmevcntr_29,
		read_pmevcntr_30,
		read_pmccntr
	};

	if (counter < ARRAY_SIZE(read_f))
		return (read_f[counter])();

	return 0;
}

static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }

/* __riscv_xlen contains the width of the native base integer, here 64-bit */
#elif defined(__riscv) && __riscv_xlen == 64

/* TODO: implement rv32 support */

#define CSR_CYCLE	0xc00
#define CSR_TIME	0xc01

#define csr_read(csr)						\
({								\
	register unsigned long __v;				\
		__asm__ __volatile__ ("csrr %0, %1"		\
		 : "=r" (__v)					\
		 : "i" (csr) : );				\
		 __v;						\
})

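/*
 * 'csrr' encodes the CSR number as an immediate, so it cannot take a
 * run-time value. Expand a switch with one csr_read() per possible counter
 * CSR and dispatch on the requested number.
 */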
static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val)           {\
	case __csr_num:                                 \
		__val = csr_read(__csr_num);            \
		break; }
#define switchcase_csr_read_2(__csr_num, __val)         {\
	switchcase_csr_read(__csr_num + 0, __val)        \
	switchcase_csr_read(__csr_num + 1, __val)}
#define switchcase_csr_read_4(__csr_num, __val)         {\
	switchcase_csr_read_2(__csr_num + 0, __val)      \
	switchcase_csr_read_2(__csr_num + 2, __val)}
#define switchcase_csr_read_8(__csr_num, __val)         {\
	switchcase_csr_read_4(__csr_num + 0, __val)      \
	switchcase_csr_read_4(__csr_num + 4, __val)}
#define switchcase_csr_read_16(__csr_num, __val)        {\
	switchcase_csr_read_8(__csr_num + 0, __val)      \
	switchcase_csr_read_8(__csr_num + 8, __val)}
#define switchcase_csr_read_32(__csr_num, __val)        {\
	switchcase_csr_read_16(__csr_num + 0, __val)     \
	switchcase_csr_read_16(__csr_num + 16, __val)}

	unsigned long ret = 0;

	switch (csr_num) {
	switchcase_csr_read_32(CSR_CYCLE, ret)
	default:
		break;
	}

	return ret;
#undef switchcase_csr_read_32
#undef switchcase_csr_read_16
#undef switchcase_csr_read_8
#undef switchcase_csr_read_4
#undef switchcase_csr_read_2
#undef switchcase_csr_read
}

static u64 read_perf_counter(unsigned int counter)
{
	return csr_read_num(CSR_CYCLE + counter);
}

static u64 read_timestamp(void)
{
	return csr_read_num(CSR_TIME);
}

#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
#endif

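/*
 * Read the current counter value directly from user space using the
 * self-monitoring data in the mmap'ed perf_event_mmap_page: retry the
 * pc->lock seqcount loop until a consistent snapshot is seen, then bring
 * the enabled/running times up to date if the event was not scheduled in
 * the whole time.
 */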
int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
{
	struct perf_event_mmap_page *pc = map->base;
	u32 seq, idx, time_mult = 0, time_shift = 0;
	u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;

	if (!pc || !pc->cap_user_rdpmc)
		return -1;

	do {
		seq = READ_ONCE(pc->lock);
		barrier();

		count->ena = READ_ONCE(pc->time_enabled);
		count->run = READ_ONCE(pc->time_running);

		if (pc->cap_user_time && count->ena != count->run) {
			cyc = read_timestamp();
			time_mult = READ_ONCE(pc->time_mult);
			time_shift = READ_ONCE(pc->time_shift);
			time_offset = READ_ONCE(pc->time_offset);

			if (pc->cap_user_time_short) {
				time_cycles = READ_ONCE(pc->time_cycles);
				time_mask = READ_ONCE(pc->time_mask);
			}
		}

		idx = READ_ONCE(pc->index);
		cnt = READ_ONCE(pc->offset);
		if (pc->cap_user_rdpmc && idx) {
			s64 evcnt = read_perf_counter(idx - 1);
			u16 width = READ_ONCE(pc->pmc_width);

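			/*
			 * The hardware counter is only pmc_width bits wide;
			 * shift up and back down to sign-extend it before
			 * adding it to the kernel-provided offset.
			 */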
			evcnt <<= 64 - width;
			evcnt >>= 64 - width;
			cnt += evcnt;
		} else
			return -1;

		barrier();
	} while (READ_ONCE(pc->lock) != seq);

	if (count->ena != count->run) {
		u64 delta;

		/* Adjust for cap_usr_time_short, a nop if not */
		cyc = time_cycles + ((cyc - time_cycles) & time_mask);

		delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);

		count->ena += delta;
		if (idx)
			count->run += delta;
	}

	count->val = cnt;

	return 0;
}