1 // SPDX-License-Identifier: GPL-2.0
2 #include <sys/mman.h>
3 #include <inttypes.h>
4 #include <asm/bug.h>
5 #include <errno.h>
6 #include <string.h>
7 #include <linux/ring_buffer.h>
8 #include <linux/perf_event.h>
9 #include <perf/mmap.h>
10 #include <perf/event.h>
11 #include <perf/evsel.h>
12 #include <internal/mmap.h>
13 #include <internal/lib.h>
14 #include <linux/kernel.h>
15 #include <linux/math64.h>
16 #include "internal.h"
17 
/*
 * Put @map into its initial, not-yet-mapped state: no file descriptor, a
 * reference count of zero, and the caller's overwrite flag and unmap
 * callback recorded. When @prev is non-NULL, @map becomes @prev->next.
 */
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
		     bool overwrite, libperf_unmap_cb_t unmap_cb)
{
	if (prev != NULL)
		prev->next = map;

	refcount_set(&map->refcnt, 0);
	map->unmap_cb  = unmap_cb;
	map->overwrite = overwrite;
	map->fd = -1;
}
28 
perf_mmap__mmap_len(struct perf_mmap * map)29 size_t perf_mmap__mmap_len(struct perf_mmap *map)
30 {
31 	return map->mask + 1 + page_size;
32 }
33 
perf_mmap__mmap(struct perf_mmap * map,struct perf_mmap_param * mp,int fd,int cpu)34 int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
35 		    int fd, int cpu)
36 {
37 	map->prev = 0;
38 	map->mask = mp->mask;
39 	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
40 			 MAP_SHARED, fd, 0);
41 	if (map->base == MAP_FAILED) {
42 		map->base = NULL;
43 		return -1;
44 	}
45 
46 	map->fd  = fd;
47 	map->cpu = cpu;
48 	return 0;
49 }
50 
perf_mmap__munmap(struct perf_mmap * map)51 void perf_mmap__munmap(struct perf_mmap *map)
52 {
53 	if (map && map->base != NULL) {
54 		munmap(map->base, perf_mmap__mmap_len(map));
55 		map->base = NULL;
56 		map->fd = -1;
57 		refcount_set(&map->refcnt, 0);
58 	}
59 	if (map && map->unmap_cb)
60 		map->unmap_cb(map);
61 }
62 
perf_mmap__get(struct perf_mmap * map)63 void perf_mmap__get(struct perf_mmap *map)
64 {
65 	refcount_inc(&map->refcnt);
66 }
67 
perf_mmap__put(struct perf_mmap * map)68 void perf_mmap__put(struct perf_mmap *map)
69 {
70 	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
71 
72 	if (refcount_dec_and_test(&map->refcnt))
73 		perf_mmap__munmap(map);
74 }
75 
/* Publish @tail as the new tail position of the ring buffer mapped at md->base. */
static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
	ring_buffer_write_tail(md->base, tail);
}
80 
perf_mmap__read_head(struct perf_mmap * map)81 u64 perf_mmap__read_head(struct perf_mmap *map)
82 {
83 	return ring_buffer_read_head(map->base);
84 }
85 
perf_mmap__empty(struct perf_mmap * map)86 static bool perf_mmap__empty(struct perf_mmap *map)
87 {
88 	struct perf_event_mmap_page *pc = map->base;
89 
90 	return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
91 }
92 
perf_mmap__consume(struct perf_mmap * map)93 void perf_mmap__consume(struct perf_mmap *map)
94 {
95 	if (!map->overwrite) {
96 		u64 old = map->prev;
97 
98 		perf_mmap__write_tail(map, old);
99 	}
100 
101 	if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
102 		perf_mmap__put(map);
103 }
104 
/*
 * Walk event headers in @buf forward from *start to find where the readable
 * data ends, and store that position in *end.
 *
 * The walk stops when it has covered at least one full buffer (mask + 1
 * bytes) or when it meets a header whose size is zero. If the walk overshot
 * the buffer size, the last event stepped over is excluded again.
 *
 * Always returns 0; the statements after the loop are never reached.
 */
static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = *start;
	int size = mask + 1;

	pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
	pheader = (struct perf_event_header *)(buf + (*start & mask));

	for (;;) {
		u64 walked = evt_head - *start;

		if (walked >= (unsigned int)size) {
			pr_debug("Finished reading overwrite ring buffer: rewind\n");
			/* Overshoot: back out the last header we stepped over. */
			if (walked > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading overwrite ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}

	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}
136 
137 /*
138  * Report the start and end of the available data in ringbuffer
139  */
__perf_mmap__read_init(struct perf_mmap * md)140 static int __perf_mmap__read_init(struct perf_mmap *md)
141 {
142 	u64 head = perf_mmap__read_head(md);
143 	u64 old = md->prev;
144 	unsigned char *data = md->base + page_size;
145 	unsigned long size;
146 
147 	md->start = md->overwrite ? head : old;
148 	md->end = md->overwrite ? old : head;
149 
150 	if ((md->end - md->start) < md->flush)
151 		return -EAGAIN;
152 
153 	size = md->end - md->start;
154 	if (size > (unsigned long)(md->mask) + 1) {
155 		if (!md->overwrite) {
156 			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
157 
158 			md->prev = head;
159 			perf_mmap__consume(md);
160 			return -EAGAIN;
161 		}
162 
163 		/*
164 		 * Backward ring buffer is full. We still have a chance to read
165 		 * most of data from it.
166 		 */
167 		if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
168 			return -EINVAL;
169 	}
170 
171 	return 0;
172 }
173 
perf_mmap__read_init(struct perf_mmap * map)174 int perf_mmap__read_init(struct perf_mmap *map)
175 {
176 	/*
177 	 * Check if event was unmapped due to a POLLHUP/POLLERR.
178 	 */
179 	if (!refcount_read(&map->refcnt))
180 		return -ENOENT;
181 
182 	return __perf_mmap__read_init(map);
183 }
184 
185 /*
186  * Mandatory for overwrite mode
187  * The direction of overwrite mode is backward.
188  * The last perf_mmap__read() will set tail to map->core.prev.
189  * Need to correct the map->core.prev to head which is the end of next read.
190  */
perf_mmap__read_done(struct perf_mmap * map)191 void perf_mmap__read_done(struct perf_mmap *map)
192 {
193 	/*
194 	 * Check if event was unmapped due to a POLLHUP/POLLERR.
195 	 */
196 	if (!refcount_read(&map->refcnt))
197 		return;
198 
199 	map->prev = perf_mmap__read_head(map);
200 }
201 
202 /* When check_messup is true, 'end' must points to a good entry */
perf_mmap__read(struct perf_mmap * map,u64 * startp,u64 end)203 static union perf_event *perf_mmap__read(struct perf_mmap *map,
204 					 u64 *startp, u64 end)
205 {
206 	unsigned char *data = map->base + page_size;
207 	union perf_event *event = NULL;
208 	int diff = end - *startp;
209 
210 	if (diff >= (int)sizeof(event->header)) {
211 		size_t size;
212 
213 		event = (union perf_event *)&data[*startp & map->mask];
214 		size = event->header.size;
215 
216 		if (size < sizeof(event->header) || diff < (int)size)
217 			return NULL;
218 
219 		/*
220 		 * Event straddles the mmap boundary -- header should always
221 		 * be inside due to u64 alignment of output.
222 		 */
223 		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
224 			unsigned int offset = *startp;
225 			unsigned int len = min(sizeof(*event), size), cpy;
226 			void *dst = map->event_copy;
227 
228 			do {
229 				cpy = min(map->mask + 1 - (offset & map->mask), len);
230 				memcpy(dst, &data[offset & map->mask], cpy);
231 				offset += cpy;
232 				dst += cpy;
233 				len -= cpy;
234 			} while (len);
235 
236 			event = (union perf_event *)map->event_copy;
237 		}
238 
239 		*startp += size;
240 	}
241 
242 	return event;
243 }
244 
245 /*
246  * Read event from ring buffer one by one.
247  * Return one event for each call.
248  *
249  * Usage:
250  * perf_mmap__read_init()
251  * while(event = perf_mmap__read_event()) {
252  *	//process the event
253  *	perf_mmap__consume()
254  * }
255  * perf_mmap__read_done()
256  */
perf_mmap__read_event(struct perf_mmap * map)257 union perf_event *perf_mmap__read_event(struct perf_mmap *map)
258 {
259 	union perf_event *event;
260 
261 	/*
262 	 * Check if event was unmapped due to a POLLHUP/POLLERR.
263 	 */
264 	if (!refcount_read(&map->refcnt))
265 		return NULL;
266 
267 	/* non-overwirte doesn't pause the ringbuffer */
268 	if (!map->overwrite)
269 		map->end = perf_mmap__read_head(map);
270 
271 	event = perf_mmap__read(map, &map->start, map->end);
272 
273 	if (!map->overwrite)
274 		map->prev = map->start;
275 
276 	return event;
277 }
278 
279 #if defined(__i386__) || defined(__x86_64__)
read_perf_counter(unsigned int counter)280 static u64 read_perf_counter(unsigned int counter)
281 {
282 	unsigned int low, high;
283 
284 	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
285 
286 	return low | ((u64)high) << 32;
287 }
288 
read_timestamp(void)289 static u64 read_timestamp(void)
290 {
291 	unsigned int low, high;
292 
293 	asm volatile("rdtsc" : "=a" (low), "=d" (high));
294 
295 	return low | ((u64)high) << 32;
296 }
297 #else
read_perf_counter(unsigned int counter __maybe_unused)298 static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
read_timestamp(void)299 static u64 read_timestamp(void) { return 0; }
300 #endif
301 
perf_mmap__read_self(struct perf_mmap * map,struct perf_counts_values * count)302 int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
303 {
304 	struct perf_event_mmap_page *pc = map->base;
305 	u32 seq, idx, time_mult = 0, time_shift = 0;
306 	u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
307 
308 	if (!pc || !pc->cap_user_rdpmc)
309 		return -1;
310 
311 	do {
312 		seq = READ_ONCE(pc->lock);
313 		barrier();
314 
315 		count->ena = READ_ONCE(pc->time_enabled);
316 		count->run = READ_ONCE(pc->time_running);
317 
318 		if (pc->cap_user_time && count->ena != count->run) {
319 			cyc = read_timestamp();
320 			time_mult = READ_ONCE(pc->time_mult);
321 			time_shift = READ_ONCE(pc->time_shift);
322 			time_offset = READ_ONCE(pc->time_offset);
323 
324 			if (pc->cap_user_time_short) {
325 				time_cycles = READ_ONCE(pc->time_cycles);
326 				time_mask = READ_ONCE(pc->time_mask);
327 			}
328 		}
329 
330 		idx = READ_ONCE(pc->index);
331 		cnt = READ_ONCE(pc->offset);
332 		if (pc->cap_user_rdpmc && idx) {
333 			s64 evcnt = read_perf_counter(idx - 1);
334 			u16 width = READ_ONCE(pc->pmc_width);
335 
336 			evcnt <<= 64 - width;
337 			evcnt >>= 64 - width;
338 			cnt += evcnt;
339 		} else
340 			return -1;
341 
342 		barrier();
343 	} while (READ_ONCE(pc->lock) != seq);
344 
345 	if (count->ena != count->run) {
346 		u64 delta;
347 
348 		/* Adjust for cap_usr_time_short, a nop if not */
349 		cyc = time_cycles + ((cyc - time_cycles) & time_mask);
350 
351 		delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
352 
353 		count->ena += delta;
354 		if (idx)
355 			count->run += delta;
356 
357 		cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
358 	}
359 
360 	count->val = cnt;
361 
362 	return 0;
363 }
364