1 // SPDX-License-Identifier: GPL-2.0
2 #include <sys/mman.h>
3 #include <inttypes.h>
4 #include <asm/bug.h>
5 #include <errno.h>
6 #include <string.h>
7 #include <linux/ring_buffer.h>
8 #include <linux/perf_event.h>
9 #include <perf/mmap.h>
10 #include <perf/event.h>
11 #include <perf/evsel.h>
12 #include <internal/mmap.h>
13 #include <internal/lib.h>
14 #include <linux/kernel.h>
15 #include <linux/math64.h>
16 #include "internal.h"
17
/*
 * perf_mmap__init - put @map into its initial, not-yet-mapped state.
 * @map:       map to initialise
 * @prev:      optional predecessor; when non-NULL its ->next is pointed at @map
 * @overwrite: stored in map->overwrite
 * @unmap_cb:  stored in map->unmap_cb; perf_mmap__munmap() invokes it if set
 *
 * The file descriptor is set to -1 and the reference count to zero.
 */
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
		     bool overwrite, libperf_unmap_cb_t unmap_cb)
{
	/* Chain this map behind its predecessor, if the caller supplied one. */
	if (prev != NULL)
		prev->next = map;

	refcount_set(&map->refcnt, 0);
	map->unmap_cb = unmap_cb;
	map->overwrite = overwrite;
	map->fd = -1;
}
28
perf_mmap__mmap_len(struct perf_mmap * map)29 size_t perf_mmap__mmap_len(struct perf_mmap *map)
30 {
31 return map->mask + 1 + page_size;
32 }
33
perf_mmap__mmap(struct perf_mmap * map,struct perf_mmap_param * mp,int fd,int cpu)34 int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
35 int fd, int cpu)
36 {
37 map->prev = 0;
38 map->mask = mp->mask;
39 map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
40 MAP_SHARED, fd, 0);
41 if (map->base == MAP_FAILED) {
42 map->base = NULL;
43 return -1;
44 }
45
46 map->fd = fd;
47 map->cpu = cpu;
48 return 0;
49 }
50
perf_mmap__munmap(struct perf_mmap * map)51 void perf_mmap__munmap(struct perf_mmap *map)
52 {
53 if (map && map->base != NULL) {
54 munmap(map->base, perf_mmap__mmap_len(map));
55 map->base = NULL;
56 map->fd = -1;
57 refcount_set(&map->refcnt, 0);
58 }
59 if (map && map->unmap_cb)
60 map->unmap_cb(map);
61 }
62
perf_mmap__get(struct perf_mmap * map)63 void perf_mmap__get(struct perf_mmap *map)
64 {
65 refcount_inc(&map->refcnt);
66 }
67
perf_mmap__put(struct perf_mmap * map)68 void perf_mmap__put(struct perf_mmap *map)
69 {
70 BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
71
72 if (refcount_dec_and_test(&map->refcnt))
73 perf_mmap__munmap(map);
74 }
75
/*
 * perf_mmap__write_tail - hand @tail to ring_buffer_write_tail() for the
 * mapping at md->base.
 *
 * NOTE(review): presumably this publishes the consumer position to the
 * kernel -- confirm against linux/ring_buffer.h.
 */
static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
	ring_buffer_write_tail(md->base, tail);
}
80
perf_mmap__read_head(struct perf_mmap * map)81 u64 perf_mmap__read_head(struct perf_mmap *map)
82 {
83 return ring_buffer_read_head(map->base);
84 }
85
perf_mmap__empty(struct perf_mmap * map)86 static bool perf_mmap__empty(struct perf_mmap *map)
87 {
88 struct perf_event_mmap_page *pc = map->base;
89
90 return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
91 }
92
perf_mmap__consume(struct perf_mmap * map)93 void perf_mmap__consume(struct perf_mmap *map)
94 {
95 if (!map->overwrite) {
96 u64 old = map->prev;
97
98 perf_mmap__write_tail(map, old);
99 }
100
101 if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
102 perf_mmap__put(map);
103 }
104
/*
 * overwrite_rb_find_range - locate the end of readable data in a full
 * overwrite ring buffer.
 * @buf:   start of the data area
 * @mask:  buffer size minus one; positions are wrapped with it
 * @start: position of the first record (read only)
 * @end:   receives the position where reading must stop
 *
 * Walks record headers from *start, advancing by each header's size. The walk
 * stops at a zero-sized header, or once at least one full buffer (mask + 1
 * bytes) has been covered; if the last step overshot the buffer size, that
 * last record is excluded again. Always returns 0.
 */
static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
{
	const u64 first = *start;
	struct perf_event_header *hdr;
	u64 pos = first;
	int size = mask + 1;

	pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, first);
	hdr = (struct perf_event_header *)(buf + (first & mask));

	for (;;) {
		if (pos - first >= (unsigned int)size) {
			pr_debug("Finished reading overwrite ring buffer: rewind\n");
			/* Went past one full buffer: step back over the last record. */
			if (pos - first > (unsigned int)size)
				pos -= hdr->size;
			*end = pos;
			return 0;
		}

		hdr = (struct perf_event_header *)(buf + (pos & mask));

		if (!hdr->size) {
			pr_debug("Finished reading overwrite ring buffer: get start\n");
			*end = pos;
			return 0;
		}

		pos += hdr->size;
		pr_debug3("move evt_head: %"PRIx64"\n", pos);
	}

	/* The loop above only leaves via return. */
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}
136
137 /*
138 * Report the start and end of the available data in ringbuffer
139 */
__perf_mmap__read_init(struct perf_mmap * md)140 static int __perf_mmap__read_init(struct perf_mmap *md)
141 {
142 u64 head = perf_mmap__read_head(md);
143 u64 old = md->prev;
144 unsigned char *data = md->base + page_size;
145 unsigned long size;
146
147 md->start = md->overwrite ? head : old;
148 md->end = md->overwrite ? old : head;
149
150 if ((md->end - md->start) < md->flush)
151 return -EAGAIN;
152
153 size = md->end - md->start;
154 if (size > (unsigned long)(md->mask) + 1) {
155 if (!md->overwrite) {
156 WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
157
158 md->prev = head;
159 perf_mmap__consume(md);
160 return -EAGAIN;
161 }
162
163 /*
164 * Backward ring buffer is full. We still have a chance to read
165 * most of data from it.
166 */
167 if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
168 return -EINVAL;
169 }
170
171 return 0;
172 }
173
perf_mmap__read_init(struct perf_mmap * map)174 int perf_mmap__read_init(struct perf_mmap *map)
175 {
176 /*
177 * Check if event was unmapped due to a POLLHUP/POLLERR.
178 */
179 if (!refcount_read(&map->refcnt))
180 return -ENOENT;
181
182 return __perf_mmap__read_init(map);
183 }
184
/*
 * Mandatory for overwrite mode.
 * The direction of overwrite mode is backward.
 * The last perf_mmap__read() will set tail to map->prev.
 * map->prev needs to be corrected to head, which is the end of the next read.
 */
perf_mmap__read_done(struct perf_mmap * map)191 void perf_mmap__read_done(struct perf_mmap *map)
192 {
193 /*
194 * Check if event was unmapped due to a POLLHUP/POLLERR.
195 */
196 if (!refcount_read(&map->refcnt))
197 return;
198
199 map->prev = perf_mmap__read_head(map);
200 }
201
/* 'end' must point to a good entry (there is no 'check_messup' parameter here). */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
					 u64 *startp, u64 end)
{
	/*
	 * Returns the record at *startp and advances *startp past it, or NULL
	 * when fewer bytes than a header remain before 'end', when the header
	 * size is smaller than a header, or when the record extends past 'end'.
	 * *startp is left untouched whenever NULL is returned.
	 */
	unsigned char *data = map->base + page_size;
	union perf_event *event;
	int diff = end - *startp;
	size_t size;

	if (diff < (int)sizeof(event->header))
		return NULL;

	event = (union perf_event *)&data[*startp & map->mask];
	size = event->header.size;

	if (size < sizeof(event->header) || diff < (int)size)
		return NULL;

	/*
	 * Event straddles the mmap boundary -- header should always
	 * be inside due to u64 alignment of output.
	 */
	if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
		unsigned int pos = *startp;
		/*
		 * NOTE(review): the copy is capped at sizeof(*event) while
		 * *startp still advances by the full size below; assumes no
		 * wrapped record is larger than union perf_event -- TODO confirm.
		 */
		unsigned int left = min(sizeof(*event), size), chunk;
		void *dst = map->event_copy;

		/* Linearise the record into event_copy, one contiguous run at a time. */
		do {
			chunk = min(map->mask + 1 - (pos & map->mask), left);
			memcpy(dst, &data[pos & map->mask], chunk);
			pos += chunk;
			dst += chunk;
			left -= chunk;
		} while (left);

		event = (union perf_event *)map->event_copy;
	}

	*startp += size;

	return event;
}
244
245 /*
246 * Read event from ring buffer one by one.
247 * Return one event for each call.
248 *
249 * Usage:
250 * perf_mmap__read_init()
251 * while(event = perf_mmap__read_event()) {
252 * //process the event
253 * perf_mmap__consume()
254 * }
255 * perf_mmap__read_done()
256 */
perf_mmap__read_event(struct perf_mmap * map)257 union perf_event *perf_mmap__read_event(struct perf_mmap *map)
258 {
259 union perf_event *event;
260
261 /*
262 * Check if event was unmapped due to a POLLHUP/POLLERR.
263 */
264 if (!refcount_read(&map->refcnt))
265 return NULL;
266
267 /* non-overwirte doesn't pause the ringbuffer */
268 if (!map->overwrite)
269 map->end = perf_mmap__read_head(map);
270
271 event = perf_mmap__read(map, &map->start, map->end);
272
273 if (!map->overwrite)
274 map->prev = map->start;
275
276 return event;
277 }
278
279 #if defined(__i386__) || defined(__x86_64__)
read_perf_counter(unsigned int counter)280 static u64 read_perf_counter(unsigned int counter)
281 {
282 unsigned int low, high;
283
284 asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
285
286 return low | ((u64)high) << 32;
287 }
288
read_timestamp(void)289 static u64 read_timestamp(void)
290 {
291 unsigned int low, high;
292
293 asm volatile("rdtsc" : "=a" (low), "=d" (high));
294
295 return low | ((u64)high) << 32;
296 }
297 #else
read_perf_counter(unsigned int counter __maybe_unused)298 static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
read_timestamp(void)299 static u64 read_timestamp(void) { return 0; }
300 #endif
301
perf_mmap__read_self(struct perf_mmap * map,struct perf_counts_values * count)302 int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
303 {
304 struct perf_event_mmap_page *pc = map->base;
305 u32 seq, idx, time_mult = 0, time_shift = 0;
306 u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
307
308 if (!pc || !pc->cap_user_rdpmc)
309 return -1;
310
311 do {
312 seq = READ_ONCE(pc->lock);
313 barrier();
314
315 count->ena = READ_ONCE(pc->time_enabled);
316 count->run = READ_ONCE(pc->time_running);
317
318 if (pc->cap_user_time && count->ena != count->run) {
319 cyc = read_timestamp();
320 time_mult = READ_ONCE(pc->time_mult);
321 time_shift = READ_ONCE(pc->time_shift);
322 time_offset = READ_ONCE(pc->time_offset);
323
324 if (pc->cap_user_time_short) {
325 time_cycles = READ_ONCE(pc->time_cycles);
326 time_mask = READ_ONCE(pc->time_mask);
327 }
328 }
329
330 idx = READ_ONCE(pc->index);
331 cnt = READ_ONCE(pc->offset);
332 if (pc->cap_user_rdpmc && idx) {
333 s64 evcnt = read_perf_counter(idx - 1);
334 u16 width = READ_ONCE(pc->pmc_width);
335
336 evcnt <<= 64 - width;
337 evcnt >>= 64 - width;
338 cnt += evcnt;
339 } else
340 return -1;
341
342 barrier();
343 } while (READ_ONCE(pc->lock) != seq);
344
345 if (count->ena != count->run) {
346 u64 delta;
347
348 /* Adjust for cap_usr_time_short, a nop if not */
349 cyc = time_cycles + ((cyc - time_cycles) & time_mask);
350
351 delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
352
353 count->ena += delta;
354 if (idx)
355 count->run += delta;
356
357 cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
358 }
359
360 count->val = cnt;
361
362 return 0;
363 }
364