// SPDX-License-Identifier: GPL-2.0
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <api/fs/fs.h>
#include <linux/kernel.h>
#include "map_symbol.h"
#include "mem-events.h"
#include "debug.h"
#include "symbol.h"
#include "pmu.h"
#include "pmu-hybrid.h"

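/* Default latency threshold substituted into the "ldlat-loads" event string. */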
unsigned int perf_mem_events__loads_ldlat = 30;

#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }

static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"cpu/events/mem-loads"),
	E("ldlat-stores",	"cpu/mem-stores/P",		"cpu/events/mem-stores"),
	E(NULL,			NULL,				NULL),
};
#undef E

static char mem_loads_name[100];
static bool mem_loads_name__init;

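/*
 * Weak default accessor for the generic event table above; architectures
 * with their own mem-events table can override it.
 */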
struct perf_mem_event * __weak perf_mem_events__ptr(int i)
{
	if (i >= PERF_MEM_EVENTS__MAX)
		return NULL;

	return &perf_mem_events[i];
}

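/*
 * Weak default name formatter: for the load event, the "%u" in its event
 * string is expanded once with perf_mem_events__loads_ldlat and cached in
 * mem_loads_name; other events return their name as-is.
 */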
char * __weak perf_mem_events__name(int i, char *pmu_name __maybe_unused)
{
	struct perf_mem_event *e = perf_mem_events__ptr(i);

	if (!e)
		return NULL;

	if (i == PERF_MEM_EVENTS__LOAD) {
		if (!mem_loads_name__init) {
			mem_loads_name__init = true;
			scnprintf(mem_loads_name, sizeof(mem_loads_name),
				  e->name, perf_mem_events__loads_ldlat);
		}
		return mem_loads_name;
	}

	return (char *)e->name;
}

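/*
 * Weak default: no auxiliary mem-loads event is needed; architectures
 * that require one override this.
 */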
__weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
{
	return false;
}

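/*
 * Parse a comma-separated list of event tags (e.g. "ldlat-loads") and
 * mark the matching table entries for recording.
 */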
int perf_mem_events__parse(const char *str)
{
	char *tok, *saveptr = NULL;
	bool found = false;
	char *buf;
	int j;

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(str) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, str);

	tok = strtok_r(buf, ",", &saveptr);

	while (tok) {
		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
			struct perf_mem_event *e = perf_mem_events__ptr(j);

			if (!e->tag)
				continue;

			if (strstr(e->tag, tok))
				e->record = found = true;
		}

		tok = strtok_r(NULL, ",", &saveptr);
	}

	free(buf);

	if (found)
		return 0;

	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
	return -1;
}

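/* An event is considered supported if its sysfs device node exists. */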
static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
{
	char path[PATH_MAX];
	struct stat st;

	scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
	return !stat(path, &st);
}

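/*
 * Probe sysfs for every table entry; on hybrid systems an event counts
 * as supported if any of the hybrid PMUs provides it.
 */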
int perf_mem_events__init(void)
{
	const char *mnt = sysfs__mount();
	bool found = false;
	int j;

	if (!mnt)
		return -ENOENT;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_mem_events__ptr(j);
		struct perf_pmu *pmu;
		char sysfs_name[100];

		/*
		 * If the event entry isn't valid, skip initialization
		 * and "e->supported" will remain false.
		 */
		if (!e->tag)
			continue;

		if (!perf_pmu__has_hybrid()) {
			scnprintf(sysfs_name, sizeof(sysfs_name),
				  e->sysfs_name, "cpu");
			e->supported = perf_mem_event__supported(mnt, sysfs_name);
		} else {
			perf_pmu__for_each_hybrid_pmu(pmu) {
				scnprintf(sysfs_name, sizeof(sysfs_name),
					  e->sysfs_name, pmu->name);
				e->supported |= perf_mem_event__supported(mnt, sysfs_name);
			}
		}

		if (e->supported)
			found = true;
	}

	return found ? 0 : -ENOENT;
}

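/* Print the known memory events; in verbose mode, also show each event string. */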
void perf_mem_events__list(void)
{
	int j;

	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_mem_events__ptr(j);

		fprintf(stderr, "%-13s%-*s%s\n",
			e->tag ?: "",
			verbose > 0 ? 25 : 0,
			verbose > 0 ? perf_mem_events__name(j, NULL) : "",
			e->supported ? ": available" : "");
	}
}

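/* Report each hybrid PMU on which the requested event is unsupported. */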
static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
						    int idx)
{
	const char *mnt = sysfs__mount();
	char sysfs_name[100];
	struct perf_pmu *pmu;

	perf_pmu__for_each_hybrid_pmu(pmu) {
		scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
			  pmu->name);
		if (!perf_mem_event__supported(mnt, sysfs_name)) {
			pr_err("failed: event '%s' not supported\n",
			       perf_mem_events__name(idx, pmu->name));
		}
	}
}

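/*
 * Append "-e <event>" pairs for every event marked for recording to the
 * perf record argument vector.  Names duplicated for hybrid PMUs are also
 * collected in rec_tmp so the caller can free them afterwards.
 */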
int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
				 char **rec_tmp, int *tmp_nr)
{
	int i = *argv_nr, k = 0;
	struct perf_mem_event *e;
	struct perf_pmu *pmu;
	char *s;

	for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		e = perf_mem_events__ptr(j);
		if (!e->record)
			continue;

		if (!perf_pmu__has_hybrid()) {
			if (!e->supported) {
				pr_err("failed: event '%s' not supported\n",
				       perf_mem_events__name(j, NULL));
				return -1;
			}

			rec_argv[i++] = "-e";
			rec_argv[i++] = perf_mem_events__name(j, NULL);
		} else {
			if (!e->supported) {
				perf_mem_events__print_unsupport_hybrid(e, j);
				return -1;
			}

			perf_pmu__for_each_hybrid_pmu(pmu) {
				rec_argv[i++] = "-e";
				s = perf_mem_events__name(j, pmu->name);
				if (s) {
					s = strdup(s);
					if (!s)
						return -1;

					rec_argv[i++] = s;
					rec_tmp[k++] = s;
				}
			}
		}
	}

	*argv_nr = i;
	*tmp_nr = k;
	return 0;
}

static const char * const tlb_access[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"L2",
	"Walker",
	"Fault",
};

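/* Decode the data-source dTLB bits into a string such as "L1 or L2 hit". */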
int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t l = 0, i;
	u64 m = PERF_MEM_TLB_NA;
	u64 hit, miss;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info->data_src.mem_dtlb;

	hit = m & PERF_MEM_TLB_HIT;
	miss = m & PERF_MEM_TLB_MISS;

	/* already taken care of */
	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);

	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", tlb_access[i]);
	}
	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");
	if (hit)
		l += scnprintf(out + l, sz - l, " hit");
	if (miss)
		l += scnprintf(out + l, sz - l, " miss");

	return l;
}

static const char * const mem_lvl[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"LFB",
	"L2",
	"L3",
	"Local RAM",
	"Remote RAM (1 hop)",
	"Remote RAM (2 hops)",
	"Remote Cache (1 hop)",
	"Remote Cache (2 hops)",
	"I/O",
	"Uncached",
};

static const char * const mem_lvlnum[] = {
	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
	[PERF_MEM_LVLNUM_LFB] = "LFB",
	[PERF_MEM_LVLNUM_RAM] = "RAM",
	[PERF_MEM_LVLNUM_PMEM] = "PMEM",
	[PERF_MEM_LVLNUM_NA] = "N/A",
};

static const char * const mem_hops[] = {
	"N/A",
	/*
	 * While printing, "Remote" is prepended to represent
	 * "Remote core, same node" accesses, since the remote field
	 * needs to be set together with the mem_hops field.
	 */
	"core, same node",
};

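/*
 * Decode the memory-level bits, plus the newer mem_lvl_num and mem_hops
 * fields, into a string such as "L1 hit" or "Remote core, same node".
 */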
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t i, l = 0;
	u64 m = PERF_MEM_LVL_NA;
	u64 hit, miss;
	int printed;

	if (mem_info)
		m = mem_info->data_src.mem_lvl;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	hit = m & PERF_MEM_LVL_HIT;
	miss = m & PERF_MEM_LVL_MISS;

	/* already taken care of */
	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);

	if (mem_info && mem_info->data_src.mem_remote) {
		strcat(out, "Remote ");
		l += 7;
	}

	if (mem_info && mem_info->data_src.mem_hops)
		l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);

	printed = 0;
	for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (printed++) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", mem_lvl[i]);
	}

	if (mem_info && mem_info->data_src.mem_lvl_num) {
		int lvl = mem_info->data_src.mem_lvl_num;
		if (printed++) {
			strcat(out, " or ");
			l += 4;
		}
		if (mem_lvlnum[lvl])
			l += scnprintf(out + l, sz - l, "%s", mem_lvlnum[lvl]);
		else
			l += scnprintf(out + l, sz - l, "L%d", lvl);
	}

	if (l == 0)
		l += scnprintf(out + l, sz - l, "N/A");
	if (hit)
		l += scnprintf(out + l, sz - l, " hit");
	if (miss)
		l += scnprintf(out + l, sz - l, " miss");

	return l;
}

static const char * const snoop_access[] = {
	"N/A",
	"None",
	"Hit",
	"Miss",
	"HitM",
};

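/* Decode the snoop bits, including the extended SNOOPX "Fwd" flag. */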
int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t i, l = 0;
	u64 m = PERF_MEM_SNOOP_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		m = mem_info->data_src.mem_snoop;

	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "%s", snoop_access[i]);
	}
	if (mem_info &&
	    (mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) {
		if (l) {
			strcat(out, " or ");
			l += 4;
		}
		l += scnprintf(out + l, sz - l, "Fwd");
	}

	if (*out == '\0')
		l += scnprintf(out, sz - l, "N/A");

	return l;
}

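/* Report whether the sampled access was part of a locked transaction. */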
int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	u64 mask = PERF_MEM_LOCK_NA;
	int l;

	if (mem_info)
		mask = mem_info->data_src.mem_lock;

	if (mask & PERF_MEM_LOCK_NA)
		l = scnprintf(out, sz, "N/A");
	else if (mask & PERF_MEM_LOCK_LOCKED)
		l = scnprintf(out, sz, "Yes");
	else
		l = scnprintf(out, sz, "No");

	return l;
}

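/* Decode the access-blocked bits (" Data" and/or " Addr"). */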
int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	size_t l = 0;
	u64 mask = PERF_MEM_BLK_NA;

	sz -= 1; /* -1 for null termination */
	out[0] = '\0';

	if (mem_info)
		mask = mem_info->data_src.mem_blk;

	if (!mask || (mask & PERF_MEM_BLK_NA)) {
		l += scnprintf(out + l, sz - l, " N/A");
		return l;
	}
	if (mask & PERF_MEM_BLK_DATA)
		l += scnprintf(out + l, sz - l, " Data");
	if (mask & PERF_MEM_BLK_ADDR)
		l += scnprintf(out + l, sz - l, " Addr");

	return l;
}

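/* Combine all of the above decoders into one "|"-separated summary string. */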
int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
	int i = 0;

	i += perf_mem__lvl_scnprintf(out, sz, mem_info);
	i += scnprintf(out + i, sz - i, "|SNP ");
	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|TLB ");
	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|LCK ");
	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
	i += scnprintf(out + i, sz - i, "|BLK ");
	i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);

	return i;
}

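/*
 * Classify one sample's data source into the c2c_stats counters: loads
 * vs. stores, cache-level hits, local/remote DRAM and HITMs.  Returns -1
 * for samples that cannot be fully accounted (missing address or map,
 * unparsable data_src).
 */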
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
{
	union perf_mem_data_src *data_src = &mi->data_src;
	u64 daddr  = mi->daddr.addr;
	u64 op     = data_src->mem_op;
	u64 lvl    = data_src->mem_lvl;
	u64 snoop  = data_src->mem_snoop;
	u64 lock   = data_src->mem_lock;
	u64 blk    = data_src->mem_blk;
	/*
	 * Skylake might report an unknown remote level via this
	 * bit; consider it when evaluating remote HITMs.
	 *
	 * On Power, the remote field can also denote cache accesses
	 * from another core of the same node.  Hence, set mrem only
	 * when hops is zero and the remote field is set.
	 */
	bool mrem  = (data_src->mem_remote && !data_src->mem_hops);
	int err = 0;

#define HITM_INC(__f)		\
do {				\
	stats->__f++;		\
	stats->tot_hitm++;	\
} while (0)

#define P(a, b) PERF_MEM_##a##_##b

	stats->nr_entries++;

	if (lock & P(LOCK, LOCKED)) stats->locks++;

	if (blk & P(BLK, DATA)) stats->blk_data++;
	if (blk & P(BLK, ADDR)) stats->blk_addr++;

	if (op & P(OP, LOAD)) {
		/* load */
		stats->load++;

		if (!daddr) {
			stats->ld_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->ld_uncache++;
			if (lvl & P(LVL, IO))  stats->ld_io++;
			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
			if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
			if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
			if (lvl & P(LVL, L3 )) {
				if (snoop & P(SNOOP, HITM))
					HITM_INC(lcl_hitm);
				else
					stats->ld_llchit++;
			}

			if (lvl & P(LVL, LOC_RAM)) {
				stats->lcl_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}

			if ((lvl & P(LVL, REM_RAM1)) ||
			    (lvl & P(LVL, REM_RAM2)) ||
			     mrem) {
				stats->rmt_dram++;
				if (snoop & P(SNOOP, HIT))
					stats->ld_shared++;
				else
					stats->ld_excl++;
			}
		}

		if ((lvl & P(LVL, REM_CCE1)) ||
		    (lvl & P(LVL, REM_CCE2)) ||
		     mrem) {
			if (snoop & P(SNOOP, HIT))
				stats->rmt_hit++;
			else if (snoop & P(SNOOP, HITM))
				HITM_INC(rmt_hitm);
		}

		if ((lvl & P(LVL, MISS)))
			stats->ld_miss++;

	} else if (op & P(OP, STORE)) {
		/* store */
		stats->store++;

		if (!daddr) {
			stats->st_noadrs++;
			return -1;
		}

		if (lvl & P(LVL, HIT)) {
			if (lvl & P(LVL, UNC)) stats->st_uncache++;
			if (lvl & P(LVL, L1 )) stats->st_l1hit++;
		}
		if (lvl & P(LVL, MISS))
			if (lvl & P(LVL, L1)) stats->st_l1miss++;
	} else {
		/* unparsable data_src? */
		stats->noparse++;
		return -1;
	}

	if (!mi->daddr.ms.map || !mi->iaddr.ms.map) {
		stats->nomap++;
		return -1;
	}

#undef P
#undef HITM_INC
	return err;
}

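/* Accumulate every counter from @add into @stats. */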
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
{
	stats->nr_entries += add->nr_entries;

	stats->locks	  += add->locks;
	stats->store	  += add->store;
	stats->st_uncache += add->st_uncache;
	stats->st_noadrs  += add->st_noadrs;
	stats->st_l1hit	  += add->st_l1hit;
	stats->st_l1miss  += add->st_l1miss;
	stats->load	  += add->load;
	stats->ld_excl	  += add->ld_excl;
	stats->ld_shared  += add->ld_shared;
	stats->ld_uncache += add->ld_uncache;
	stats->ld_io	  += add->ld_io;
	stats->ld_miss	  += add->ld_miss;
	stats->ld_noadrs  += add->ld_noadrs;
	stats->ld_fbhit	  += add->ld_fbhit;
	stats->ld_l1hit	  += add->ld_l1hit;
	stats->ld_l2hit	  += add->ld_l2hit;
	stats->ld_llchit  += add->ld_llchit;
	stats->lcl_hitm	  += add->lcl_hitm;
	stats->rmt_hitm	  += add->rmt_hitm;
	stats->tot_hitm	  += add->tot_hitm;
	stats->rmt_hit	  += add->rmt_hit;
	stats->lcl_dram	  += add->lcl_dram;
	stats->rmt_dram	  += add->rmt_dram;
	stats->blk_data	  += add->blk_data;
	stats->blk_addr	  += add->blk_addr;
	stats->nomap	  += add->nomap;
	stats->noparse	  += add->noparse;
}