1 /* Generate assembler source containing symbol information
2 *
3 * Copyright 2002 by Kai Germaschewski
4 *
5 * This software may be used and distributed according to the terms
6 * of the GNU General Public License, incorporated herein by reference.
7 *
8 * Usage: nm -n vmlinux | scripts/symbols [--all-symbols] > symbols.S
9 *
10 * ChangeLog:
11 *
12 * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
13 * Changed the compression method from stem compression to "table lookup"
14 * compression
15 *
16 * Table compression uses all the unused char codes on the symbols and
17 * maps these to the most used substrings (tokens). For instance, it might
18 * map char code 0xF7 to represent "write_" and then in every symbol where
19 * "write_" appears it can be replaced by 0xF7, saving 5 bytes.
20 * The used codes themselves are also placed in the table so that the
21 * decompresion can work without "special cases".
22 * Applied to kernel symbols, this usually produces a compression ratio
23 * of about 50%.
24 *
25 */
26
27 #define _GNU_SOURCE
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <stdint.h>
33 #include <stdbool.h>
34 #include <ctype.h>
35
36 #define KSYM_NAME_LEN 127
37
38
39 struct sym_entry {
40 unsigned long long addr;
41 unsigned int len;
42 unsigned char *sym;
43 char *orig_symbol;
44 unsigned int addr_idx;
45 unsigned int stream_offset;
46 unsigned char type;
47 };
48 #define SYMBOL_NAME(s) ((char *)(s)->sym + 1)
49
50 static struct sym_entry *table;
51 static unsigned int table_size, table_cnt;
52 static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
53 static int all_symbols = 0;
54 static int sort_by_name = 0;
55 static int map_only = 0;
56 static char symbol_prefix_char = '\0';
57 static enum { fmt_bsd, fmt_sysv } input_format;
58 static int compare_name(const void *p1, const void *p2);
59
60 int token_profit[0x10000];
61
62 /* the table that holds the result of the compression */
63 unsigned char best_table[256][2];
64 unsigned char best_table_len[256];
65
66
usage(void)67 static void usage(void)
68 {
69 fprintf(stderr, "Usage: symbols [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n");
70 exit(1);
71 }
72
73 /*
74 * This ignores the intensely annoying "mapping symbols" found
75 * in ARM ELF files: $a, $t and $d.
76 */
is_arm_mapping_symbol(const char * str)77 static inline int is_arm_mapping_symbol(const char *str)
78 {
79 return str[0] == '$' && strchr("atd", str[1])
80 && (str[2] == '\0' || str[2] == '.');
81 }
82
read_symbol(FILE * in,struct sym_entry * s)83 static int read_symbol(FILE *in, struct sym_entry *s)
84 {
85 char str[500], type[20] = "";
86 char *sym, stype;
87 static enum { symbol, single_source, multi_source } last;
88 static char *filename;
89 int rc = -1;
90
91 switch (input_format) {
92 case fmt_bsd:
93 rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
94 break;
95 case fmt_sysv:
96 while (fscanf(in, "\n") == 1)
97 /* nothing */;
98 rc = fscanf(in, "%499[^ |] |%llx | %c |",
99 str, &s->addr, &stype);
100 if (rc == 3 && fscanf(in, " %19[^ |] |", type) != 1)
101 *type = '\0';
102 break;
103 }
104 if (rc != 3) {
105 if (rc != EOF) {
106 /* skip line */
107 if (fgets(str, 500, in) == NULL)
108 return -1; /* must check fgets result */
109 }
110 return -1;
111 }
112
113 sym = strrchr(str, '.');
114 if (strcasecmp(type, "FILE") == 0 ||
115 (/*
116 * GNU nm prior to binutils commit 552e55ed06 (expected to
117 * appear in 2.27) doesn't produce a type for EFI binaries.
118 */
119 input_format == fmt_sysv && !*type && stype == '?' && sym &&
120 sym[1] && strchr("cSsoh", sym[1]) && !sym[2])) {
121 /*
122 * gas prior to binutils commit fbdf9406b0 (expected to appear
123 * in 2.27) outputs symbol table entries resulting from .file
124 * in reverse order. If we get two consecutive file symbols,
125 * prefer the first one if that names an object file or has a
126 * directory component (to cover multiply compiled files).
127 */
128 bool multi = strchr(str, '/') || (sym && sym[1] == 'o');
129
130 if (multi || last != multi_source) {
131 free(filename);
132 filename = *str ? strdup(str) : NULL;
133 }
134 last = multi ? multi_source : single_source;
135 goto skip_tail;
136 }
137
138 last = symbol;
139 rc = -1;
140
141 sym = str;
142 /* skip prefix char */
143 if (symbol_prefix_char && str[0] == symbol_prefix_char)
144 sym++;
145
146 /* Ignore most absolute/undefined (?) symbols. */
147 if (strcmp(sym, "_stext") == 0)
148 _stext = s->addr;
149 else if (strcmp(sym, "_etext") == 0)
150 _etext = s->addr;
151 else if (strcmp(sym, "_sinittext") == 0)
152 _sinittext = s->addr;
153 else if (strcmp(sym, "_einittext") == 0)
154 _einittext = s->addr;
155 else if (strcmp(sym, "_sextratext") == 0)
156 _sextratext = s->addr;
157 else if (strcmp(sym, "_eextratext") == 0)
158 _eextratext = s->addr;
159 else if (toupper((uint8_t)stype) == 'A')
160 {
161 /* Keep these useful absolute symbols */
162 if (strcmp(sym, "__gp"))
163 goto skip_tail;
164 }
165 else if (toupper((uint8_t)stype) == 'U' ||
166 toupper((uint8_t)stype) == 'N' ||
167 is_arm_mapping_symbol(sym))
168 goto skip_tail;
169 /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */
170 else if (str[0] == '$')
171 goto skip_tail;
172
173 /* include the type field in the symbol name, so that it gets
174 * compressed together */
175 s->len = strlen(str) + 1;
176 if (islower(stype) && filename)
177 s->len += strlen(filename) + 1;
178 s->sym = malloc(s->len + 1);
179 sym = SYMBOL_NAME(s);
180 if (islower(stype) && filename) {
181 sym = stpcpy(sym, filename);
182 *sym++ = '#';
183 }
184 strcpy(sym, str);
185 if (sort_by_name || map_only) {
186 s->orig_symbol = strdup(SYMBOL_NAME(s));
187 s->type = stype; /* As s->sym[0] ends mangled. */
188 }
189 s->sym[0] = stype;
190 rc = 0;
191
192 skip_tail:
193 if ((input_format == fmt_sysv) && fgets(str, 500, in) == NULL)
194 /* ignore errors while discarding rest of line */;
195
196 return rc;
197 }
198
symbol_valid(struct sym_entry * s)199 static int symbol_valid(struct sym_entry *s)
200 {
201 int offset = 1;
202
203 /* skip prefix char */
204 if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
205 offset++;
206
207 /* if --all-symbols is not specified, then symbols outside the text
208 * and inittext sections are discarded */
209 if (!all_symbols) {
210 if ((s->addr < _stext || s->addr > _etext)
211 && (s->addr < _sinittext || s->addr > _einittext)
212 && (s->addr < _sextratext || s->addr > _eextratext))
213 return 0;
214 /* Corner case. Discard any symbols with the same value as
215 * _etext _einittext or _eextratext; they can move between pass
216 * 1 and 2 when the symbols data are added. If these symbols
217 * move then they may get dropped in pass 2, which breaks the
218 * symbols rules.
219 */
220 if ((s->addr == _etext && strcmp((char*)s->sym + offset, "_etext")) ||
221 (s->addr == _einittext && strcmp((char*)s->sym + offset, "_einittext")) ||
222 (s->addr == _eextratext && strcmp((char*)s->sym + offset, "_eextratext")))
223 return 0;
224 }
225
226 /* Exclude symbols which vary between passes. */
227 if (strstr((char *)s->sym + offset, "_compiled."))
228 return 0;
229
230 return 1;
231 }
232
read_map(FILE * in)233 static void read_map(FILE *in)
234 {
235 while (!feof(in)) {
236 if (table_cnt >= table_size) {
237 table_size += 10000;
238 table = realloc(table, sizeof(*table) * table_size);
239 if (!table) {
240 fprintf(stderr, "out of memory\n");
241 exit (1);
242 }
243 }
244 if (read_symbol(in, &table[table_cnt]) == 0)
245 table_cnt++;
246 }
247 }
248
output_label(char * label)249 static void output_label(char *label)
250 {
251 if (symbol_prefix_char)
252 printf(".globl %c%s\n", symbol_prefix_char, label);
253 else
254 printf(".globl %s\n", label);
255 printf("\tALGN\n");
256 if (symbol_prefix_char)
257 printf("%c%s:\n", symbol_prefix_char, label);
258 else
259 printf("%s:\n", label);
260 }
261
262 /* uncompress a compressed symbol. When this function is called, the best table
263 * might still be compressed itself, so the function needs to be recursive */
expand_symbol(unsigned char * data,int len,char * result)264 static int expand_symbol(unsigned char *data, int len, char *result)
265 {
266 int c, rlen, total=0;
267
268 while (len) {
269 c = *data;
270 /* if the table holds a single char that is the same as the one
271 * we are looking for, then end the search */
272 if (best_table[c][0]==c && best_table_len[c]==1) {
273 *result++ = c;
274 total++;
275 } else {
276 /* if not, recurse and expand */
277 rlen = expand_symbol(best_table[c], best_table_len[c], result);
278 total += rlen;
279 result += rlen;
280 }
281 data++;
282 len--;
283 }
284 *result=0;
285
286 return total;
287 }
288
289 /* Sort by original (non mangled) symbol name, then type. */
compare_name_orig(const void * p1,const void * p2)290 static int compare_name_orig(const void *p1, const void *p2)
291 {
292 const struct sym_entry *sym1 = p1;
293 const struct sym_entry *sym2 = p2;
294 int rc;
295
296 rc = strcmp(sym1->orig_symbol, sym2->orig_symbol);
297
298 if (!rc)
299 rc = sym1->type - sym2->type;
300
301 return rc;
302 }
303
write_src(void)304 static void write_src(void)
305 {
306 unsigned int i, k, off;
307 unsigned int best_idx[256];
308 unsigned int *markers;
309 char buf[KSYM_NAME_LEN+1];
310
311 if (map_only) {
312 for (i = 0; i < table_cnt; i++)
313 printf("%#llx %c %s\n", table[i].addr, table[i].type,
314 table[i].orig_symbol);
315
316 return;
317 }
318 printf("#include <xen/config.h>\n");
319 printf("#include <asm/types.h>\n");
320 printf("#if BITS_PER_LONG == 64 && !defined(SYMBOLS_ORIGIN)\n");
321 printf("#define PTR .quad\n");
322 printf("#define ALGN .align 8\n");
323 printf("#else\n");
324 printf("#define PTR .long\n");
325 printf("#define ALGN .align 4\n");
326 printf("#endif\n");
327
328 printf("\t.section .rodata, \"a\"\n");
329
330 printf("#ifndef SYMBOLS_ORIGIN\n");
331 printf("#define SYMBOLS_ORIGIN 0\n");
332 output_label("symbols_addresses");
333 printf("#else\n");
334 output_label("symbols_offsets");
335 printf("#endif\n");
336 for (i = 0; i < table_cnt; i++) {
337 printf("\tPTR\t%#llx - SYMBOLS_ORIGIN\n", table[i].addr);
338 }
339 printf("\n");
340
341 output_label("symbols_num_syms");
342 printf("\t.long\t%d\n", table_cnt);
343 printf("\n");
344
345 /* table of offset markers, that give the offset in the compressed stream
346 * every 256 symbols */
347 markers = (unsigned int *) malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
348
349 output_label("symbols_names");
350 off = 0;
351 for (i = 0; i < table_cnt; i++) {
352 if ((i & 0xFF) == 0)
353 markers[i >> 8] = off;
354
355 printf("\t.byte 0x%02x", table[i].len);
356 for (k = 0; k < table[i].len; k++)
357 printf(", 0x%02x", table[i].sym[k]);
358 printf("\n");
359
360 table[i].stream_offset = off;
361 off += table[i].len + 1;
362 }
363 printf("\n");
364
365 output_label("symbols_markers");
366 for (i = 0; i < ((table_cnt + 255) >> 8); i++)
367 printf("\t.long\t%d\n", markers[i]);
368 printf("\n");
369
370
371 output_label("symbols_token_table");
372 off = 0;
373 for (i = 0; i < 256; i++) {
374 best_idx[i] = off;
375 expand_symbol(best_table[i], best_table_len[i], buf);
376 printf("\t.asciz\t\"%s\"\n", buf);
377 off += strlen(buf) + 1;
378 }
379 printf("\n");
380
381 output_label("symbols_token_index");
382 for (i = 0; i < 256; i++)
383 printf("\t.short\t%d\n", best_idx[i]);
384 printf("\n");
385
386 if (!sort_by_name) {
387 free(markers);
388 return;
389 }
390
391 /* Sorted by original symbol names and type. */
392 qsort(table, table_cnt, sizeof(*table), compare_name_orig);
393
394 output_label("symbols_sorted_offsets");
395 /* A fixed sized array with two entries: offset in the
396 * compressed stream (for symbol name), and offset in
397 * symbols_addresses (or symbols_offset). */
398 for (i = 0; i < table_cnt; i++) {
399 printf("\t.long %u, %u\n", table[i].stream_offset, table[i].addr_idx);
400 }
401 printf("\n");
402
403 free(markers);
404 }
405
406
407 /* table lookup compression functions */
408
409 /* count all the possible tokens in a symbol */
learn_symbol(unsigned char * symbol,int len)410 static void learn_symbol(unsigned char *symbol, int len)
411 {
412 int i;
413
414 for (i = 0; i < len - 1; i++)
415 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
416 }
417
418 /* decrease the count for all the possible tokens in a symbol */
forget_symbol(unsigned char * symbol,int len)419 static void forget_symbol(unsigned char *symbol, int len)
420 {
421 int i;
422
423 for (i = 0; i < len - 1; i++)
424 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
425 }
426
427 /* remove all the invalid symbols from the table and do the initial token count */
build_initial_tok_table(void)428 static void build_initial_tok_table(void)
429 {
430 unsigned int i, pos;
431
432 pos = 0;
433 for (i = 0; i < table_cnt; i++) {
434 if ( symbol_valid(&table[i]) ) {
435 if (pos != i)
436 table[pos] = table[i];
437 learn_symbol(table[pos].sym, table[pos].len);
438 pos++;
439 }
440 }
441 table_cnt = pos;
442 }
443
memmem_pvt(void * h,size_t hlen,void * n,size_t nlen)444 static void *memmem_pvt(void *h, size_t hlen, void *n, size_t nlen)
445 {
446 char *p;
447 for (p = h; (p - (char *)h) <= (long)(hlen - nlen); p++)
448 if (!memcmp(p, n, nlen)) return p;
449 return NULL;
450 }
451
452 /* replace a given token in all the valid symbols. Use the sampled symbols
453 * to update the counts */
compress_symbols(unsigned char * str,int idx)454 static void compress_symbols(unsigned char *str, int idx)
455 {
456 unsigned int i, len, size;
457 unsigned char *p1, *p2;
458
459 for (i = 0; i < table_cnt; i++) {
460
461 len = table[i].len;
462 p1 = table[i].sym;
463
464 table[i].addr_idx = i;
465 /* find the token on the symbol */
466 p2 = memmem_pvt(p1, len, str, 2);
467 if (!p2) continue;
468
469 /* decrease the counts for this symbol's tokens */
470 forget_symbol(table[i].sym, len);
471
472 size = len;
473
474 do {
475 *p2 = idx;
476 p2++;
477 size -= (p2 - p1);
478 memmove(p2, p2 + 1, size);
479 p1 = p2;
480 len--;
481
482 if (size < 2) break;
483
484 /* find the token on the symbol */
485 p2 = memmem_pvt(p1, size, str, 2);
486
487 } while (p2);
488
489 table[i].len = len;
490
491 /* increase the counts for this symbol's new tokens */
492 learn_symbol(table[i].sym, len);
493 }
494 }
495
496 /* search the token with the maximum profit */
find_best_token(void)497 static int find_best_token(void)
498 {
499 int i, best, bestprofit;
500
501 bestprofit=-10000;
502 best = 0;
503
504 for (i = 0; i < 0x10000; i++) {
505 if (token_profit[i] > bestprofit) {
506 best = i;
507 bestprofit = token_profit[i];
508 }
509 }
510 return best;
511 }
512
513 /* this is the core of the algorithm: calculate the "best" table */
optimize_result(void)514 static void optimize_result(void)
515 {
516 int i, best;
517
518 /* using the '\0' symbol last allows compress_symbols to use standard
519 * fast string functions */
520 for (i = 255; i >= 0; i--) {
521
522 /* if this table slot is empty (it is not used by an actual
523 * original char code */
524 if (!best_table_len[i]) {
525
526 /* find the token with the breates profit value */
527 best = find_best_token();
528 if (token_profit[best] == 0)
529 break;
530
531 /* place it in the "best" table */
532 best_table_len[i] = 2;
533 best_table[i][0] = best & 0xFF;
534 best_table[i][1] = (best >> 8) & 0xFF;
535
536 /* replace this token in all the valid symbols */
537 compress_symbols(best_table[i], i);
538 }
539 }
540 }
541
542 /* start by placing the symbols that are actually used on the table */
insert_real_symbols_in_table(void)543 static void insert_real_symbols_in_table(void)
544 {
545 unsigned int i, j, c;
546
547 memset(best_table, 0, sizeof(best_table));
548 memset(best_table_len, 0, sizeof(best_table_len));
549
550 for (i = 0; i < table_cnt; i++) {
551 for (j = 0; j < table[i].len; j++) {
552 c = table[i].sym[j];
553 best_table[c][0]=c;
554 best_table_len[c]=1;
555 }
556 }
557 }
558
optimize_token_table(void)559 static void optimize_token_table(void)
560 {
561 build_initial_tok_table();
562
563 insert_real_symbols_in_table();
564
565 /* When valid symbol is not registered, exit to error */
566 if (!table_cnt) {
567 fprintf(stderr, "No valid symbol.\n");
568 exit(1);
569 }
570
571 optimize_result();
572 }
573
compare_value(const void * p1,const void * p2)574 static int compare_value(const void *p1, const void *p2)
575 {
576 const struct sym_entry *sym1 = p1;
577 const struct sym_entry *sym2 = p2;
578
579 if (sym1->addr < sym2->addr)
580 return -1;
581 if (sym1->addr > sym2->addr)
582 return +1;
583 /* Prefer global symbols. */
584 if (isupper(*sym1->sym))
585 return -1;
586 if (isupper(*sym2->sym))
587 return +1;
588 return 0;
589 }
590
compare_name(const void * p1,const void * p2)591 static int compare_name(const void *p1, const void *p2)
592 {
593 const struct sym_entry *sym1 = p1;
594 const struct sym_entry *sym2 = p2;
595
596 return strcmp(SYMBOL_NAME(sym1), SYMBOL_NAME(sym2));
597 }
598
main(int argc,char ** argv)599 int main(int argc, char **argv)
600 {
601 unsigned int i;
602 bool unsorted = false, warn_dup = false, error_dup = false, found_dup = false;
603
604 if (argc >= 2) {
605 for (i = 1; i < argc; i++) {
606 if(strcmp(argv[i], "--all-symbols") == 0)
607 all_symbols = 1;
608 else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
609 char *p = &argv[i][16];
610 /* skip quote */
611 if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
612 p++;
613 symbol_prefix_char = *p;
614 } else if (strcmp(argv[i], "--sysv") == 0)
615 input_format = fmt_sysv;
616 else if (strcmp(argv[i], "--sort") == 0)
617 unsorted = true;
618 else if (strcmp(argv[i], "--sort-by-name") == 0)
619 sort_by_name = 1;
620 else if (strcmp(argv[i], "--warn-dup") == 0)
621 warn_dup = true;
622 else if (strcmp(argv[i], "--error-dup") == 0)
623 warn_dup = error_dup = true;
624 else if (strcmp(argv[i], "--xensyms") == 0)
625 map_only = true;
626 else
627 usage();
628 }
629 } else if (argc != 1)
630 usage();
631
632 read_map(stdin);
633
634 if (warn_dup) {
635 qsort(table, table_cnt, sizeof(*table), compare_name);
636 for (i = 1; i < table_cnt; ++i)
637 if (strcmp(SYMBOL_NAME(table + i - 1),
638 SYMBOL_NAME(table + i)) == 0 &&
639 table[i - 1].addr != table[i].addr) {
640 fprintf(stderr,
641 "Duplicate symbol '%s' (%llx != %llx)\n",
642 SYMBOL_NAME(table + i),
643 table[i].addr, table[i - 1].addr);
644 found_dup = true;
645 }
646 unsorted = true;
647 }
648
649 if (error_dup && found_dup)
650 exit(1);
651
652 if (unsorted)
653 qsort(table, table_cnt, sizeof(*table), compare_value);
654
655 optimize_token_table();
656 write_src();
657
658 return 0;
659 }
660