1 /* Generate assembler source containing symbol information 2 * 3 * Copyright 2002 by Kai Germaschewski 4 * 5 * This software may be used and distributed according to the terms 6 * of the GNU General Public License, incorporated herein by reference. 7 * 8 * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S 9 * 10 * ChangeLog: 11 * 12 * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com> 13 * Changed the compression method from stem compression to "table lookup" 14 * compression 15 * 16 * Table compression uses all the unused char codes on the symbols and 17 * maps these to the most used substrings (tokens). For instance, it might 18 * map char code 0xF7 to represent "write_" and then in every symbol where 19 * "write_" appears it can be replaced by 0xF7, saving 5 bytes. 20 * The used codes themselves are also placed in the table so that the 21 * decompresion can work without "special cases". 22 * Applied to kernel symbols, this usually produces a compression ratio 23 * of about 50%. 24 * 25 */ 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <ctype.h> 31 32 #define KSYM_NAME_LEN 128 33 34 struct sym_entry { 35 unsigned long long addr; 36 unsigned int len; 37 unsigned int start_pos; 38 unsigned char *sym; 39 }; 40 41 static struct sym_entry *table; 42 static unsigned int table_size, table_cnt; 43 static unsigned long long _text, _stext, _etext, _sinittext, _einittext; 44 static int all_symbols = 0; 45 static char symbol_prefix_char = '\0'; 46 47 int token_profit[0x10000]; 48 49 /* the table that holds the result of the compression */ 50 unsigned char best_table[256][2]; 51 unsigned char best_table_len[256]; 52 53 54 static void usage(void) 55 { 56 fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n"); 57 exit(1); 58 } 59 60 /* 61 * This ignores the intensely annoying "mapping symbols" found 62 * in ARM ELF files: $a, $t and $d. 63 */ 64 static inline int is_arm_mapping_symbol(const char *str) 65 { 66 return str[0] == '$' && strchr("atd", str[1]) 67 && (str[2] == '\0' || str[2] == '.'); 68 } 69 70 static int read_symbol(FILE *in, struct sym_entry *s) 71 { 72 char str[500]; 73 char *sym, stype; 74 int rc; 75 76 rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str); 77 if (rc != 3) { 78 if (rc != EOF) { 79 /* skip line */ 80 fgets(str, 500, in); 81 } 82 return -1; 83 } 84 85 sym = str; 86 /* skip prefix char */ 87 if (symbol_prefix_char && str[0] == symbol_prefix_char) 88 sym++; 89 90 /* Ignore most absolute/undefined (?) symbols. */ 91 if (strcmp(sym, "_text") == 0) 92 _text = s->addr; 93 else if (strcmp(sym, "_stext") == 0) 94 _stext = s->addr; 95 else if (strcmp(sym, "_etext") == 0) 96 _etext = s->addr; 97 else if (strcmp(sym, "_sinittext") == 0) 98 _sinittext = s->addr; 99 else if (strcmp(sym, "_einittext") == 0) 100 _einittext = s->addr; 101 else if (toupper(stype) == 'A') 102 { 103 /* Keep these useful absolute symbols */ 104 if (strcmp(sym, "__kernel_syscall_via_break") && 105 strcmp(sym, "__kernel_syscall_via_epc") && 106 strcmp(sym, "__kernel_sigtramp") && 107 strcmp(sym, "__gp")) 108 return -1; 109 110 } 111 else if (toupper(stype) == 'U' || 112 is_arm_mapping_symbol(sym)) 113 return -1; 114 /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */ 115 else if (str[0] == '$') 116 return -1; 117 118 /* include the type field in the symbol name, so that it gets 119 * compressed together */ 120 s->len = strlen(str) + 1; 121 s->sym = malloc(s->len + 1); 122 if (!s->sym) { 123 fprintf(stderr, "kallsyms failure: " 124 "unable to allocate required amount of memory\n"); 125 exit(EXIT_FAILURE); 126 } 127 strcpy((char *)s->sym + 1, str); 128 s->sym[0] = stype; 129 130 return 0; 131 } 132 133 static int symbol_valid(struct sym_entry *s) 134 { 135 /* Symbols which vary between passes. Passes 1 and 2 must have 136 * identical symbol lists. The kallsyms_* symbols below are only added 137 * after pass 1, they would be included in pass 2 when --all-symbols is 138 * specified so exclude them to get a stable symbol list. 139 */ 140 static char *special_symbols[] = { 141 "kallsyms_addresses", 142 "kallsyms_num_syms", 143 "kallsyms_names", 144 "kallsyms_markers", 145 "kallsyms_token_table", 146 "kallsyms_token_index", 147 148 /* Exclude linker generated symbols which vary between passes */ 149 "_SDA_BASE_", /* ppc */ 150 "_SDA2_BASE_", /* ppc */ 151 NULL }; 152 int i; 153 int offset = 1; 154 155 /* skip prefix char */ 156 if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char) 157 offset++; 158 159 /* if --all-symbols is not specified, then symbols outside the text 160 * and inittext sections are discarded */ 161 if (!all_symbols) { 162 if ((s->addr < _stext || s->addr > _etext) 163 && (s->addr < _sinittext || s->addr > _einittext)) 164 return 0; 165 /* Corner case. Discard any symbols with the same value as 166 * _etext _einittext; they can move between pass 1 and 2 when 167 * the kallsyms data are added. If these symbols move then 168 * they may get dropped in pass 2, which breaks the kallsyms 169 * rules. 170 */ 171 if ((s->addr == _etext && 172 strcmp((char *)s->sym + offset, "_etext")) || 173 (s->addr == _einittext && 174 strcmp((char *)s->sym + offset, "_einittext"))) 175 return 0; 176 } 177 178 /* Exclude symbols which vary between passes. */ 179 if (strstr((char *)s->sym + offset, "_compiled.")) 180 return 0; 181 182 for (i = 0; special_symbols[i]; i++) 183 if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 ) 184 return 0; 185 186 return 1; 187 } 188 189 static void read_map(FILE *in) 190 { 191 while (!feof(in)) { 192 if (table_cnt >= table_size) { 193 table_size += 10000; 194 table = realloc(table, sizeof(*table) * table_size); 195 if (!table) { 196 fprintf(stderr, "out of memory\n"); 197 exit (1); 198 } 199 } 200 if (read_symbol(in, &table[table_cnt]) == 0) { 201 table[table_cnt].start_pos = table_cnt; 202 table_cnt++; 203 } 204 } 205 } 206 207 static void output_label(char *label) 208 { 209 if (symbol_prefix_char) 210 printf(".globl %c%s\n", symbol_prefix_char, label); 211 else 212 printf(".globl %s\n", label); 213 printf("\tALGN\n"); 214 if (symbol_prefix_char) 215 printf("%c%s:\n", symbol_prefix_char, label); 216 else 217 printf("%s:\n", label); 218 } 219 220 /* uncompress a compressed symbol. When this function is called, the best table 221 * might still be compressed itself, so the function needs to be recursive */ 222 static int expand_symbol(unsigned char *data, int len, char *result) 223 { 224 int c, rlen, total=0; 225 226 while (len) { 227 c = *data; 228 /* if the table holds a single char that is the same as the one 229 * we are looking for, then end the search */ 230 if (best_table[c][0]==c && best_table_len[c]==1) { 231 *result++ = c; 232 total++; 233 } else { 234 /* if not, recurse and expand */ 235 rlen = expand_symbol(best_table[c], best_table_len[c], result); 236 total += rlen; 237 result += rlen; 238 } 239 data++; 240 len--; 241 } 242 *result=0; 243 244 return total; 245 } 246 247 static void write_src(void) 248 { 249 unsigned int i, k, off; 250 unsigned int best_idx[256]; 251 unsigned int *markers; 252 char buf[KSYM_NAME_LEN]; 253 254 printf("#include <asm/types.h>\n"); 255 printf("#if BITS_PER_LONG == 64\n"); 256 printf("#define PTR .quad\n"); 257 printf("#define ALGN .align 8\n"); 258 printf("#else\n"); 259 printf("#define PTR .long\n"); 260 printf("#define ALGN .align 4\n"); 261 printf("#endif\n"); 262 263 printf("\t.section .rodata, \"a\"\n"); 264 265 /* Provide proper symbols relocatability by their '_text' 266 * relativeness. The symbol names cannot be used to construct 267 * normal symbol references as the list of symbols contains 268 * symbols that are declared static and are private to their 269 * .o files. This prevents .tmp_kallsyms.o or any other 270 * object from referencing them. 271 */ 272 output_label("kallsyms_addresses"); 273 for (i = 0; i < table_cnt; i++) { 274 if (toupper(table[i].sym[0]) != 'A') { 275 if (_text <= table[i].addr) 276 printf("\tPTR\t_text + %#llx\n", 277 table[i].addr - _text); 278 else 279 printf("\tPTR\t_text - %#llx\n", 280 _text - table[i].addr); 281 } else { 282 printf("\tPTR\t%#llx\n", table[i].addr); 283 } 284 } 285 printf("\n"); 286 287 output_label("kallsyms_num_syms"); 288 printf("\tPTR\t%d\n", table_cnt); 289 printf("\n"); 290 291 /* table of offset markers, that give the offset in the compressed stream 292 * every 256 symbols */ 293 markers = malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256)); 294 if (!markers) { 295 fprintf(stderr, "kallsyms failure: " 296 "unable to allocate required memory\n"); 297 exit(EXIT_FAILURE); 298 } 299 300 output_label("kallsyms_names"); 301 off = 0; 302 for (i = 0; i < table_cnt; i++) { 303 if ((i & 0xFF) == 0) 304 markers[i >> 8] = off; 305 306 printf("\t.byte 0x%02x", table[i].len); 307 for (k = 0; k < table[i].len; k++) 308 printf(", 0x%02x", table[i].sym[k]); 309 printf("\n"); 310 311 off += table[i].len + 1; 312 } 313 printf("\n"); 314 315 output_label("kallsyms_markers"); 316 for (i = 0; i < ((table_cnt + 255) >> 8); i++) 317 printf("\tPTR\t%d\n", markers[i]); 318 printf("\n"); 319 320 free(markers); 321 322 output_label("kallsyms_token_table"); 323 off = 0; 324 for (i = 0; i < 256; i++) { 325 best_idx[i] = off; 326 expand_symbol(best_table[i], best_table_len[i], buf); 327 printf("\t.asciz\t\"%s\"\n", buf); 328 off += strlen(buf) + 1; 329 } 330 printf("\n"); 331 332 output_label("kallsyms_token_index"); 333 for (i = 0; i < 256; i++) 334 printf("\t.short\t%d\n", best_idx[i]); 335 printf("\n"); 336 } 337 338 339 /* table lookup compression functions */ 340 341 /* count all the possible tokens in a symbol */ 342 static void learn_symbol(unsigned char *symbol, int len) 343 { 344 int i; 345 346 for (i = 0; i < len - 1; i++) 347 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++; 348 } 349 350 /* decrease the count for all the possible tokens in a symbol */ 351 static void forget_symbol(unsigned char *symbol, int len) 352 { 353 int i; 354 355 for (i = 0; i < len - 1; i++) 356 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--; 357 } 358 359 /* remove all the invalid symbols from the table and do the initial token count */ 360 static void build_initial_tok_table(void) 361 { 362 unsigned int i, pos; 363 364 pos = 0; 365 for (i = 0; i < table_cnt; i++) { 366 if ( symbol_valid(&table[i]) ) { 367 if (pos != i) 368 table[pos] = table[i]; 369 learn_symbol(table[pos].sym, table[pos].len); 370 pos++; 371 } 372 } 373 table_cnt = pos; 374 } 375 376 static void *find_token(unsigned char *str, int len, unsigned char *token) 377 { 378 int i; 379 380 for (i = 0; i < len - 1; i++) { 381 if (str[i] == token[0] && str[i+1] == token[1]) 382 return &str[i]; 383 } 384 return NULL; 385 } 386 387 /* replace a given token in all the valid symbols. Use the sampled symbols 388 * to update the counts */ 389 static void compress_symbols(unsigned char *str, int idx) 390 { 391 unsigned int i, len, size; 392 unsigned char *p1, *p2; 393 394 for (i = 0; i < table_cnt; i++) { 395 396 len = table[i].len; 397 p1 = table[i].sym; 398 399 /* find the token on the symbol */ 400 p2 = find_token(p1, len, str); 401 if (!p2) continue; 402 403 /* decrease the counts for this symbol's tokens */ 404 forget_symbol(table[i].sym, len); 405 406 size = len; 407 408 do { 409 *p2 = idx; 410 p2++; 411 size -= (p2 - p1); 412 memmove(p2, p2 + 1, size); 413 p1 = p2; 414 len--; 415 416 if (size < 2) break; 417 418 /* find the token on the symbol */ 419 p2 = find_token(p1, size, str); 420 421 } while (p2); 422 423 table[i].len = len; 424 425 /* increase the counts for this symbol's new tokens */ 426 learn_symbol(table[i].sym, len); 427 } 428 } 429 430 /* search the token with the maximum profit */ 431 static int find_best_token(void) 432 { 433 int i, best, bestprofit; 434 435 bestprofit=-10000; 436 best = 0; 437 438 for (i = 0; i < 0x10000; i++) { 439 if (token_profit[i] > bestprofit) { 440 best = i; 441 bestprofit = token_profit[i]; 442 } 443 } 444 return best; 445 } 446 447 /* this is the core of the algorithm: calculate the "best" table */ 448 static void optimize_result(void) 449 { 450 int i, best; 451 452 /* using the '\0' symbol last allows compress_symbols to use standard 453 * fast string functions */ 454 for (i = 255; i >= 0; i--) { 455 456 /* if this table slot is empty (it is not used by an actual 457 * original char code */ 458 if (!best_table_len[i]) { 459 460 /* find the token with the breates profit value */ 461 best = find_best_token(); 462 463 /* place it in the "best" table */ 464 best_table_len[i] = 2; 465 best_table[i][0] = best & 0xFF; 466 best_table[i][1] = (best >> 8) & 0xFF; 467 468 /* replace this token in all the valid symbols */ 469 compress_symbols(best_table[i], i); 470 } 471 } 472 } 473 474 /* start by placing the symbols that are actually used on the table */ 475 static void insert_real_symbols_in_table(void) 476 { 477 unsigned int i, j, c; 478 479 memset(best_table, 0, sizeof(best_table)); 480 memset(best_table_len, 0, sizeof(best_table_len)); 481 482 for (i = 0; i < table_cnt; i++) { 483 for (j = 0; j < table[i].len; j++) { 484 c = table[i].sym[j]; 485 best_table[c][0]=c; 486 best_table_len[c]=1; 487 } 488 } 489 } 490 491 static void optimize_token_table(void) 492 { 493 build_initial_tok_table(); 494 495 insert_real_symbols_in_table(); 496 497 /* When valid symbol is not registered, exit to error */ 498 if (!table_cnt) { 499 fprintf(stderr, "No valid symbol.\n"); 500 exit(1); 501 } 502 503 optimize_result(); 504 } 505 506 static int compare_symbols(const void *a, const void *b) 507 { 508 const struct sym_entry *sa; 509 const struct sym_entry *sb; 510 int wa, wb; 511 512 sa = a; 513 sb = b; 514 515 /* sort by address first */ 516 if (sa->addr > sb->addr) 517 return 1; 518 if (sa->addr < sb->addr) 519 return -1; 520 521 /* sort by "weakness" type */ 522 wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W'); 523 wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W'); 524 if (wa != wb) 525 return wa - wb; 526 527 /* sort by initial order, so that other symbols are left undisturbed */ 528 return sa->start_pos - sb->start_pos; 529 } 530 531 static void sort_symbols(void) 532 { 533 qsort(table, table_cnt, sizeof(struct sym_entry), compare_symbols); 534 } 535 536 int main(int argc, char **argv) 537 { 538 if (argc >= 2) { 539 int i; 540 for (i = 1; i < argc; i++) { 541 if(strcmp(argv[i], "--all-symbols") == 0) 542 all_symbols = 1; 543 else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) { 544 char *p = &argv[i][16]; 545 /* skip quote */ 546 if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\'')) 547 p++; 548 symbol_prefix_char = *p; 549 } else 550 usage(); 551 } 552 } else if (argc != 1) 553 usage(); 554 555 read_map(stdin); 556 sort_symbols(); 557 optimize_token_table(); 558 write_src(); 559 560 return 0; 561 } 562