1 /* Generate assembler source containing symbol information 2 * 3 * Copyright 2002 by Kai Germaschewski 4 * 5 * This software may be used and distributed according to the terms 6 * of the GNU General Public License, incorporated herein by reference. 7 * 8 * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S 9 * 10 * ChangeLog: 11 * 12 * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com> 13 * Changed the compression method from stem compression to "table lookup" 14 * compression 15 * 16 * Table compression uses all the unused char codes on the symbols and 17 * maps these to the most used substrings (tokens). For instance, it might 18 * map char code 0xF7 to represent "write_" and then in every symbol where 19 * "write_" appears it can be replaced by 0xF7, saving 5 bytes. 20 * The used codes themselves are also placed in the table so that the 21 * decompresion can work without "special cases". 22 * Applied to kernel symbols, this usually produces a compression ratio 23 * of about 50%. 24 * 25 */ 26 27 #define _GNU_SOURCE 28 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <ctype.h> 33 34 #define KSYM_NAME_LEN 127 35 36 37 struct sym_entry { 38 unsigned long long addr; 39 unsigned int len; 40 unsigned char *sym; 41 }; 42 43 44 static struct sym_entry *table; 45 static unsigned int table_size, table_cnt; 46 static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext; 47 static int all_symbols = 0; 48 static char symbol_prefix_char = '\0'; 49 50 int token_profit[0x10000]; 51 52 /* the table that holds the result of the compression */ 53 unsigned char best_table[256][2]; 54 unsigned char best_table_len[256]; 55 56 57 static void usage(void) 58 { 59 fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n"); 60 exit(1); 61 } 62 63 /* 64 * This ignores the intensely annoying "mapping symbols" found 65 * in ARM ELF files: $a, $t and $d. 66 */ 67 static inline int is_arm_mapping_symbol(const char *str) 68 { 69 return str[0] == '$' && strchr("atd", str[1]) 70 && (str[2] == '\0' || str[2] == '.'); 71 } 72 73 static int read_symbol(FILE *in, struct sym_entry *s) 74 { 75 char str[500]; 76 char *sym, stype; 77 int rc; 78 79 rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str); 80 if (rc != 3) { 81 if (rc != EOF) { 82 /* skip line */ 83 fgets(str, 500, in); 84 } 85 return -1; 86 } 87 88 sym = str; 89 /* skip prefix char */ 90 if (symbol_prefix_char && str[0] == symbol_prefix_char) 91 sym++; 92 93 /* Ignore most absolute/undefined (?) symbols. */ 94 if (strcmp(sym, "_stext") == 0) 95 _stext = s->addr; 96 else if (strcmp(sym, "_etext") == 0) 97 _etext = s->addr; 98 else if (strcmp(sym, "_sinittext") == 0) 99 _sinittext = s->addr; 100 else if (strcmp(sym, "_einittext") == 0) 101 _einittext = s->addr; 102 else if (strcmp(sym, "_sextratext") == 0) 103 _sextratext = s->addr; 104 else if (strcmp(sym, "_eextratext") == 0) 105 _eextratext = s->addr; 106 else if (toupper(stype) == 'A') 107 { 108 /* Keep these useful absolute symbols */ 109 if (strcmp(sym, "__kernel_syscall_via_break") && 110 strcmp(sym, "__kernel_syscall_via_epc") && 111 strcmp(sym, "__kernel_sigtramp") && 112 strcmp(sym, "__gp")) 113 return -1; 114 115 } 116 else if (toupper(stype) == 'U' || 117 is_arm_mapping_symbol(sym)) 118 return -1; 119 /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */ 120 else if (str[0] == '$') 121 return -1; 122 123 /* include the type field in the symbol name, so that it gets 124 * compressed together */ 125 s->len = strlen(str) + 1; 126 s->sym = malloc(s->len + 1); 127 strcpy((char *)s->sym + 1, str); 128 s->sym[0] = stype; 129 130 return 0; 131 } 132 133 static int symbol_valid(struct sym_entry *s) 134 { 135 /* Symbols which vary between passes. Passes 1 and 2 must have 136 * identical symbol lists. The kallsyms_* symbols below are only added 137 * after pass 1, they would be included in pass 2 when --all-symbols is 138 * specified so exclude them to get a stable symbol list. 139 */ 140 static char *special_symbols[] = { 141 "kallsyms_addresses", 142 "kallsyms_num_syms", 143 "kallsyms_names", 144 "kallsyms_markers", 145 "kallsyms_token_table", 146 "kallsyms_token_index", 147 148 /* Exclude linker generated symbols which vary between passes */ 149 "_SDA_BASE_", /* ppc */ 150 "_SDA2_BASE_", /* ppc */ 151 NULL }; 152 int i; 153 int offset = 1; 154 155 /* skip prefix char */ 156 if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char) 157 offset++; 158 159 /* if --all-symbols is not specified, then symbols outside the text 160 * and inittext sections are discarded */ 161 if (!all_symbols) { 162 if ((s->addr < _stext || s->addr > _etext) 163 && (s->addr < _sinittext || s->addr > _einittext) 164 && (s->addr < _sextratext || s->addr > _eextratext)) 165 return 0; 166 /* Corner case. Discard any symbols with the same value as 167 * _etext _einittext or _eextratext; they can move between pass 168 * 1 and 2 when the kallsyms data are added. If these symbols 169 * move then they may get dropped in pass 2, which breaks the 170 * kallsyms rules. 171 */ 172 if ((s->addr == _etext && strcmp((char*)s->sym + offset, "_etext")) || 173 (s->addr == _einittext && strcmp((char*)s->sym + offset, "_einittext")) || 174 (s->addr == _eextratext && strcmp((char*)s->sym + offset, "_eextratext"))) 175 return 0; 176 } 177 178 /* Exclude symbols which vary between passes. */ 179 if (strstr((char *)s->sym + offset, "_compiled.")) 180 return 0; 181 182 for (i = 0; special_symbols[i]; i++) 183 if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 ) 184 return 0; 185 186 return 1; 187 } 188 189 static void read_map(FILE *in) 190 { 191 while (!feof(in)) { 192 if (table_cnt >= table_size) { 193 table_size += 10000; 194 table = realloc(table, sizeof(*table) * table_size); 195 if (!table) { 196 fprintf(stderr, "out of memory\n"); 197 exit (1); 198 } 199 } 200 if (read_symbol(in, &table[table_cnt]) == 0) 201 table_cnt++; 202 } 203 } 204 205 static void output_label(char *label) 206 { 207 if (symbol_prefix_char) 208 printf(".globl %c%s\n", symbol_prefix_char, label); 209 else 210 printf(".globl %s\n", label); 211 printf("\tALGN\n"); 212 if (symbol_prefix_char) 213 printf("%c%s:\n", symbol_prefix_char, label); 214 else 215 printf("%s:\n", label); 216 } 217 218 /* uncompress a compressed symbol. When this function is called, the best table 219 * might still be compressed itself, so the function needs to be recursive */ 220 static int expand_symbol(unsigned char *data, int len, char *result) 221 { 222 int c, rlen, total=0; 223 224 while (len) { 225 c = *data; 226 /* if the table holds a single char that is the same as the one 227 * we are looking for, then end the search */ 228 if (best_table[c][0]==c && best_table_len[c]==1) { 229 *result++ = c; 230 total++; 231 } else { 232 /* if not, recurse and expand */ 233 rlen = expand_symbol(best_table[c], best_table_len[c], result); 234 total += rlen; 235 result += rlen; 236 } 237 data++; 238 len--; 239 } 240 *result=0; 241 242 return total; 243 } 244 245 static void write_src(void) 246 { 247 unsigned int i, k, off; 248 unsigned int best_idx[256]; 249 unsigned int *markers; 250 char buf[KSYM_NAME_LEN+1]; 251 252 printf("#include <asm/types.h>\n"); 253 printf("#if BITS_PER_LONG == 64\n"); 254 printf("#define PTR .quad\n"); 255 printf("#define ALGN .align 8\n"); 256 printf("#else\n"); 257 printf("#define PTR .long\n"); 258 printf("#define ALGN .align 4\n"); 259 printf("#endif\n"); 260 261 printf(".data\n"); 262 263 output_label("kallsyms_addresses"); 264 for (i = 0; i < table_cnt; i++) { 265 printf("\tPTR\t%#llx\n", table[i].addr); 266 } 267 printf("\n"); 268 269 output_label("kallsyms_num_syms"); 270 printf("\tPTR\t%d\n", table_cnt); 271 printf("\n"); 272 273 /* table of offset markers, that give the offset in the compressed stream 274 * every 256 symbols */ 275 markers = (unsigned int *) malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256)); 276 277 output_label("kallsyms_names"); 278 off = 0; 279 for (i = 0; i < table_cnt; i++) { 280 if ((i & 0xFF) == 0) 281 markers[i >> 8] = off; 282 283 printf("\t.byte 0x%02x", table[i].len); 284 for (k = 0; k < table[i].len; k++) 285 printf(", 0x%02x", table[i].sym[k]); 286 printf("\n"); 287 288 off += table[i].len + 1; 289 } 290 printf("\n"); 291 292 output_label("kallsyms_markers"); 293 for (i = 0; i < ((table_cnt + 255) >> 8); i++) 294 printf("\tPTR\t%d\n", markers[i]); 295 printf("\n"); 296 297 free(markers); 298 299 output_label("kallsyms_token_table"); 300 off = 0; 301 for (i = 0; i < 256; i++) { 302 best_idx[i] = off; 303 expand_symbol(best_table[i], best_table_len[i], buf); 304 printf("\t.asciz\t\"%s\"\n", buf); 305 off += strlen(buf) + 1; 306 } 307 printf("\n"); 308 309 output_label("kallsyms_token_index"); 310 for (i = 0; i < 256; i++) 311 printf("\t.short\t%d\n", best_idx[i]); 312 printf("\n"); 313 } 314 315 316 /* table lookup compression functions */ 317 318 /* count all the possible tokens in a symbol */ 319 static void learn_symbol(unsigned char *symbol, int len) 320 { 321 int i; 322 323 for (i = 0; i < len - 1; i++) 324 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++; 325 } 326 327 /* decrease the count for all the possible tokens in a symbol */ 328 static void forget_symbol(unsigned char *symbol, int len) 329 { 330 int i; 331 332 for (i = 0; i < len - 1; i++) 333 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--; 334 } 335 336 /* remove all the invalid symbols from the table and do the initial token count */ 337 static void build_initial_tok_table(void) 338 { 339 unsigned int i, pos; 340 341 pos = 0; 342 for (i = 0; i < table_cnt; i++) { 343 if ( symbol_valid(&table[i]) ) { 344 if (pos != i) 345 table[pos] = table[i]; 346 learn_symbol(table[pos].sym, table[pos].len); 347 pos++; 348 } 349 } 350 table_cnt = pos; 351 } 352 353 /* replace a given token in all the valid symbols. Use the sampled symbols 354 * to update the counts */ 355 static void compress_symbols(unsigned char *str, int idx) 356 { 357 unsigned int i, len, size; 358 unsigned char *p1, *p2; 359 360 for (i = 0; i < table_cnt; i++) { 361 362 len = table[i].len; 363 p1 = table[i].sym; 364 365 /* find the token on the symbol */ 366 p2 = memmem(p1, len, str, 2); 367 if (!p2) continue; 368 369 /* decrease the counts for this symbol's tokens */ 370 forget_symbol(table[i].sym, len); 371 372 size = len; 373 374 do { 375 *p2 = idx; 376 p2++; 377 size -= (p2 - p1); 378 memmove(p2, p2 + 1, size); 379 p1 = p2; 380 len--; 381 382 if (size < 2) break; 383 384 /* find the token on the symbol */ 385 p2 = memmem(p1, size, str, 2); 386 387 } while (p2); 388 389 table[i].len = len; 390 391 /* increase the counts for this symbol's new tokens */ 392 learn_symbol(table[i].sym, len); 393 } 394 } 395 396 /* search the token with the maximum profit */ 397 static int find_best_token(void) 398 { 399 int i, best, bestprofit; 400 401 bestprofit=-10000; 402 best = 0; 403 404 for (i = 0; i < 0x10000; i++) { 405 if (token_profit[i] > bestprofit) { 406 best = i; 407 bestprofit = token_profit[i]; 408 } 409 } 410 return best; 411 } 412 413 /* this is the core of the algorithm: calculate the "best" table */ 414 static void optimize_result(void) 415 { 416 int i, best; 417 418 /* using the '\0' symbol last allows compress_symbols to use standard 419 * fast string functions */ 420 for (i = 255; i >= 0; i--) { 421 422 /* if this table slot is empty (it is not used by an actual 423 * original char code */ 424 if (!best_table_len[i]) { 425 426 /* find the token with the breates profit value */ 427 best = find_best_token(); 428 429 /* place it in the "best" table */ 430 best_table_len[i] = 2; 431 best_table[i][0] = best & 0xFF; 432 best_table[i][1] = (best >> 8) & 0xFF; 433 434 /* replace this token in all the valid symbols */ 435 compress_symbols(best_table[i], i); 436 } 437 } 438 } 439 440 /* start by placing the symbols that are actually used on the table */ 441 static void insert_real_symbols_in_table(void) 442 { 443 unsigned int i, j, c; 444 445 memset(best_table, 0, sizeof(best_table)); 446 memset(best_table_len, 0, sizeof(best_table_len)); 447 448 for (i = 0; i < table_cnt; i++) { 449 for (j = 0; j < table[i].len; j++) { 450 c = table[i].sym[j]; 451 best_table[c][0]=c; 452 best_table_len[c]=1; 453 } 454 } 455 } 456 457 static void optimize_token_table(void) 458 { 459 build_initial_tok_table(); 460 461 insert_real_symbols_in_table(); 462 463 /* When valid symbol is not registered, exit to error */ 464 if (!table_cnt) { 465 fprintf(stderr, "No valid symbol.\n"); 466 exit(1); 467 } 468 469 optimize_result(); 470 } 471 472 473 int main(int argc, char **argv) 474 { 475 if (argc >= 2) { 476 int i; 477 for (i = 1; i < argc; i++) { 478 if(strcmp(argv[i], "--all-symbols") == 0) 479 all_symbols = 1; 480 else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) { 481 char *p = &argv[i][16]; 482 /* skip quote */ 483 if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\'')) 484 p++; 485 symbol_prefix_char = *p; 486 } else 487 usage(); 488 } 489 } else if (argc != 1) 490 usage(); 491 492 read_map(stdin); 493 optimize_token_table(); 494 write_src(); 495 496 return 0; 497 } 498