xref: /openbmc/linux/tools/perf/util/symbol.c (revision 78c99ba1)
1 #include "util.h"
2 #include "../perf.h"
3 #include "string.h"
4 #include "symbol.h"
5 
6 #include <libelf.h>
7 #include <gelf.h>
8 #include <elf.h>
9 
/*
 * Name of the symbol for which symbol__new() allocates a ->hist array of
 * 'len' __u64 entries. NULL (the initial value of this global) means no
 * symbol gets one. Nothing in this file assigns it.
 */
const char *sym_hist_filter;
11 
12 static struct symbol *symbol__new(__u64 start, __u64 len,
13 				  const char *name, unsigned int priv_size,
14 				  __u64 obj_start, int verbose)
15 {
16 	size_t namelen = strlen(name) + 1;
17 	struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen);
18 
19 	if (!self)
20 		return NULL;
21 
22 	if (verbose >= 2)
23 		printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n",
24 			(__u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start);
25 
26 	self->obj_start= obj_start;
27 	self->hist = NULL;
28 	self->hist_sum = 0;
29 
30 	if (sym_hist_filter && !strcmp(name, sym_hist_filter))
31 		self->hist = calloc(sizeof(__u64), len);
32 
33 	if (priv_size) {
34 		memset(self, 0, priv_size);
35 		self = ((void *)self) + priv_size;
36 	}
37 	self->start = start;
38 	self->end   = start + len - 1;
39 	memcpy(self->name, name, namelen);
40 
41 	return self;
42 }
43 
44 static void symbol__delete(struct symbol *self, unsigned int priv_size)
45 {
46 	free(((void *)self) - priv_size);
47 }
48 
49 static size_t symbol__fprintf(struct symbol *self, FILE *fp)
50 {
51 	return fprintf(fp, " %llx-%llx %s\n",
52 		       self->start, self->end, self->name);
53 }
54 
55 struct dso *dso__new(const char *name, unsigned int sym_priv_size)
56 {
57 	struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
58 
59 	if (self != NULL) {
60 		strcpy(self->name, name);
61 		self->syms = RB_ROOT;
62 		self->sym_priv_size = sym_priv_size;
63 		self->find_symbol = dso__find_symbol;
64 	}
65 
66 	return self;
67 }
68 
69 static void dso__delete_symbols(struct dso *self)
70 {
71 	struct symbol *pos;
72 	struct rb_node *next = rb_first(&self->syms);
73 
74 	while (next) {
75 		pos = rb_entry(next, struct symbol, rb_node);
76 		next = rb_next(&pos->rb_node);
77 		rb_erase(&pos->rb_node, &self->syms);
78 		symbol__delete(pos, self->sym_priv_size);
79 	}
80 }
81 
/*
 * dso__delete - free a dso together with all symbols in its tree
 *
 * A NULL @self is accepted and ignored, mirroring free() and the NULL
 * tolerance of dso__find_symbol().
 */
void dso__delete(struct dso *self)
{
	if (self == NULL)
		return;

	dso__delete_symbols(self);
	free(self);
}
87 
88 static void dso__insert_symbol(struct dso *self, struct symbol *sym)
89 {
90 	struct rb_node **p = &self->syms.rb_node;
91 	struct rb_node *parent = NULL;
92 	const __u64 ip = sym->start;
93 	struct symbol *s;
94 
95 	while (*p != NULL) {
96 		parent = *p;
97 		s = rb_entry(parent, struct symbol, rb_node);
98 		if (ip < s->start)
99 			p = &(*p)->rb_left;
100 		else
101 			p = &(*p)->rb_right;
102 	}
103 	rb_link_node(&sym->rb_node, parent, p);
104 	rb_insert_color(&sym->rb_node, &self->syms);
105 }
106 
107 struct symbol *dso__find_symbol(struct dso *self, __u64 ip)
108 {
109 	struct rb_node *n;
110 
111 	if (self == NULL)
112 		return NULL;
113 
114 	n = self->syms.rb_node;
115 
116 	while (n) {
117 		struct symbol *s = rb_entry(n, struct symbol, rb_node);
118 
119 		if (ip < s->start)
120 			n = n->rb_left;
121 		else if (ip > s->end)
122 			n = n->rb_right;
123 		else
124 			return s;
125 	}
126 
127 	return NULL;
128 }
129 
130 size_t dso__fprintf(struct dso *self, FILE *fp)
131 {
132 	size_t ret = fprintf(fp, "dso: %s\n", self->name);
133 
134 	struct rb_node *nd;
135 	for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) {
136 		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
137 		ret += symbol__fprintf(pos, fp);
138 	}
139 
140 	return ret;
141 }
142 
143 static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verbose)
144 {
145 	struct rb_node *nd, *prevnd;
146 	char *line = NULL;
147 	size_t n;
148 	FILE *file = fopen("/proc/kallsyms", "r");
149 
150 	if (file == NULL)
151 		goto out_failure;
152 
153 	while (!feof(file)) {
154 		__u64 start;
155 		struct symbol *sym;
156 		int line_len, len;
157 		char symbol_type;
158 
159 		line_len = getline(&line, &n, file);
160 		if (line_len < 0)
161 			break;
162 
163 		if (!line)
164 			goto out_failure;
165 
166 		line[--line_len] = '\0'; /* \n */
167 
168 		len = hex2u64(line, &start);
169 
170 		len++;
171 		if (len + 2 >= line_len)
172 			continue;
173 
174 		symbol_type = toupper(line[len]);
175 		/*
176 		 * We're interested only in code ('T'ext)
177 		 */
178 		if (symbol_type != 'T' && symbol_type != 'W')
179 			continue;
180 		/*
181 		 * Well fix up the end later, when we have all sorted.
182 		 */
183 		sym = symbol__new(start, 0xdead, line + len + 2,
184 				  self->sym_priv_size, 0, verbose);
185 
186 		if (sym == NULL)
187 			goto out_delete_line;
188 
189 		if (filter && filter(self, sym))
190 			symbol__delete(sym, self->sym_priv_size);
191 		else
192 			dso__insert_symbol(self, sym);
193 	}
194 
195 	/*
196 	 * Now that we have all sorted out, just set the ->end of all
197 	 * symbols
198 	 */
199 	prevnd = rb_first(&self->syms);
200 
201 	if (prevnd == NULL)
202 		goto out_delete_line;
203 
204 	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
205 		struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node),
206 			      *curr = rb_entry(nd, struct symbol, rb_node);
207 
208 		prev->end = curr->start - 1;
209 		prevnd = nd;
210 	}
211 
212 	free(line);
213 	fclose(file);
214 
215 	return 0;
216 
217 out_delete_line:
218 	free(line);
219 out_failure:
220 	return -1;
221 }
222 
223 static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verbose)
224 {
225 	char *line = NULL;
226 	size_t n;
227 	FILE *file;
228 	int nr_syms = 0;
229 
230 	file = fopen(self->name, "r");
231 	if (file == NULL)
232 		goto out_failure;
233 
234 	while (!feof(file)) {
235 		__u64 start, size;
236 		struct symbol *sym;
237 		int line_len, len;
238 
239 		line_len = getline(&line, &n, file);
240 		if (line_len < 0)
241 			break;
242 
243 		if (!line)
244 			goto out_failure;
245 
246 		line[--line_len] = '\0'; /* \n */
247 
248 		len = hex2u64(line, &start);
249 
250 		len++;
251 		if (len + 2 >= line_len)
252 			continue;
253 
254 		len += hex2u64(line + len, &size);
255 
256 		len++;
257 		if (len + 2 >= line_len)
258 			continue;
259 
260 		sym = symbol__new(start, size, line + len,
261 				  self->sym_priv_size, start, verbose);
262 
263 		if (sym == NULL)
264 			goto out_delete_line;
265 
266 		if (filter && filter(self, sym))
267 			symbol__delete(sym, self->sym_priv_size);
268 		else {
269 			dso__insert_symbol(self, sym);
270 			nr_syms++;
271 		}
272 	}
273 
274 	free(line);
275 	fclose(file);
276 
277 	return nr_syms;
278 
279 out_delete_line:
280 	free(line);
281 out_failure:
282 	return -1;
283 }
284 
/**
 * elf_symtab__for_each_symbol - iterate thru all the symbols
 *
 * @syms: Elf_Data of the symbol table to iterate
 * @nr_syms: number of entries to visit
 * @index: uint32_t index, equal to @nr_syms once the loop has run to the end
 * @sym: GElf_Sym iterator, passed by address to gelf_getsym() for each @index
 *
 * The return value of gelf_getsym() is not checked. It is also called once
 * more with @index == @nr_syms right before the loop condition ends the
 * iteration.
 */
#define elf_symtab__for_each_symbol(syms, nr_syms, index, sym) \
	for ((index) = 0, gelf_getsym((syms), (index), &(sym)); \
	     (index) < (nr_syms); \
	     (index)++, gelf_getsym((syms), (index), &(sym)))
296 
297 static inline uint8_t elf_sym__type(const GElf_Sym *sym)
298 {
299 	return GELF_ST_TYPE(sym->st_info);
300 }
301 
302 static inline int elf_sym__is_function(const GElf_Sym *sym)
303 {
304 	return elf_sym__type(sym) == STT_FUNC &&
305 	       sym->st_name != 0 &&
306 	       sym->st_shndx != SHN_UNDEF &&
307 	       sym->st_size != 0;
308 }
309 
310 static inline const char *elf_sym__name(const GElf_Sym *sym,
311 					const Elf_Data *symstrs)
312 {
313 	return symstrs->d_buf + sym->st_name;
314 }
315 
316 static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
317 				    GElf_Shdr *shp, const char *name,
318 				    size_t *index)
319 {
320 	Elf_Scn *sec = NULL;
321 	size_t cnt = 1;
322 
323 	while ((sec = elf_nextscn(elf, sec)) != NULL) {
324 		char *str;
325 
326 		gelf_getshdr(sec, shp);
327 		str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
328 		if (!strcmp(name, str)) {
329 			if (index)
330 				*index = cnt;
331 			break;
332 		}
333 		++cnt;
334 	}
335 
336 	return sec;
337 }
338 
/*
 * elf_section__for_each_rel - iterate over the entries of a REL section
 *
 * @reldata: Elf_Data of the relocation section
 * @pos: GElf_Rel pointer; holds gelf_getrel()'s return value for entry @idx
 * @pos_mem: GElf_Rel storage passed by address to gelf_getrel()
 * @idx: loop index
 * @nr_entries: number of entries to visit
 *
 * @pos is not checked here; the loop body has to cope with gelf_getrel()
 * having failed. gelf_getrel() is also called once with
 * @idx == @nr_entries right before the loop condition ends the iteration.
 */
#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
	for ((idx) = 0, (pos) = gelf_getrel((reldata), 0, &(pos_mem)); \
	     (idx) < (nr_entries); \
	     ++(idx), (pos) = gelf_getrel((reldata), (idx), &(pos_mem)))
343 
/*
 * elf_section__for_each_rela - iterate over the entries of a RELA section
 *
 * @reldata: Elf_Data of the relocation section
 * @pos: GElf_Rela pointer; holds gelf_getrela()'s return value for entry @idx
 * @pos_mem: GElf_Rela storage passed by address to gelf_getrela()
 * @idx: loop index
 * @nr_entries: number of entries to visit
 *
 * @pos is not checked here; the loop body has to cope with gelf_getrela()
 * having failed. gelf_getrela() is also called once with
 * @idx == @nr_entries right before the loop condition ends the iteration.
 */
#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \
	for ((idx) = 0, (pos) = gelf_getrela((reldata), 0, &(pos_mem)); \
	     (idx) < (nr_entries); \
	     ++(idx), (pos) = gelf_getrela((reldata), (idx), &(pos_mem)))
348 
349 static int dso__synthesize_plt_symbols(struct  dso *self, Elf *elf,
350 				       GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym,
351 				       GElf_Shdr *shdr_dynsym,
352 				       size_t dynsym_idx, int verbose)
353 {
354 	uint32_t nr_rel_entries, idx;
355 	GElf_Sym sym;
356 	__u64 plt_offset;
357 	GElf_Shdr shdr_plt;
358 	struct symbol *f;
359 	GElf_Shdr shdr_rel_plt;
360 	Elf_Data *reldata, *syms, *symstrs;
361 	Elf_Scn *scn_plt_rel, *scn_symstrs;
362 	char sympltname[1024];
363 	int nr = 0, symidx;
364 
365 	scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
366 					  ".rela.plt", NULL);
367 	if (scn_plt_rel == NULL) {
368 		scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
369 						  ".rel.plt", NULL);
370 		if (scn_plt_rel == NULL)
371 			return 0;
372 	}
373 
374 	if (shdr_rel_plt.sh_link != dynsym_idx)
375 		return 0;
376 
377 	if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL)
378 		return 0;
379 
380 	/*
381 	 * Fetch the relocation section to find the indexes to the GOT
382 	 * and the symbols in the .dynsym they refer to.
383 	 */
384 	reldata = elf_getdata(scn_plt_rel, NULL);
385 	if (reldata == NULL)
386 		return -1;
387 
388 	syms = elf_getdata(scn_dynsym, NULL);
389 	if (syms == NULL)
390 		return -1;
391 
392 	scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link);
393 	if (scn_symstrs == NULL)
394 		return -1;
395 
396 	symstrs = elf_getdata(scn_symstrs, NULL);
397 	if (symstrs == NULL)
398 		return -1;
399 
400 	nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
401 	plt_offset = shdr_plt.sh_offset;
402 
403 	if (shdr_rel_plt.sh_type == SHT_RELA) {
404 		GElf_Rela pos_mem, *pos;
405 
406 		elf_section__for_each_rela(reldata, pos, pos_mem, idx,
407 					   nr_rel_entries) {
408 			symidx = GELF_R_SYM(pos->r_info);
409 			plt_offset += shdr_plt.sh_entsize;
410 			gelf_getsym(syms, symidx, &sym);
411 			snprintf(sympltname, sizeof(sympltname),
412 				 "%s@plt", elf_sym__name(&sym, symstrs));
413 
414 			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
415 					sympltname, self->sym_priv_size, 0, verbose);
416 			if (!f)
417 				return -1;
418 
419 			dso__insert_symbol(self, f);
420 			++nr;
421 		}
422 	} else if (shdr_rel_plt.sh_type == SHT_REL) {
423 		GElf_Rel pos_mem, *pos;
424 		elf_section__for_each_rel(reldata, pos, pos_mem, idx,
425 					  nr_rel_entries) {
426 			symidx = GELF_R_SYM(pos->r_info);
427 			plt_offset += shdr_plt.sh_entsize;
428 			gelf_getsym(syms, symidx, &sym);
429 			snprintf(sympltname, sizeof(sympltname),
430 				 "%s@plt", elf_sym__name(&sym, symstrs));
431 
432 			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
433 					sympltname, self->sym_priv_size, 0, verbose);
434 			if (!f)
435 				return -1;
436 
437 			dso__insert_symbol(self, f);
438 			++nr;
439 		}
440 	} else {
441 		/*
442 		 * TODO: There are still one more shdr_rel_plt.sh_type
443 		 * I have to investigate, but probably should be ignored.
444 		 */
445 	}
446 
447 	return nr;
448 }
449 
450 static int dso__load_sym(struct dso *self, int fd, const char *name,
451 			 symbol_filter_t filter, int verbose)
452 {
453 	Elf_Data *symstrs;
454 	uint32_t nr_syms;
455 	int err = -1;
456 	uint32_t index;
457 	GElf_Ehdr ehdr;
458 	GElf_Shdr shdr;
459 	Elf_Data *syms;
460 	GElf_Sym sym;
461 	Elf_Scn *sec, *sec_dynsym;
462 	Elf *elf;
463 	size_t dynsym_idx;
464 	int nr = 0;
465 
466 	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
467 	if (elf == NULL) {
468 		if (verbose)
469 			fprintf(stderr, "%s: cannot read %s ELF file.\n",
470 				__func__, name);
471 		goto out_close;
472 	}
473 
474 	if (gelf_getehdr(elf, &ehdr) == NULL) {
475 		if (verbose)
476 			fprintf(stderr, "%s: cannot get elf header.\n", __func__);
477 		goto out_elf_end;
478 	}
479 
480 	/*
481 	 * We need to check if we have a .dynsym, so that we can handle the
482 	 * .plt, synthesizing its symbols, that aren't on the symtabs (be it
483 	 * .dynsym or .symtab)
484 	 */
485 	sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr,
486 					 ".dynsym", &dynsym_idx);
487 	if (sec_dynsym != NULL) {
488 		nr = dso__synthesize_plt_symbols(self, elf, &ehdr,
489 						 sec_dynsym, &shdr,
490 						 dynsym_idx, verbose);
491 		if (nr < 0)
492 			goto out_elf_end;
493 	}
494 
495 	/*
496 	 * But if we have a full .symtab (that is a superset of .dynsym) we
497 	 * should add the symbols not in the .dynsyn
498 	 */
499 	sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
500 	if (sec == NULL) {
501 		if (sec_dynsym == NULL)
502 			goto out_elf_end;
503 
504 		sec = sec_dynsym;
505 		gelf_getshdr(sec, &shdr);
506 	}
507 
508 	syms = elf_getdata(sec, NULL);
509 	if (syms == NULL)
510 		goto out_elf_end;
511 
512 	sec = elf_getscn(elf, shdr.sh_link);
513 	if (sec == NULL)
514 		goto out_elf_end;
515 
516 	symstrs = elf_getdata(sec, NULL);
517 	if (symstrs == NULL)
518 		goto out_elf_end;
519 
520 	nr_syms = shdr.sh_size / shdr.sh_entsize;
521 
522 	memset(&sym, 0, sizeof(sym));
523 
524 	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
525 		struct symbol *f;
526 		__u64 obj_start;
527 
528 		if (!elf_sym__is_function(&sym))
529 			continue;
530 
531 		sec = elf_getscn(elf, sym.st_shndx);
532 		if (!sec)
533 			goto out_elf_end;
534 
535 		gelf_getshdr(sec, &shdr);
536 		obj_start = sym.st_value;
537 
538 		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
539 
540 		f = symbol__new(sym.st_value, sym.st_size,
541 				elf_sym__name(&sym, symstrs),
542 				self->sym_priv_size, obj_start, verbose);
543 		if (!f)
544 			goto out_elf_end;
545 
546 		if (filter && filter(self, f))
547 			symbol__delete(f, self->sym_priv_size);
548 		else {
549 			dso__insert_symbol(self, f);
550 			nr++;
551 		}
552 	}
553 
554 	err = nr;
555 out_elf_end:
556 	elf_end(elf);
557 out_close:
558 	return err;
559 }
560 
561 int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
562 {
563 	int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug");
564 	char *name = malloc(size);
565 	int variant = 0;
566 	int ret = -1;
567 	int fd;
568 
569 	if (!name)
570 		return -1;
571 
572 	if (strncmp(self->name, "/tmp/perf-", 10) == 0)
573 		return dso__load_perf_map(self, filter, verbose);
574 
575 more:
576 	do {
577 		switch (variant) {
578 		case 0: /* Fedora */
579 			snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
580 			break;
581 		case 1: /* Ubuntu */
582 			snprintf(name, size, "/usr/lib/debug%s", self->name);
583 			break;
584 		case 2: /* Sane people */
585 			snprintf(name, size, "%s", self->name);
586 			break;
587 
588 		default:
589 			goto out;
590 		}
591 		variant++;
592 
593 		fd = open(name, O_RDONLY);
594 	} while (fd < 0);
595 
596 	ret = dso__load_sym(self, fd, name, filter, verbose);
597 	close(fd);
598 
599 	/*
600 	 * Some people seem to have debuginfo files _WITHOUT_ debug info!?!?
601 	 */
602 	if (!ret)
603 		goto more;
604 
605 out:
606 	free(name);
607 	return ret;
608 }
609 
610 static int dso__load_vmlinux(struct dso *self, const char *vmlinux,
611 			     symbol_filter_t filter, int verbose)
612 {
613 	int err, fd = open(vmlinux, O_RDONLY);
614 
615 	if (fd < 0)
616 		return -1;
617 
618 	err = dso__load_sym(self, fd, vmlinux, filter, verbose);
619 	close(fd);
620 
621 	return err;
622 }
623 
624 int dso__load_kernel(struct dso *self, const char *vmlinux,
625 		     symbol_filter_t filter, int verbose)
626 {
627 	int err = -1;
628 
629 	if (vmlinux)
630 		err = dso__load_vmlinux(self, vmlinux, filter, verbose);
631 
632 	if (err)
633 		err = dso__load_kallsyms(self, filter, verbose);
634 
635 	return err;
636 }
637 
638 void symbol__init(void)
639 {
640 	elf_version(EV_CURRENT);
641 }
642