xref: /openbmc/linux/tools/lib/bpf/btf.c (revision 740e69c3c511aa207155ba993a854b5bee79cdc2)
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 /* Copyright (c) 2018 Facebook */
3 
4 #include <endian.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <fcntl.h>
9 #include <unistd.h>
10 #include <errno.h>
11 #include <sys/utsname.h>
12 #include <sys/param.h>
13 #include <sys/stat.h>
14 #include <linux/kernel.h>
15 #include <linux/err.h>
16 #include <linux/btf.h>
17 #include <gelf.h>
18 #include "btf.h"
19 #include "bpf.h"
20 #include "libbpf.h"
21 #include "libbpf_internal.h"
22 #include "hashmap.h"
23 
24 #define BTF_MAX_NR_TYPES 0x7fffffffU
25 #define BTF_MAX_STR_OFFSET 0x7fffffffU
26 
27 static struct btf_type btf_void;
28 
29 struct btf {
30 	union {
31 		struct btf_header *hdr;
32 		void *data;
33 	};
34 	__u32 *type_offs;
35 	__u32 type_offs_cap;
36 	const char *strings;
37 	void *nohdr_data;
38 	void *types_data;
39 	__u32 nr_types;
40 	__u32 data_size;
41 	int fd;
42 	int ptr_sz;
43 };
44 
45 static inline __u64 ptr_to_u64(const void *ptr)
46 {
47 	return (__u64) (unsigned long) ptr;
48 }
49 
50 static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
51 {
52 	/* nr_types is 1-based, so N types means we need N+1-sized array */
53 	if (btf->nr_types + 2 > btf->type_offs_cap) {
54 		__u32 *new_offs;
55 		__u32 expand_by, new_size;
56 
57 		if (btf->type_offs_cap == BTF_MAX_NR_TYPES)
58 			return -E2BIG;
59 
60 		expand_by = max(btf->type_offs_cap / 4, 16U);
61 		new_size = min(BTF_MAX_NR_TYPES, btf->type_offs_cap + expand_by);
62 
63 		new_offs = libbpf_reallocarray(btf->type_offs, new_size, sizeof(*new_offs));
64 		if (!new_offs)
65 			return -ENOMEM;
66 
67 		new_offs[0] = UINT_MAX; /* VOID is specially handled */
68 
69 		btf->type_offs = new_offs;
70 		btf->type_offs_cap = new_size;
71 	}
72 
73 	btf->type_offs[btf->nr_types + 1] = type_off;
74 
75 	return 0;
76 }
77 
78 static int btf_parse_hdr(struct btf *btf)
79 {
80 	const struct btf_header *hdr = btf->hdr;
81 	__u32 meta_left;
82 
83 	if (btf->data_size < sizeof(struct btf_header)) {
84 		pr_debug("BTF header not found\n");
85 		return -EINVAL;
86 	}
87 
88 	if (hdr->magic != BTF_MAGIC) {
89 		pr_debug("Invalid BTF magic:%x\n", hdr->magic);
90 		return -EINVAL;
91 	}
92 
93 	if (hdr->version != BTF_VERSION) {
94 		pr_debug("Unsupported BTF version:%u\n", hdr->version);
95 		return -ENOTSUP;
96 	}
97 
98 	if (hdr->flags) {
99 		pr_debug("Unsupported BTF flags:%x\n", hdr->flags);
100 		return -ENOTSUP;
101 	}
102 
103 	meta_left = btf->data_size - sizeof(*hdr);
104 	if (!meta_left) {
105 		pr_debug("BTF has no data\n");
106 		return -EINVAL;
107 	}
108 
109 	if (meta_left < hdr->type_off) {
110 		pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off);
111 		return -EINVAL;
112 	}
113 
114 	if (meta_left < hdr->str_off) {
115 		pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off);
116 		return -EINVAL;
117 	}
118 
119 	if (hdr->type_off >= hdr->str_off) {
120 		pr_debug("BTF type section offset >= string section offset. No type?\n");
121 		return -EINVAL;
122 	}
123 
124 	if (hdr->type_off & 0x02) {
125 		pr_debug("BTF type section is not aligned to 4 bytes\n");
126 		return -EINVAL;
127 	}
128 
129 	btf->nohdr_data = btf->hdr + 1;
130 
131 	return 0;
132 }
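
/*
 * Illustrative sketch (not part of libbpf): per the checks above, a minimal
 * raw BTF blob is a header, at least one type (an empty type section is
 * rejected by the type_off >= str_off check), and a string section whose
 * first and last bytes are '\0'. Offsets are relative to the end of the
 * header. Assuming caller-provided section lengths:
 */
static void example_fill_minimal_btf_hdr(struct btf_header *hdr,
					 __u32 type_len, __u32 str_len)
{
	hdr->magic = BTF_MAGIC;		/* 0xeB9F, also used to sniff raw BTF files */
	hdr->version = BTF_VERSION;
	hdr->flags = 0;			/* non-zero flags are rejected */
	hdr->hdr_len = sizeof(*hdr);
	hdr->type_off = 0;		/* type data starts right after the header */
	hdr->type_len = type_len;
	hdr->str_off = type_len;	/* strings follow the type section */
	hdr->str_len = str_len;		/* >= 1; starts and ends with '\0' */
}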
133 
134 static int btf_parse_str_sec(struct btf *btf)
135 {
136 	const struct btf_header *hdr = btf->hdr;
137 	const char *start = btf->nohdr_data + hdr->str_off;
138 	const char *end = start + btf->hdr->str_len;
139 
140 	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
141 	    start[0] || end[-1]) {
142 		pr_debug("Invalid BTF string section\n");
143 		return -EINVAL;
144 	}
145 
146 	btf->strings = start;
147 
148 	return 0;
149 }
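
/*
 * Illustrative example (not from the source): the string section is a
 * concatenation of NUL-terminated strings, with offset 0 reserved for the
 * empty string. For a blob "\0int\0long int\0", lookups resolve as:
 *
 *	btf__name_by_offset(btf, 0) -> ""
 *	btf__name_by_offset(btf, 1) -> "int"
 *	btf__name_by_offset(btf, 5) -> "long int"
 */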
150 
151 static int btf_type_size(const struct btf_type *t)
152 {
153 	int base_size = sizeof(struct btf_type);
154 	__u16 vlen = btf_vlen(t);
155 
156 	switch (btf_kind(t)) {
157 	case BTF_KIND_FWD:
158 	case BTF_KIND_CONST:
159 	case BTF_KIND_VOLATILE:
160 	case BTF_KIND_RESTRICT:
161 	case BTF_KIND_PTR:
162 	case BTF_KIND_TYPEDEF:
163 	case BTF_KIND_FUNC:
164 		return base_size;
165 	case BTF_KIND_INT:
166 		return base_size + sizeof(__u32);
167 	case BTF_KIND_ENUM:
168 		return base_size + vlen * sizeof(struct btf_enum);
169 	case BTF_KIND_ARRAY:
170 		return base_size + sizeof(struct btf_array);
171 	case BTF_KIND_STRUCT:
172 	case BTF_KIND_UNION:
173 		return base_size + vlen * sizeof(struct btf_member);
174 	case BTF_KIND_FUNC_PROTO:
175 		return base_size + vlen * sizeof(struct btf_param);
176 	case BTF_KIND_VAR:
177 		return base_size + sizeof(struct btf_var);
178 	case BTF_KIND_DATASEC:
179 		return base_size + vlen * sizeof(struct btf_var_secinfo);
180 	default:
181 		pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
182 		return -EINVAL;
183 	}
184 }
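
/*
 * Illustrative example (not from the source): each type is encoded as the
 * fixed struct btf_type followed by a kind-specific trailer, so, e.g.,
 * "struct S { int a; long b; }" occupies
 *
 *	sizeof(struct btf_type) + 2 * sizeof(struct btf_member)
 *
 * bytes in the type section, while an INT carries one extra __u32 of
 * encoding data (signedness, bit offset, bit size).
 */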
185 
186 static int btf_parse_type_sec(struct btf *btf)
187 {
188 	struct btf_header *hdr = btf->hdr;
189 	void *next_type = btf->nohdr_data + hdr->type_off;
190 	void *end_type = next_type + hdr->type_len;
191 
192 	btf->types_data = next_type;
193 
194 	while (next_type < end_type) {
195 		int type_size;
196 		int err;
197 
198 		err = btf_add_type_idx_entry(btf, next_type - btf->types_data);
199 		if (err)
200 			return err;
201 
202 		type_size = btf_type_size(next_type);
203 		if (type_size < 0)
204 			return type_size;
205 
206 		next_type += type_size;
207 		btf->nr_types++;
208 	}
209 
210 	return 0;
211 }
212 
213 __u32 btf__get_nr_types(const struct btf *btf)
214 {
215 	return btf->nr_types;
216 }
217 
218 /* internal helper returning non-const pointer to a type */
219 static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
220 {
221 	if (type_id == 0)
222 		return &btf_void;
223 
224 	return btf->types_data + btf->type_offs[type_id];
225 }
226 
227 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
228 {
229 	if (type_id > btf->nr_types)
230 		return NULL;
231 	return btf_type_by_id((struct btf *)btf, type_id);
232 }
233 
234 static int determine_ptr_size(const struct btf *btf)
235 {
236 	const struct btf_type *t;
237 	const char *name;
238 	int i;
239 
240 	for (i = 1; i <= btf->nr_types; i++) {
241 		t = btf__type_by_id(btf, i);
242 		if (!btf_is_int(t))
243 			continue;
244 
245 		name = btf__name_by_offset(btf, t->name_off);
246 		if (!name)
247 			continue;
248 
249 		if (strcmp(name, "long int") == 0 ||
250 		    strcmp(name, "long unsigned int") == 0) {
251 			if (t->size != 4 && t->size != 8)
252 				continue;
253 			return t->size;
254 		}
255 	}
256 
257 	return -1;
258 }
259 
260 static size_t btf_ptr_sz(const struct btf *btf)
261 {
262 	if (!btf->ptr_sz)
263 		((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
264 	return btf->ptr_sz < 0 ? sizeof(void *) : btf->ptr_sz;
265 }
266 
267 /* Return pointer size this BTF instance assumes. The size is heuristically
268  * determined by looking for 'long' or 'unsigned long' integer type and
269  * recording its size in bytes. If BTF type information doesn't have any such
270  * type, this function returns 0. In the latter case, native architecture's
271  * pointer size is assumed, so it will be either 4 or 8, depending on the
272  * architecture that libbpf was compiled for. It's possible to override the
273  * guessed value by using the btf__set_pointer_size() API.
274  */
275 size_t btf__pointer_size(const struct btf *btf)
276 {
277 	if (!btf->ptr_sz)
278 		((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
279 
280 	if (btf->ptr_sz < 0)
281 		/* not enough BTF type info to guess */
282 		return 0;
283 
284 	return btf->ptr_sz;
285 }
286 
287 /* Override or set pointer size in bytes. Only values of 4 and 8 are
288  * supported.
289  */
290 int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
291 {
292 	if (ptr_sz != 4 && ptr_sz != 8)
293 		return -EINVAL;
294 	btf->ptr_sz = ptr_sz;
295 	return 0;
296 }
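
/*
 * Usage sketch (illustrative, not part of this file): when parsing BTF for a
 * 32-bit target on a 64-bit host, the heuristic above can guess wrong or
 * fail, so the pointer size can be pinned explicitly:
 *
 *	struct btf *btf = btf__new(data, size);
 *
 *	// assume an ILP32 target if BTF itself doesn't say
 *	if (!IS_ERR(btf) && btf__pointer_size(btf) == 0)
 *		btf__set_pointer_size(btf, 4);
 */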
297 
298 static bool btf_type_is_void(const struct btf_type *t)
299 {
300 	return t == &btf_void || btf_is_fwd(t);
301 }
302 
303 static bool btf_type_is_void_or_null(const struct btf_type *t)
304 {
305 	return !t || btf_type_is_void(t);
306 }
307 
308 #define MAX_RESOLVE_DEPTH 32
309 
310 __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
311 {
312 	const struct btf_array *array;
313 	const struct btf_type *t;
314 	__u32 nelems = 1;
315 	__s64 size = -1;
316 	int i;
317 
318 	t = btf__type_by_id(btf, type_id);
319 	for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
320 	     i++) {
321 		switch (btf_kind(t)) {
322 		case BTF_KIND_INT:
323 		case BTF_KIND_STRUCT:
324 		case BTF_KIND_UNION:
325 		case BTF_KIND_ENUM:
326 		case BTF_KIND_DATASEC:
327 			size = t->size;
328 			goto done;
329 		case BTF_KIND_PTR:
330 			size = btf_ptr_sz(btf);
331 			goto done;
332 		case BTF_KIND_TYPEDEF:
333 		case BTF_KIND_VOLATILE:
334 		case BTF_KIND_CONST:
335 		case BTF_KIND_RESTRICT:
336 		case BTF_KIND_VAR:
337 			type_id = t->type;
338 			break;
339 		case BTF_KIND_ARRAY:
340 			array = btf_array(t);
341 			if (nelems && array->nelems > UINT32_MAX / nelems)
342 				return -E2BIG;
343 			nelems *= array->nelems;
344 			type_id = array->type;
345 			break;
346 		default:
347 			return -EINVAL;
348 		}
349 
350 		t = btf__type_by_id(btf, type_id);
351 	}
352 
353 done:
354 	if (size < 0)
355 		return -EINVAL;
356 	if (nelems && size > UINT32_MAX / nelems)
357 		return -E2BIG;
358 
359 	return nelems * size;
360 }
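
/*
 * Illustrative example (not from the source): btf__resolve_size() follows
 * typedefs/modifiers and multiplies out array dimensions. For
 *
 *	typedef int arr_t[4][8];
 *
 * resolving arr_t's type ID yields 4 * 8 * sizeof(int) == 128, while any
 * pointer type resolves to btf_ptr_sz(), i.e. 4 or 8 bytes.
 */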
361 
362 int btf__align_of(const struct btf *btf, __u32 id)
363 {
364 	const struct btf_type *t = btf__type_by_id(btf, id);
365 	__u16 kind = btf_kind(t);
366 
367 	switch (kind) {
368 	case BTF_KIND_INT:
369 	case BTF_KIND_ENUM:
370 		return min(btf_ptr_sz(btf), (size_t)t->size);
371 	case BTF_KIND_PTR:
372 		return btf_ptr_sz(btf);
373 	case BTF_KIND_TYPEDEF:
374 	case BTF_KIND_VOLATILE:
375 	case BTF_KIND_CONST:
376 	case BTF_KIND_RESTRICT:
377 		return btf__align_of(btf, t->type);
378 	case BTF_KIND_ARRAY:
379 		return btf__align_of(btf, btf_array(t)->type);
380 	case BTF_KIND_STRUCT:
381 	case BTF_KIND_UNION: {
382 		const struct btf_member *m = btf_members(t);
383 		__u16 vlen = btf_vlen(t);
384 		int i, max_align = 1, align;
385 
386 		for (i = 0; i < vlen; i++, m++) {
387 			align = btf__align_of(btf, m->type);
388 			if (align <= 0)
389 				return align;
390 			max_align = max(max_align, align);
391 		}
392 
393 		return max_align;
394 	}
395 	default:
396 		pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
397 		return 0;
398 	}
399 }
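
/*
 * Illustrative example (not from the source): for "struct { char c; long l; }"
 * on an LP64 target, btf__align_of() recurses into the members and returns
 * max(1, 8) == 8. A __u64 member in 32-bit BTF is capped at btf_ptr_sz() == 4,
 * matching the min(ptr_sz, size) rule for INT/ENUM above.
 */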
400 
401 int btf__resolve_type(const struct btf *btf, __u32 type_id)
402 {
403 	const struct btf_type *t;
404 	int depth = 0;
405 
406 	t = btf__type_by_id(btf, type_id);
407 	while (depth < MAX_RESOLVE_DEPTH &&
408 	       !btf_type_is_void_or_null(t) &&
409 	       (btf_is_mod(t) || btf_is_typedef(t) || btf_is_var(t))) {
410 		type_id = t->type;
411 		t = btf__type_by_id(btf, type_id);
412 		depth++;
413 	}
414 
415 	if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
416 		return -EINVAL;
417 
418 	return type_id;
419 }
420 
421 __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
422 {
423 	__u32 i;
424 
425 	if (!strcmp(type_name, "void"))
426 		return 0;
427 
428 	for (i = 1; i <= btf->nr_types; i++) {
429 		const struct btf_type *t = btf__type_by_id(btf, i);
430 		const char *name = btf__name_by_offset(btf, t->name_off);
431 
432 		if (name && !strcmp(type_name, name))
433 			return i;
434 	}
435 
436 	return -ENOENT;
437 }
438 
439 __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
440 			     __u32 kind)
441 {
442 	__u32 i;
443 
444 	if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
445 		return 0;
446 
447 	for (i = 1; i <= btf->nr_types; i++) {
448 		const struct btf_type *t = btf__type_by_id(btf, i);
449 		const char *name;
450 
451 		if (btf_kind(t) != kind)
452 			continue;
453 		name = btf__name_by_offset(btf, t->name_off);
454 		if (name && !strcmp(type_name, name))
455 			return i;
456 	}
457 
458 	return -ENOENT;
459 }
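
/*
 * Usage sketch (illustrative, not part of this file): looking up a struct by
 * name, then inspecting its definition:
 *
 *	__s32 id = btf__find_by_name_kind(btf, "task_struct", BTF_KIND_STRUCT);
 *
 *	if (id > 0) {
 *		const struct btf_type *t = btf__type_by_id(btf, id);
 *		// t->size is the struct size, btf_vlen(t) its member count
 *	}
 */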
460 
461 void btf__free(struct btf *btf)
462 {
463 	if (IS_ERR_OR_NULL(btf))
464 		return;
465 
466 	if (btf->fd >= 0)
467 		close(btf->fd);
468 
469 	free(btf->data);
470 	free(btf->type_offs);
471 	free(btf);
472 }
473 
474 struct btf *btf__new(const void *data, __u32 size)
475 {
476 	struct btf *btf;
477 	int err;
478 
479 	btf = calloc(1, sizeof(struct btf));
480 	if (!btf)
481 		return ERR_PTR(-ENOMEM);
482 
483 	btf->fd = -1;
484 
485 	btf->data = malloc(size);
486 	if (!btf->data) {
487 		err = -ENOMEM;
488 		goto done;
489 	}
490 
491 	memcpy(btf->data, data, size);
492 	btf->data_size = size;
493 
494 	err = btf_parse_hdr(btf);
495 	if (err)
496 		goto done;
497 
498 	err = btf_parse_str_sec(btf);
499 	if (err)
500 		goto done;
501 
502 	err = btf_parse_type_sec(btf);
503 
504 done:
505 	if (err) {
506 		btf__free(btf);
507 		return ERR_PTR(err);
508 	}
509 
510 	return btf;
511 }
512 
513 static bool btf_check_endianness(const GElf_Ehdr *ehdr)
514 {
515 #if __BYTE_ORDER == __LITTLE_ENDIAN
516 	return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
517 #elif __BYTE_ORDER == __BIG_ENDIAN
518 	return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
519 #else
520 # error "Unrecognized __BYTE_ORDER"
521 #endif
522 }
523 
524 struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
525 {
526 	Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
527 	int err = 0, fd = -1, idx = 0;
528 	struct btf *btf = NULL;
529 	Elf_Scn *scn = NULL;
530 	Elf *elf = NULL;
531 	GElf_Ehdr ehdr;
532 
533 	if (elf_version(EV_CURRENT) == EV_NONE) {
534 		pr_warn("failed to init libelf for %s\n", path);
535 		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
536 	}
537 
538 	fd = open(path, O_RDONLY);
539 	if (fd < 0) {
540 		err = -errno;
541 		pr_warn("failed to open %s: %s\n", path, strerror(errno));
542 		return ERR_PTR(err);
543 	}
544 
545 	err = -LIBBPF_ERRNO__FORMAT;
546 
547 	elf = elf_begin(fd, ELF_C_READ, NULL);
548 	if (!elf) {
549 		pr_warn("failed to open %s as ELF file\n", path);
550 		goto done;
551 	}
552 	if (!gelf_getehdr(elf, &ehdr)) {
553 		pr_warn("failed to get EHDR from %s\n", path);
554 		goto done;
555 	}
556 	if (!btf_check_endianness(&ehdr)) {
557 		pr_warn("non-native ELF endianness is not supported\n");
558 		goto done;
559 	}
560 	if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
561 		pr_warn("failed to get e_shstrndx from %s\n", path);
562 		goto done;
563 	}
564 
565 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
566 		GElf_Shdr sh;
567 		char *name;
568 
569 		idx++;
570 		if (gelf_getshdr(scn, &sh) != &sh) {
571 			pr_warn("failed to get section(%d) header from %s\n",
572 				idx, path);
573 			goto done;
574 		}
575 		name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
576 		if (!name) {
577 			pr_warn("failed to get section(%d) name from %s\n",
578 				idx, path);
579 			goto done;
580 		}
581 		if (strcmp(name, BTF_ELF_SEC) == 0) {
582 			btf_data = elf_getdata(scn, 0);
583 			if (!btf_data) {
584 				pr_warn("failed to get section(%d, %s) data from %s\n",
585 					idx, name, path);
586 				goto done;
587 			}
588 			continue;
589 		} else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
590 			btf_ext_data = elf_getdata(scn, 0);
591 			if (!btf_ext_data) {
592 				pr_warn("failed to get section(%d, %s) data from %s\n",
593 					idx, name, path);
594 				goto done;
595 			}
596 			continue;
597 		}
598 	}
599 
600 	err = 0;
601 
602 	if (!btf_data) {
603 		err = -ENOENT;
604 		goto done;
605 	}
606 	btf = btf__new(btf_data->d_buf, btf_data->d_size);
607 	if (IS_ERR(btf))
608 		goto done;
609 
610 	switch (gelf_getclass(elf)) {
611 	case ELFCLASS32:
612 		btf__set_pointer_size(btf, 4);
613 		break;
614 	case ELFCLASS64:
615 		btf__set_pointer_size(btf, 8);
616 		break;
617 	default:
618 		pr_warn("failed to get ELF class (bitness) for %s\n", path);
619 		break;
620 	}
621 
622 	if (btf_ext && btf_ext_data) {
623 		*btf_ext = btf_ext__new(btf_ext_data->d_buf,
624 					btf_ext_data->d_size);
625 		if (IS_ERR(*btf_ext))
626 			goto done;
627 	} else if (btf_ext) {
628 		*btf_ext = NULL;
629 	}
630 done:
631 	if (elf)
632 		elf_end(elf);
633 	close(fd);
634 
635 	if (err)
636 		return ERR_PTR(err);
637 	/*
638 	 * btf is always parsed before btf_ext, so there is no need to clean
639 	 * up btf_ext if btf loading failed
640 	 */
641 	if (IS_ERR(btf))
642 		return btf;
643 	if (btf_ext && IS_ERR(*btf_ext)) {
644 		btf__free(btf);
645 		err = PTR_ERR(*btf_ext);
646 		return ERR_PTR(err);
647 	}
648 	return btf;
649 }
650 
651 struct btf *btf__parse_raw(const char *path)
652 {
653 	struct btf *btf = NULL;
654 	void *data = NULL;
655 	FILE *f = NULL;
656 	__u16 magic;
657 	int err = 0;
658 	long sz;
659 
660 	f = fopen(path, "rb");
661 	if (!f) {
662 		err = -errno;
663 		goto err_out;
664 	}
665 
666 	/* check BTF magic */
667 	if (fread(&magic, 1, sizeof(magic), f) < sizeof(magic)) {
668 		err = -EIO;
669 		goto err_out;
670 	}
671 	if (magic != BTF_MAGIC) {
672 		/* definitely not a raw BTF */
673 		err = -EPROTO;
674 		goto err_out;
675 	}
676 
677 	/* get file size */
678 	if (fseek(f, 0, SEEK_END)) {
679 		err = -errno;
680 		goto err_out;
681 	}
682 	sz = ftell(f);
683 	if (sz < 0) {
684 		err = -errno;
685 		goto err_out;
686 	}
687 	/* rewind to the start */
688 	if (fseek(f, 0, SEEK_SET)) {
689 		err = -errno;
690 		goto err_out;
691 	}
692 
693 	/* pre-alloc memory and read all of BTF data */
694 	data = malloc(sz);
695 	if (!data) {
696 		err = -ENOMEM;
697 		goto err_out;
698 	}
699 	if (fread(data, 1, sz, f) < sz) {
700 		err = -EIO;
701 		goto err_out;
702 	}
703 
704 	/* finally parse BTF data */
705 	btf = btf__new(data, sz);
706 
707 err_out:
708 	free(data);
709 	if (f)
710 		fclose(f);
711 	return err ? ERR_PTR(err) : btf;
712 }
713 
714 struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
715 {
716 	struct btf *btf;
717 
718 	if (btf_ext)
719 		*btf_ext = NULL;
720 
721 	btf = btf__parse_raw(path);
722 	if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
723 		return btf;
724 
725 	return btf__parse_elf(path, btf_ext);
726 }
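
/*
 * Usage sketch (illustrative, not part of this file): btf__parse() tries raw
 * BTF first (e.g. /sys/kernel/btf/vmlinux) and falls back to ELF only when
 * the magic doesn't match (-EPROTO):
 *
 *	struct btf_ext *btf_ext = NULL;
 *	struct btf *btf = btf__parse("prog.bpf.o", &btf_ext);
 *
 *	if (IS_ERR(btf))
 *		return PTR_ERR(btf);
 *	// ... use btf and (possibly NULL) btf_ext ...
 *	btf_ext__free(btf_ext);
 *	btf__free(btf);
 */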
727 
728 static int compare_vsi_off(const void *_a, const void *_b)
729 {
730 	const struct btf_var_secinfo *a = _a;
731 	const struct btf_var_secinfo *b = _b;
732 
733 	return a->offset - b->offset;
734 }
735 
736 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
737 			     struct btf_type *t)
738 {
739 	__u32 size = 0, off = 0, i, vars = btf_vlen(t);
740 	const char *name = btf__name_by_offset(btf, t->name_off);
741 	const struct btf_type *t_var;
742 	struct btf_var_secinfo *vsi;
743 	const struct btf_var *var;
744 	int ret;
745 
746 	if (!name) {
747 		pr_debug("No name found in string section for DATASEC kind.\n");
748 		return -ENOENT;
749 	}
750 
751 	/* .extern datasec size and var offsets were set correctly during
752 	 * extern collection step, so just skip straight to sorting variables
753 	 */
754 	if (t->size)
755 		goto sort_vars;
756 
757 	ret = bpf_object__section_size(obj, name, &size);
758 	if (ret || !size || (t->size && t->size != size)) {
759 		pr_debug("Invalid size for section %s: %u bytes\n", name, size);
760 		return -ENOENT;
761 	}
762 
763 	t->size = size;
764 
765 	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
766 		t_var = btf__type_by_id(btf, vsi->type);
767 		var = btf_var(t_var);
768 
769 		if (!btf_is_var(t_var)) {
770 			pr_debug("Non-VAR type seen in section %s\n", name);
771 			return -EINVAL;
772 		}
773 
774 		if (var->linkage == BTF_VAR_STATIC)
775 			continue;
776 
777 		name = btf__name_by_offset(btf, t_var->name_off);
778 		if (!name) {
779 			pr_debug("No name found in string section for VAR kind\n");
780 			return -ENOENT;
781 		}
782 
783 		ret = bpf_object__variable_offset(obj, name, &off);
784 		if (ret) {
785 			pr_debug("No offset found in symbol table for VAR %s\n",
786 				 name);
787 			return -ENOENT;
788 		}
789 
790 		vsi->offset = off;
791 	}
792 
793 sort_vars:
794 	qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
795 	return 0;
796 }
797 
798 int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
799 {
800 	int err = 0;
801 	__u32 i;
802 
803 	for (i = 1; i <= btf->nr_types; i++) {
804 		struct btf_type *t = btf_type_by_id(btf, i);
805 
806 		/* Loader needs to fix up some of the things compiler
807 		 * couldn't get its hands on while emitting BTF. This
808 		 * is section size and global variable offset. We use
809 		 * the info from the ELF itself for this purpose.
810 		 */
811 		if (btf_is_datasec(t)) {
812 			err = btf_fixup_datasec(obj, btf, t);
813 			if (err)
814 				break;
815 		}
816 	}
817 
818 	return err;
819 }
820 
821 int btf__load(struct btf *btf)
822 {
823 	__u32 log_buf_size = 0;
824 	char *log_buf = NULL;
825 	int err = 0;
826 
827 	if (btf->fd >= 0)
828 		return -EEXIST;
829 
830 retry_load:
831 	if (log_buf_size) {
832 		log_buf = malloc(log_buf_size);
833 		if (!log_buf)
834 			return -ENOMEM;
835 
836 		*log_buf = 0;
837 	}
838 
839 	btf->fd = bpf_load_btf(btf->data, btf->data_size,
840 			       log_buf, log_buf_size, false);
841 	if (btf->fd < 0) {
842 		if (!log_buf || errno == ENOSPC) {
843 			log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
844 					   log_buf_size << 1);
845 			free(log_buf);
846 			goto retry_load;
847 		}
848 
849 		err = -errno;
850 		pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
851 		if (*log_buf)
852 			pr_warn("%s\n", log_buf);
853 		goto done;
854 	}
855 
856 done:
857 	free(log_buf);
858 	return err;
859 }
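
/*
 * Usage sketch (illustrative, not part of this file): loading BTF into the
 * kernel and using the resulting fd, e.g. when creating a map with BTF-backed
 * key/value types:
 *
 *	int err = btf__load(btf);
 *
 *	if (err)
 *		return err;
 *	// hypothetical wiring into a map creation attribute
 *	create_attr.btf_fd = btf__fd(btf);
 */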
860 
861 int btf__fd(const struct btf *btf)
862 {
863 	return btf->fd;
864 }
865 
866 void btf__set_fd(struct btf *btf, int fd)
867 {
868 	btf->fd = fd;
869 }
870 
871 const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
872 {
873 	*size = btf->data_size;
874 	return btf->data;
875 }
876 
877 const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
878 {
879 	if (offset < btf->hdr->str_len)
880 		return &btf->strings[offset];
881 	else
882 		return NULL;
883 }
884 
885 int btf__get_from_id(__u32 id, struct btf **btf)
886 {
887 	struct bpf_btf_info btf_info = { 0 };
888 	__u32 len = sizeof(btf_info);
889 	__u32 last_size;
890 	int btf_fd;
891 	void *ptr;
892 	int err;
893 
894 	err = 0;
895 	*btf = NULL;
896 	btf_fd = bpf_btf_get_fd_by_id(id);
897 	if (btf_fd < 0)
898 		return 0;
899 
900 	/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). So
901 	 * let's start with a sane default - 4KiB here - and resize it only if
902 	 * bpf_obj_get_info_by_fd() needs a bigger buffer.
903 	 */
904 	btf_info.btf_size = 4096;
905 	last_size = btf_info.btf_size;
906 	ptr = malloc(last_size);
907 	if (!ptr) {
908 		err = -ENOMEM;
909 		goto exit_free;
910 	}
911 
912 	memset(ptr, 0, last_size);
913 	btf_info.btf = ptr_to_u64(ptr);
914 	err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
915 
916 	if (!err && btf_info.btf_size > last_size) {
917 		void *temp_ptr;
918 
919 		last_size = btf_info.btf_size;
920 		temp_ptr = realloc(ptr, last_size);
921 		if (!temp_ptr) {
922 			err = -ENOMEM;
923 			goto exit_free;
924 		}
925 		ptr = temp_ptr;
926 		memset(ptr, 0, last_size);
927 		btf_info.btf = ptr_to_u64(ptr);
928 		err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
929 	}
930 
931 	if (err || btf_info.btf_size > last_size) {
932 		err = errno;
933 		goto exit_free;
934 	}
935 
936 	*btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
937 	if (IS_ERR(*btf)) {
938 		err = PTR_ERR(*btf);
939 		*btf = NULL;
940 	}
941 
942 exit_free:
943 	close(btf_fd);
944 	free(ptr);
945 
946 	return err;
947 }
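
/*
 * Usage sketch (illustrative, not part of this file): fetching the BTF of an
 * already-loaded kernel object by its BTF object ID. Note that on fd-lookup
 * failure the function above returns 0 with *btf left NULL, so both the
 * return value and *btf must be checked:
 *
 *	struct btf *btf = NULL;
 *	int err = btf__get_from_id(btf_id, &btf);
 *
 *	if (!err && btf) {
 *		// ... use btf ...
 *		btf__free(btf);
 *	}
 */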
948 
949 int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
950 			 __u32 expected_key_size, __u32 expected_value_size,
951 			 __u32 *key_type_id, __u32 *value_type_id)
952 {
953 	const struct btf_type *container_type;
954 	const struct btf_member *key, *value;
955 	const size_t max_name = 256;
956 	char container_name[max_name];
957 	__s64 key_size, value_size;
958 	__s32 container_id;
959 
960 	if (snprintf(container_name, max_name, "____btf_map_%s", map_name) >=
961 	    max_name) {
962 		pr_warn("map:%s length of '____btf_map_%s' is too long\n",
963 			map_name, map_name);
964 		return -EINVAL;
965 	}
966 
967 	container_id = btf__find_by_name(btf, container_name);
968 	if (container_id < 0) {
969 		pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
970 			 map_name, container_name);
971 		return container_id;
972 	}
973 
974 	container_type = btf__type_by_id(btf, container_id);
975 	if (!container_type) {
976 		pr_warn("map:%s cannot find BTF type for container_id:%u\n",
977 			map_name, container_id);
978 		return -EINVAL;
979 	}
980 
981 	if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
982 		pr_warn("map:%s container_name:%s is an invalid container struct\n",
983 			map_name, container_name);
984 		return -EINVAL;
985 	}
986 
987 	key = btf_members(container_type);
988 	value = key + 1;
989 
990 	key_size = btf__resolve_size(btf, key->type);
991 	if (key_size < 0) {
992 		pr_warn("map:%s invalid BTF key_type_size\n", map_name);
993 		return key_size;
994 	}
995 
996 	if (expected_key_size != key_size) {
997 		pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
998 			map_name, (__u32)key_size, expected_key_size);
999 		return -EINVAL;
1000 	}
1001 
1002 	value_size = btf__resolve_size(btf, value->type);
1003 	if (value_size < 0) {
1004 		pr_warn("map:%s invalid BTF value_type_size\n", map_name);
1005 		return value_size;
1006 	}
1007 
1008 	if (expected_value_size != value_size) {
1009 		pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
1010 			map_name, (__u32)value_size, expected_value_size);
1011 		return -EINVAL;
1012 	}
1013 
1014 	*key_type_id = key->type;
1015 	*value_type_id = value->type;
1016 
1017 	return 0;
1018 }
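
/*
 * Illustrative example (not from the source): the "____btf_map_<name>"
 * container looked up above is what the BPF_ANNOTATE_KV_PAIR() helper macro
 * emits. For a map named "my_map" with int keys and struct val values it
 * boils down to:
 *
 *	struct ____btf_map_my_map {
 *		int key;
 *		struct val value;
 *	};
 *
 * and the key/value type IDs are taken from the first two members.
 */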
1019 
1020 struct btf_ext_sec_setup_param {
1021 	__u32 off;
1022 	__u32 len;
1023 	__u32 min_rec_size;
1024 	struct btf_ext_info *ext_info;
1025 	const char *desc;
1026 };
1027 
1028 static int btf_ext_setup_info(struct btf_ext *btf_ext,
1029 			      struct btf_ext_sec_setup_param *ext_sec)
1030 {
1031 	const struct btf_ext_info_sec *sinfo;
1032 	struct btf_ext_info *ext_info;
1033 	__u32 info_left, record_size;
1034 	/* The start of the info sec (including the __u32 record_size). */
1035 	void *info;
1036 
1037 	if (ext_sec->len == 0)
1038 		return 0;
1039 
1040 	if (ext_sec->off & 0x03) {
1041 		pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
1042 		     ext_sec->desc);
1043 		return -EINVAL;
1044 	}
1045 
1046 	info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off;
1047 	info_left = ext_sec->len;
1048 
1049 	if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) {
1050 		pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
1051 			 ext_sec->desc, ext_sec->off, ext_sec->len);
1052 		return -EINVAL;
1053 	}
1054 
1055 	/* At least a record size */
1056 	if (info_left < sizeof(__u32)) {
1057 		pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
1058 		return -EINVAL;
1059 	}
1060 
1061 	/* The record size needs to meet the minimum size and be 4-byte aligned */
1062 	record_size = *(__u32 *)info;
1063 	if (record_size < ext_sec->min_rec_size ||
1064 	    record_size & 0x03) {
1065 		pr_debug("%s section in .BTF.ext has invalid record size %u\n",
1066 			 ext_sec->desc, record_size);
1067 		return -EINVAL;
1068 	}
1069 
1070 	sinfo = info + sizeof(__u32);
1071 	info_left -= sizeof(__u32);
1072 
1073 	/* If no records, return failure now so .BTF.ext won't be used. */
1074 	if (!info_left) {
1075 		pr_debug("%s section in .BTF.ext has no records\n", ext_sec->desc);
1076 		return -EINVAL;
1077 	}
1078 
1079 	while (info_left) {
1080 		unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec);
1081 		__u64 total_record_size;
1082 		__u32 num_records;
1083 
1084 		if (info_left < sec_hdrlen) {
1085 			pr_debug("%s section header is not found in .BTF.ext\n",
1086 			     ext_sec->desc);
1087 			return -EINVAL;
1088 		}
1089 
1090 		num_records = sinfo->num_info;
1091 		if (num_records == 0) {
1092 			pr_debug("%s section has incorrect num_records in .BTF.ext\n",
1093 			     ext_sec->desc);
1094 			return -EINVAL;
1095 		}
1096 
1097 		total_record_size = sec_hdrlen +
1098 				    (__u64)num_records * record_size;
1099 		if (info_left < total_record_size) {
1100 			pr_debug("%s section has incorrect num_records in .BTF.ext\n",
1101 			     ext_sec->desc);
1102 			return -EINVAL;
1103 		}
1104 
1105 		info_left -= total_record_size;
1106 		sinfo = (void *)sinfo + total_record_size;
1107 	}
1108 
1109 	ext_info = ext_sec->ext_info;
1110 	ext_info->len = ext_sec->len - sizeof(__u32);
1111 	ext_info->rec_size = record_size;
1112 	ext_info->info = info + sizeof(__u32);
1113 
1114 	return 0;
1115 }
1116 
1117 static int btf_ext_setup_func_info(struct btf_ext *btf_ext)
1118 {
1119 	struct btf_ext_sec_setup_param param = {
1120 		.off = btf_ext->hdr->func_info_off,
1121 		.len = btf_ext->hdr->func_info_len,
1122 		.min_rec_size = sizeof(struct bpf_func_info_min),
1123 		.ext_info = &btf_ext->func_info,
1124 		.desc = "func_info"
1125 	};
1126 
1127 	return btf_ext_setup_info(btf_ext, &param);
1128 }
1129 
1130 static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
1131 {
1132 	struct btf_ext_sec_setup_param param = {
1133 		.off = btf_ext->hdr->line_info_off,
1134 		.len = btf_ext->hdr->line_info_len,
1135 		.min_rec_size = sizeof(struct bpf_line_info_min),
1136 		.ext_info = &btf_ext->line_info,
1137 		.desc = "line_info",
1138 	};
1139 
1140 	return btf_ext_setup_info(btf_ext, &param);
1141 }
1142 
1143 static int btf_ext_setup_core_relos(struct btf_ext *btf_ext)
1144 {
1145 	struct btf_ext_sec_setup_param param = {
1146 		.off = btf_ext->hdr->core_relo_off,
1147 		.len = btf_ext->hdr->core_relo_len,
1148 		.min_rec_size = sizeof(struct bpf_core_relo),
1149 		.ext_info = &btf_ext->core_relo_info,
1150 		.desc = "core_relo",
1151 	};
1152 
1153 	return btf_ext_setup_info(btf_ext, &param);
1154 }
1155 
1156 static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
1157 {
1158 	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
1159 
1160 	if (data_size < offsetofend(struct btf_ext_header, hdr_len) ||
1161 	    data_size < hdr->hdr_len) {
1162 		pr_debug("BTF.ext header not found\n");
1163 		return -EINVAL;
1164 	}
1165 
1166 	if (hdr->magic != BTF_MAGIC) {
1167 		pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
1168 		return -EINVAL;
1169 	}
1170 
1171 	if (hdr->version != BTF_VERSION) {
1172 		pr_debug("Unsupported BTF.ext version:%u\n", hdr->version);
1173 		return -ENOTSUP;
1174 	}
1175 
1176 	if (hdr->flags) {
1177 		pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags);
1178 		return -ENOTSUP;
1179 	}
1180 
1181 	if (data_size == hdr->hdr_len) {
1182 		pr_debug("BTF.ext has no data\n");
1183 		return -EINVAL;
1184 	}
1185 
1186 	return 0;
1187 }
1188 
1189 void btf_ext__free(struct btf_ext *btf_ext)
1190 {
1191 	if (IS_ERR_OR_NULL(btf_ext))
1192 		return;
1193 	free(btf_ext->data);
1194 	free(btf_ext);
1195 }
1196 
1197 struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
1198 {
1199 	struct btf_ext *btf_ext;
1200 	int err;
1201 
1202 	err = btf_ext_parse_hdr(data, size);
1203 	if (err)
1204 		return ERR_PTR(err);
1205 
1206 	btf_ext = calloc(1, sizeof(struct btf_ext));
1207 	if (!btf_ext)
1208 		return ERR_PTR(-ENOMEM);
1209 
1210 	btf_ext->data_size = size;
1211 	btf_ext->data = malloc(size);
1212 	if (!btf_ext->data) {
1213 		err = -ENOMEM;
1214 		goto done;
1215 	}
1216 	memcpy(btf_ext->data, data, size);
1217 
1218 	if (btf_ext->hdr->hdr_len <
1219 	    offsetofend(struct btf_ext_header, line_info_len))
1220 		goto done;
1221 	err = btf_ext_setup_func_info(btf_ext);
1222 	if (err)
1223 		goto done;
1224 
1225 	err = btf_ext_setup_line_info(btf_ext);
1226 	if (err)
1227 		goto done;
1228 
1229 	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
1230 		goto done;
1231 	err = btf_ext_setup_core_relos(btf_ext);
1232 	if (err)
1233 		goto done;
1234 
1235 done:
1236 	if (err) {
1237 		btf_ext__free(btf_ext);
1238 		return ERR_PTR(err);
1239 	}
1240 
1241 	return btf_ext;
1242 }
1243 
1244 const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
1245 {
1246 	*size = btf_ext->data_size;
1247 	return btf_ext->data;
1248 }
1249 
1250 static int btf_ext_reloc_info(const struct btf *btf,
1251 			      const struct btf_ext_info *ext_info,
1252 			      const char *sec_name, __u32 insns_cnt,
1253 			      void **info, __u32 *cnt)
1254 {
1255 	__u32 sec_hdrlen = sizeof(struct btf_ext_info_sec);
1256 	__u32 i, record_size, existing_len, records_len;
1257 	struct btf_ext_info_sec *sinfo;
1258 	const char *info_sec_name;
1259 	__u64 remain_len;
1260 	void *data;
1261 
1262 	record_size = ext_info->rec_size;
1263 	sinfo = ext_info->info;
1264 	remain_len = ext_info->len;
1265 	while (remain_len > 0) {
1266 		records_len = sinfo->num_info * record_size;
1267 		info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
1268 		if (strcmp(info_sec_name, sec_name)) {
1269 			remain_len -= sec_hdrlen + records_len;
1270 			sinfo = (void *)sinfo + sec_hdrlen + records_len;
1271 			continue;
1272 		}
1273 
1274 		existing_len = (*cnt) * record_size;
1275 		data = realloc(*info, existing_len + records_len);
1276 		if (!data)
1277 			return -ENOMEM;
1278 
1279 		memcpy(data + existing_len, sinfo->data, records_len);
1280 		/* adjust insn_off only, the rest data will be passed
1281 		 * to the kernel.
1282 		 */
1283 		for (i = 0; i < sinfo->num_info; i++) {
1284 			__u32 *insn_off;
1285 
1286 			insn_off = data + existing_len + (i * record_size);
1287 			*insn_off = *insn_off / sizeof(struct bpf_insn) +
1288 				insns_cnt;
1289 		}
1290 		*info = data;
1291 		*cnt += sinfo->num_info;
1292 		return 0;
1293 	}
1294 
1295 	return -ENOENT;
1296 }
1297 
1298 int btf_ext__reloc_func_info(const struct btf *btf,
1299 			     const struct btf_ext *btf_ext,
1300 			     const char *sec_name, __u32 insns_cnt,
1301 			     void **func_info, __u32 *cnt)
1302 {
1303 	return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name,
1304 				  insns_cnt, func_info, cnt);
1305 }
1306 
1307 int btf_ext__reloc_line_info(const struct btf *btf,
1308 			     const struct btf_ext *btf_ext,
1309 			     const char *sec_name, __u32 insns_cnt,
1310 			     void **line_info, __u32 *cnt)
1311 {
1312 	return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name,
1313 				  insns_cnt, line_info, cnt);
1314 }
1315 
1316 __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext)
1317 {
1318 	return btf_ext->func_info.rec_size;
1319 }
1320 
1321 __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
1322 {
1323 	return btf_ext->line_info.rec_size;
1324 }
1325 
1326 struct btf_dedup;
1327 
1328 static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
1329 				       const struct btf_dedup_opts *opts);
1330 static void btf_dedup_free(struct btf_dedup *d);
1331 static int btf_dedup_strings(struct btf_dedup *d);
1332 static int btf_dedup_prim_types(struct btf_dedup *d);
1333 static int btf_dedup_struct_types(struct btf_dedup *d);
1334 static int btf_dedup_ref_types(struct btf_dedup *d);
1335 static int btf_dedup_compact_types(struct btf_dedup *d);
1336 static int btf_dedup_remap_types(struct btf_dedup *d);
1337 
1338 /*
1339  * Deduplicate BTF types and strings.
1340  *
1341  * The BTF dedup algorithm takes as input a `struct btf` representing the `.BTF`
1342  * ELF section with all BTF type descriptors and string data. It overwrites that
1343  * memory in-place with deduplicated types and strings without any loss of
1344  * information. If an optional `struct btf_ext` representing the '.BTF.ext' ELF
1345  * section is provided, all the strings referenced from the .BTF.ext section are
1346  * honored and updated to point to the right offsets after deduplication.
1347  *
1348  * If the function returns an error, type/string data might be garbled and
1349  * should be discarded.
1350  *
1351  * A more verbose and detailed description of both the problem btf_dedup is
1352  * solving and the solution can be found at:
1353  * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
1354  *
1355  * Problem description and justification
1356  * =====================================
1357  *
1358  * BTF type information is typically emitted either as a result of conversion
1359  * from DWARF to BTF or directly by the compiler. In both cases, each compilation
1360  * unit contains information about a subset of all the types that are used
1361  * in an application. These subsets frequently overlap and contain a lot
1362  * of duplicated information when later concatenated together into a single
1363  * binary. This algorithm ensures that each unique type is represented by a
1364  * single BTF type descriptor, greatly reducing the resulting size of BTF data.
1365  *
1366  * Compilation unit isolation and subsequent duplication of data is not the only
1367  * problem. The same type hierarchy (e.g., a struct and all the types that the
1368  * struct references) in different compilation units can be represented in BTF to
1369  * various degrees of completeness (or, rather, incompleteness) due to
1370  * struct/union forward declarations.
1371  *
1372  * Let's take a look at an example that we'll use to better understand the
1373  * problem (and solution). Suppose we have two compilation units, each using
1374  * the same `struct S`, but each having incomplete type information about the
1375  * struct's fields:
1376  *
1377  * // CU #1:
1378  * struct S;
1379  * struct A {
1380  *	int a;
1381  *	struct A* self;
1382  *	struct S* parent;
1383  * };
1384  * struct B;
1385  * struct S {
1386  *	struct A* a_ptr;
1387  *	struct B* b_ptr;
1388  * };
1389  *
1390  * // CU #2:
1391  * struct S;
1392  * struct A;
1393  * struct B {
1394  *	int b;
1395  *	struct B* self;
1396  *	struct S* parent;
1397  * };
1398  * struct S {
1399  *	struct A* a_ptr;
1400  *	struct B* b_ptr;
1401  * };
1402  *
1403  * In case of CU #1, BTF data will know only that `struct B` exists (but no
1404  * more), but will know the complete type information about `struct A`. While
1405  * for CU #2, it will know full type information about `struct B`, but will
1406  * only know about the forward declaration of `struct A` (in BTF terms, it will
1407  * have a `BTF_KIND_FWD` type descriptor with name `A`).
1408  *
1409  * This compilation unit isolation means that it's possible that there is no
1410  * single CU with complete type information describing structs `S`, `A`, and
1411  * `B`. Also, we might get tons of duplicated and redundant type information.
1412  *
1413  * An additional complication we need to keep in mind comes from the fact that
1414  * types, in general, can form graphs containing cycles, not just DAGs.
1415  *
1416  * While the algorithm performs deduplication, it also merges and resolves type
1417  * information (unless disabled through `struct btf_dedup_opts`) whenever
1418  * possible. E.g., in the example above with two compilation units having
1419  * partial type information for structs `A` and `B`, the algorithm will emit
1420  * a single copy of each BTF type that describes structs `A`, `B`, and `S`
1421  * (as well as type information for `int` and pointers), as if they were defined
1422  * in a single compilation unit as:
1423  *
1424  * struct A {
1425  *	int a;
1426  *	struct A* self;
1427  *	struct S* parent;
1428  * };
1429  * struct B {
1430  *	int b;
1431  *	struct B* self;
1432  *	struct S* parent;
1433  * };
1434  * struct S {
1435  *	struct A* a_ptr;
1436  *	struct B* b_ptr;
1437  * };
1438  *
1439  * Algorithm summary
1440  * =================
1441  *
1442  * The algorithm completes its work in 6 separate passes:
1443  *
1444  * 1. Strings deduplication.
1445  * 2. Primitive types deduplication (int, enum, fwd).
1446  * 3. Struct/union types deduplication.
1447  * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
1448  *    protos, and const/volatile/restrict modifiers).
1449  * 5. Types compaction.
1450  * 6. Types remapping.
1451  *
1452  * The algorithm determines a canonical type descriptor, which is a single
1453  * representative type for each truly unique type. This canonical type is the
1454  * one that will go into the final deduplicated BTF type information. For
1455  * structs/unions, it is also the type that the algorithm will merge additional
1456  * type information into (while resolving FWDs), as it discovers it from data
1457  * in other CUs. Each input BTF type eventually gets either mapped to itself, if
1458  * that type is canonical, or to some other type, if that type is equivalent
1459  * and was chosen as canonical representative. This mapping is stored in
1460  * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that
1461  * FWD type got resolved to.
1462  *
1463  * To facilitate fast discovery of canonical types, we also maintain a canonical
1464  * index (`btf_dedup->dedup_table`), which maps a type descriptor's signature hash
1465  * (i.e., hashed kind, name, size, fields, etc.) into a list of canonical types
1466  * that match that signature. With a sufficiently good choice of type signature
1467  * hashing function, we can limit the number of canonical types for each unique
1468  * type signature to a very small number, allowing us to find the canonical type
1469  * for any duplicated type very quickly.
1470  *
1471  * Struct/union deduplication is the most critical part, and the algorithm for
1472  * deduplicating structs/unions is described in greater detail in the comments
1473  * for the `btf_dedup_is_equiv` function.
1474  */
1475 int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
1476 	       const struct btf_dedup_opts *opts)
1477 {
1478 	struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
1479 	int err;
1480 
1481 	if (IS_ERR(d)) {
1482 		pr_debug("btf_dedup_new failed: %ld\n", PTR_ERR(d));
1483 		return -EINVAL;
1484 	}
1485 
1486 	err = btf_dedup_strings(d);
1487 	if (err < 0) {
1488 		pr_debug("btf_dedup_strings failed:%d\n", err);
1489 		goto done;
1490 	}
1491 	err = btf_dedup_prim_types(d);
1492 	if (err < 0) {
1493 		pr_debug("btf_dedup_prim_types failed:%d\n", err);
1494 		goto done;
1495 	}
1496 	err = btf_dedup_struct_types(d);
1497 	if (err < 0) {
1498 		pr_debug("btf_dedup_struct_types failed:%d\n", err);
1499 		goto done;
1500 	}
1501 	err = btf_dedup_ref_types(d);
1502 	if (err < 0) {
1503 		pr_debug("btf_dedup_ref_types failed:%d\n", err);
1504 		goto done;
1505 	}
1506 	err = btf_dedup_compact_types(d);
1507 	if (err < 0) {
1508 		pr_debug("btf_dedup_compact_types failed:%d\n", err);
1509 		goto done;
1510 	}
1511 	err = btf_dedup_remap_types(d);
1512 	if (err < 0) {
1513 		pr_debug("btf_dedup_remap_types failed:%d\n", err);
1514 		goto done;
1515 	}
1516 
1517 done:
1518 	btf_dedup_free(d);
1519 	return err;
1520 }
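
/*
 * Usage sketch (illustrative, not part of this file): deduplicating BTF right
 * after parsing an object file, with default options:
 *
 *	struct btf_ext *btf_ext = NULL;
 *	struct btf *btf = btf__parse_elf("prog.bpf.o", &btf_ext);
 *	int err;
 *
 *	if (IS_ERR(btf))
 *		return PTR_ERR(btf);
 *	err = btf__dedup(btf, btf_ext, NULL);	// NULL opts == defaults
 */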
1521 
1522 #define BTF_UNPROCESSED_ID ((__u32)-1)
1523 #define BTF_IN_PROGRESS_ID ((__u32)-2)
1524 
1525 struct btf_dedup {
1526 	/* .BTF section to be deduped in-place */
1527 	struct btf *btf;
1528 	/*
1529 	 * Optional .BTF.ext section. When provided, any strings referenced
1530 	 * from it will be taken into account when deduping strings
1531 	 */
1532 	struct btf_ext *btf_ext;
1533 	/*
1534 	 * This is a map from any type's signature hash to a list of possible
1535 	 * canonical representative type candidates. Hash collisions are
1536  * ignored, so even types of various kinds can share the same list of
1537 	 * candidates, which is fine because we rely on subsequent
1538 	 * btf_xxx_equal() checks to authoritatively verify type equality.
1539 	 */
1540 	struct hashmap *dedup_table;
1541 	/* Canonical types map */
1542 	__u32 *map;
1543 	/* Hypothetical mapping, used during type graph equivalence checks */
1544 	__u32 *hypot_map;
1545 	__u32 *hypot_list;
1546 	size_t hypot_cnt;
1547 	size_t hypot_cap;
1548 	/* Various options modifying the behavior of the algorithm */
1549 	struct btf_dedup_opts opts;
1550 };
1551 
1552 struct btf_str_ptr {
1553 	const char *str;
1554 	__u32 new_off;
1555 	bool used;
1556 };
1557 
1558 struct btf_str_ptrs {
1559 	struct btf_str_ptr *ptrs;
1560 	const char *data;
1561 	__u32 cnt;
1562 	__u32 cap;
1563 };
1564 
1565 static long hash_combine(long h, long value)
1566 {
1567 	return h * 31 + value;
1568 }
1569 
1570 #define for_each_dedup_cand(d, node, hash) \
1571 	hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
1572 
1573 static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
1574 {
1575 	return hashmap__append(d->dedup_table,
1576 			       (void *)hash, (void *)(long)type_id);
1577 }
1578 
1579 static int btf_dedup_hypot_map_add(struct btf_dedup *d,
1580 				   __u32 from_id, __u32 to_id)
1581 {
1582 	if (d->hypot_cnt == d->hypot_cap) {
1583 		__u32 *new_list;
1584 
1585 		d->hypot_cap += max((size_t)16, d->hypot_cap / 2);
1586 		new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32));
1587 		if (!new_list)
1588 			return -ENOMEM;
1589 		d->hypot_list = new_list;
1590 	}
1591 	d->hypot_list[d->hypot_cnt++] = from_id;
1592 	d->hypot_map[from_id] = to_id;
1593 	return 0;
1594 }
1595 
1596 static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
1597 {
1598 	int i;
1599 
1600 	for (i = 0; i < d->hypot_cnt; i++)
1601 		d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
1602 	d->hypot_cnt = 0;
1603 }
1604 
1605 static void btf_dedup_free(struct btf_dedup *d)
1606 {
1607 	hashmap__free(d->dedup_table);
1608 	d->dedup_table = NULL;
1609 
1610 	free(d->map);
1611 	d->map = NULL;
1612 
1613 	free(d->hypot_map);
1614 	d->hypot_map = NULL;
1615 
1616 	free(d->hypot_list);
1617 	d->hypot_list = NULL;
1618 
1619 	free(d);
1620 }
1621 
1622 static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
1623 {
1624 	return (size_t)key;
1625 }
1626 
1627 static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
1628 {
1629 	return 0;
1630 }
1631 
1632 static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
1633 {
1634 	return k1 == k2;
1635 }
1636 
1637 static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
1638 				       const struct btf_dedup_opts *opts)
1639 {
1640 	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
1641 	hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
1642 	int i, err = 0;
1643 
1644 	if (!d)
1645 		return ERR_PTR(-ENOMEM);
1646 
1647 	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
1648 	/* dedup_table_size is now used only to force collisions in tests */
1649 	if (opts && opts->dedup_table_size == 1)
1650 		hash_fn = btf_dedup_collision_hash_fn;
1651 
1652 	d->btf = btf;
1653 	d->btf_ext = btf_ext;
1654 
1655 	d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
1656 	if (IS_ERR(d->dedup_table)) {
1657 		err = PTR_ERR(d->dedup_table);
1658 		d->dedup_table = NULL;
1659 		goto done;
1660 	}
1661 
1662 	d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
1663 	if (!d->map) {
1664 		err = -ENOMEM;
1665 		goto done;
1666 	}
1667 	/* special BTF "void" type is made canonical immediately */
1668 	d->map[0] = 0;
1669 	for (i = 1; i <= btf->nr_types; i++) {
1670 		struct btf_type *t = btf_type_by_id(d->btf, i);
1671 
1672 		/* VAR and DATASEC are never deduped and are self-canonical */
1673 		if (btf_is_var(t) || btf_is_datasec(t))
1674 			d->map[i] = i;
1675 		else
1676 			d->map[i] = BTF_UNPROCESSED_ID;
1677 	}
1678 
1679 	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
1680 	if (!d->hypot_map) {
1681 		err = -ENOMEM;
1682 		goto done;
1683 	}
1684 	for (i = 0; i <= btf->nr_types; i++)
1685 		d->hypot_map[i] = BTF_UNPROCESSED_ID;
1686 
1687 done:
1688 	if (err) {
1689 		btf_dedup_free(d);
1690 		return ERR_PTR(err);
1691 	}
1692 
1693 	return d;
1694 }
1695 
1696 typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);
1697 
1698 /*
1699  * Iterate over all possible places in .BTF and .BTF.ext that can reference
1700  * a string and pass a pointer to it to the provided callback `fn`.
1701  */
1702 static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
1703 {
1704 	void *line_data_cur, *line_data_end;
1705 	int i, j, r, rec_size;
1706 	struct btf_type *t;
1707 
1708 	for (i = 1; i <= d->btf->nr_types; i++) {
1709 		t = btf_type_by_id(d->btf, i);
1710 		r = fn(&t->name_off, ctx);
1711 		if (r)
1712 			return r;
1713 
1714 		switch (btf_kind(t)) {
1715 		case BTF_KIND_STRUCT:
1716 		case BTF_KIND_UNION: {
1717 			struct btf_member *m = btf_members(t);
1718 			__u16 vlen = btf_vlen(t);
1719 
1720 			for (j = 0; j < vlen; j++) {
1721 				r = fn(&m->name_off, ctx);
1722 				if (r)
1723 					return r;
1724 				m++;
1725 			}
1726 			break;
1727 		}
1728 		case BTF_KIND_ENUM: {
1729 			struct btf_enum *m = btf_enum(t);
1730 			__u16 vlen = btf_vlen(t);
1731 
1732 			for (j = 0; j < vlen; j++) {
1733 				r = fn(&m->name_off, ctx);
1734 				if (r)
1735 					return r;
1736 				m++;
1737 			}
1738 			break;
1739 		}
1740 		case BTF_KIND_FUNC_PROTO: {
1741 			struct btf_param *m = btf_params(t);
1742 			__u16 vlen = btf_vlen(t);
1743 
1744 			for (j = 0; j < vlen; j++) {
1745 				r = fn(&m->name_off, ctx);
1746 				if (r)
1747 					return r;
1748 				m++;
1749 			}
1750 			break;
1751 		}
1752 		default:
1753 			break;
1754 		}
1755 	}
1756 
1757 	if (!d->btf_ext)
1758 		return 0;
1759 
1760 	line_data_cur = d->btf_ext->line_info.info;
1761 	line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
1762 	rec_size = d->btf_ext->line_info.rec_size;
1763 
1764 	while (line_data_cur < line_data_end) {
1765 		struct btf_ext_info_sec *sec = line_data_cur;
1766 		struct bpf_line_info_min *line_info;
1767 		__u32 num_info = sec->num_info;
1768 
1769 		r = fn(&sec->sec_name_off, ctx);
1770 		if (r)
1771 			return r;
1772 
1773 		line_data_cur += sizeof(struct btf_ext_info_sec);
1774 		for (i = 0; i < num_info; i++) {
1775 			line_info = line_data_cur;
1776 			r = fn(&line_info->file_name_off, ctx);
1777 			if (r)
1778 				return r;
1779 			r = fn(&line_info->line_off, ctx);
1780 			if (r)
1781 				return r;
1782 			line_data_cur += rec_size;
1783 		}
1784 	}
1785 
1786 	return 0;
1787 }
1788 
1789 static int str_sort_by_content(const void *a1, const void *a2)
1790 {
1791 	const struct btf_str_ptr *p1 = a1;
1792 	const struct btf_str_ptr *p2 = a2;
1793 
1794 	return strcmp(p1->str, p2->str);
1795 }
1796 
1797 static int str_sort_by_offset(const void *a1, const void *a2)
1798 {
1799 	const struct btf_str_ptr *p1 = a1;
1800 	const struct btf_str_ptr *p2 = a2;
1801 
1802 	if (p1->str != p2->str)
1803 		return p1->str < p2->str ? -1 : 1;
1804 	return 0;
1805 }
1806 
1807 static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
1808 {
1809 	const struct btf_str_ptr *p = pelem;
1810 
1811 	if (str_ptr != p->str)
1812 		return (const char *)str_ptr < p->str ? -1 : 1;
1813 	return 0;
1814 }
1815 
1816 static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
1817 {
1818 	struct btf_str_ptrs *strs;
1819 	struct btf_str_ptr *s;
1820 
1821 	if (*str_off_ptr == 0)
1822 		return 0;
1823 
1824 	strs = ctx;
1825 	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
1826 		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
1827 	if (!s)
1828 		return -EINVAL;
1829 	s->used = true;
1830 	return 0;
1831 }
1832 
1833 static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
1834 {
1835 	struct btf_str_ptrs *strs;
1836 	struct btf_str_ptr *s;
1837 
1838 	if (*str_off_ptr == 0)
1839 		return 0;
1840 
1841 	strs = ctx;
1842 	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
1843 		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
1844 	if (!s)
1845 		return -EINVAL;
1846 	*str_off_ptr = s->new_off;
1847 	return 0;
1848 }
1849 
1850 /*
1851  * Dedup strings and filter out those that are not referenced from either the
1852  * .BTF or .BTF.ext (if provided) sections.
1853  *
1854  * This is done by building an index of all strings in BTF's string section,
1855  * then iterating over all entities that can reference strings (e.g., type
1856  * names, struct field names, .BTF.ext line info, etc.) and marking the
1857  * corresponding strings as used. After that, all used strings are deduped
1858  * and compacted into a sequential blob of memory, and new offsets are
1859  * calculated. Then all the string references are rewritten using the new offsets.
1860  */
1861 static int btf_dedup_strings(struct btf_dedup *d)
1862 {
1863 	const struct btf_header *hdr = d->btf->hdr;
1864 	char *start = (char *)d->btf->nohdr_data + hdr->str_off;
1865 	char *end = start + d->btf->hdr->str_len;
1866 	char *p = start, *tmp_strs = NULL;
1867 	struct btf_str_ptrs strs = {
1868 		.cnt = 0,
1869 		.cap = 0,
1870 		.ptrs = NULL,
1871 		.data = start,
1872 	};
1873 	int i, j, err = 0, grp_idx;
1874 	bool grp_used;
1875 
1876 	/* build index of all strings */
1877 	while (p < end) {
1878 		if (strs.cnt + 1 > strs.cap) {
1879 			struct btf_str_ptr *new_ptrs;
1880 
1881 			strs.cap += max(strs.cnt / 2, 16U);
1882 			new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
1883 			if (!new_ptrs) {
1884 				err = -ENOMEM;
1885 				goto done;
1886 			}
1887 			strs.ptrs = new_ptrs;
1888 		}
1889 
1890 		strs.ptrs[strs.cnt].str = p;
1891 		strs.ptrs[strs.cnt].used = false;
1892 
1893 		p += strlen(p) + 1;
1894 		strs.cnt++;
1895 	}
1896 
1897 	/* temporary storage for deduplicated strings */
1898 	tmp_strs = malloc(d->btf->hdr->str_len);
1899 	if (!tmp_strs) {
1900 		err = -ENOMEM;
1901 		goto done;
1902 	}
1903 
1904 	/* mark all used strings */
1905 	strs.ptrs[0].used = true;
1906 	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
1907 	if (err)
1908 		goto done;
1909 
1910 	/* sort strings by content, so that we can identify duplicates */
1911 	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
1912 
1913 	/*
1914 	 * iterate groups of equal strings and if any instance in a group was
1915 	 * referenced, emit single instance and remember new offset
1916 	 */
1917 	p = tmp_strs;
1918 	grp_idx = 0;
1919 	grp_used = strs.ptrs[0].used;
1920 	/* iterate past end to avoid code duplication after loop */
1921 	for (i = 1; i <= strs.cnt; i++) {
1922 		/*
1923 		 * when i == strs.cnt, we want to skip string comparison and go
1924 		 * straight to handling last group of strings (otherwise we'd
1925 		 * need to handle last group after the loop w/ duplicated code)
1926 		 */
1927 		if (i < strs.cnt &&
1928 		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
1929 			grp_used = grp_used || strs.ptrs[i].used;
1930 			continue;
1931 		}
1932 
1933 		/*
1934 		 * this check would have been required after the loop to handle
1935 		 * last group of strings, but due to the <= condition in the loop
1936 		 * we avoid that duplication
1937 		 */
1938 		if (grp_used) {
1939 			int new_off = p - tmp_strs;
1940 			__u32 len = strlen(strs.ptrs[grp_idx].str);
1941 
1942 			memmove(p, strs.ptrs[grp_idx].str, len + 1);
1943 			for (j = grp_idx; j < i; j++)
1944 				strs.ptrs[j].new_off = new_off;
1945 			p += len + 1;
1946 		}
1947 
1948 		if (i < strs.cnt) {
1949 			grp_idx = i;
1950 			grp_used = strs.ptrs[i].used;
1951 		}
1952 	}
1953 
1954 	/* replace original strings with deduped ones */
1955 	d->btf->hdr->str_len = p - tmp_strs;
1956 	memmove(start, tmp_strs, d->btf->hdr->str_len);
1957 	end = start + d->btf->hdr->str_len;
1958 
1959 	/* restore original order for further binary search lookups */
1960 	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
1961 
1962 	/* remap string offsets */
1963 	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
1964 	if (err)
1965 		goto done;
1966 
1967 	d->btf->hdr->str_len = end - start;
1968 
1969 done:
1970 	free(tmp_strs);
1971 	free(strs.ptrs);
1972 	return err;
1973 }
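
/*
 * Illustrative example (not from the source): given a string section
 * "\0int\0foo\0int\0", the two "int" instances (offsets 1 and 9) form one
 * group after sorting by content; if either is referenced, a single copy is
 * emitted and every name_off that pointed at offset 1 or 9 is remapped to
 * the new offset of that copy.
 */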
1974 
1975 static long btf_hash_common(struct btf_type *t)
1976 {
1977 	long h;
1978 
1979 	h = hash_combine(0, t->name_off);
1980 	h = hash_combine(h, t->info);
1981 	h = hash_combine(h, t->size);
1982 	return h;
1983 }
1984 
1985 static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
1986 {
1987 	return t1->name_off == t2->name_off &&
1988 	       t1->info == t2->info &&
1989 	       t1->size == t2->size;
1990 }
1991 
1992 /* Calculate type signature hash of INT. */
1993 static long btf_hash_int(struct btf_type *t)
1994 {
1995 	__u32 info = *(__u32 *)(t + 1);
1996 	long h;
1997 
1998 	h = btf_hash_common(t);
1999 	h = hash_combine(h, info);
2000 	return h;
2001 }
2002 
2003 /* Check structural equality of two INTs. */
2004 static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
2005 {
2006 	__u32 info1, info2;
2007 
2008 	if (!btf_equal_common(t1, t2))
2009 		return false;
2010 	info1 = *(__u32 *)(t1 + 1);
2011 	info2 = *(__u32 *)(t2 + 1);
2012 	return info1 == info2;
2013 }
2014 
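/*
 * For reference, the extra __u32 compared above packs the INT's encoding, bit
 * offset and bit size. A sketch of decoding it, assuming the standard
 * BTF_INT_* accessors from <linux/btf.h> (the demo function name is
 * illustrative):
 */
#if 0 /* example, not compiled */
static void demo_print_int_meta(const struct btf_type *t)
{
	__u32 meta = *(const __u32 *)(t + 1);

	printf("encoding=%u offset=%u bits=%u\n",
	       BTF_INT_ENCODING(meta), BTF_INT_OFFSET(meta),
	       BTF_INT_BITS(meta));
}
#endif
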
2015 /* Calculate type signature hash of ENUM. */
2016 static long btf_hash_enum(struct btf_type *t)
2017 {
2018 	long h;
2019 
2020 	/* don't hash vlen and enum members to support enum fwd resolving */
2021 	h = hash_combine(0, t->name_off);
2022 	h = hash_combine(h, t->info & ~0xffff);
2023 	h = hash_combine(h, t->size);
2024 	return h;
2025 }
2026 
2027 /* Check structural equality of two ENUMs. */
2028 static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
2029 {
2030 	const struct btf_enum *m1, *m2;
2031 	__u16 vlen;
2032 	int i;
2033 
2034 	if (!btf_equal_common(t1, t2))
2035 		return false;
2036 
2037 	vlen = btf_vlen(t1);
2038 	m1 = btf_enum(t1);
2039 	m2 = btf_enum(t2);
2040 	for (i = 0; i < vlen; i++) {
2041 		if (m1->name_off != m2->name_off || m1->val != m2->val)
2042 			return false;
2043 		m1++;
2044 		m2++;
2045 	}
2046 	return true;
2047 }
2048 
2049 static inline bool btf_is_enum_fwd(struct btf_type *t)
2050 {
2051 	return btf_is_enum(t) && btf_vlen(t) == 0;
2052 }
2053 
2054 static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
2055 {
2056 	if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
2057 		return btf_equal_enum(t1, t2);
2058 	/* ignore vlen when comparing */
2059 	return t1->name_off == t2->name_off &&
2060 	       (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
2061 	       t1->size == t2->size;
2062 }
2063 
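/*
 * For reference, an enum forward declaration is encoded as BTF_KIND_ENUM with
 * vlen == 0, which is what btf_is_enum_fwd() detects and btf_compat_enum()
 * tolerates. A sketch (demo names are illustrative):
 */
#if 0 /* example, not compiled */
enum demo_e;			/* BTF_KIND_ENUM, vlen == 0: enum fwd */
enum demo_e { DEMO_A = 1 };	/* BTF_KIND_ENUM, vlen == 1: full enum */
#endif
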
2064 /*
2065  * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
2066  * as referenced type IDs equivalence is established separately during type
2067  * graph equivalence check algorithm.
2068  */
2069 static long btf_hash_struct(struct btf_type *t)
2070 {
2071 	const struct btf_member *member = btf_members(t);
2072 	__u32 vlen = btf_vlen(t);
2073 	long h = btf_hash_common(t);
2074 	int i;
2075 
2076 	for (i = 0; i < vlen; i++) {
2077 		h = hash_combine(h, member->name_off);
2078 		h = hash_combine(h, member->offset);
2079 		/* no hashing of referenced type ID, as it may still be unresolved */
2080 		member++;
2081 	}
2082 	return h;
2083 }
2084 
2085 /*
2086  * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced type
2087  * IDs. This check is performed during type graph equivalence check and
2088  * referenced types equivalence is checked separately.
2089  */
2090 static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
2091 {
2092 	const struct btf_member *m1, *m2;
2093 	__u16 vlen;
2094 	int i;
2095 
2096 	if (!btf_equal_common(t1, t2))
2097 		return false;
2098 
2099 	vlen = btf_vlen(t1);
2100 	m1 = btf_members(t1);
2101 	m2 = btf_members(t2);
2102 	for (i = 0; i < vlen; i++) {
2103 		if (m1->name_off != m2->name_off || m1->offset != m2->offset)
2104 			return false;
2105 		m1++;
2106 		m2++;
2107 	}
2108 	return true;
2109 }
2110 
2111 /*
2112  * Calculate type signature hash of ARRAY, including referenced type IDs,
2113  * under assumption that they were already resolved to canonical type IDs and
2114  * are not going to change.
2115  */
2116 static long btf_hash_array(struct btf_type *t)
2117 {
2118 	const struct btf_array *info = btf_array(t);
2119 	long h = btf_hash_common(t);
2120 
2121 	h = hash_combine(h, info->type);
2122 	h = hash_combine(h, info->index_type);
2123 	h = hash_combine(h, info->nelems);
2124 	return h;
2125 }
2126 
2127 /*
2128  * Check exact equality of two ARRAYs, taking into account referenced
2129  * type IDs, under assumption that they were already resolved to canonical
2130  * type IDs and are not going to change.
2131  * This function is called during reference types deduplication to compare
2132  * ARRAY to potential canonical representative.
2133  */
2134 static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2)
2135 {
2136 	const struct btf_array *info1, *info2;
2137 
2138 	if (!btf_equal_common(t1, t2))
2139 		return false;
2140 
2141 	info1 = btf_array(t1);
2142 	info2 = btf_array(t2);
2143 	return info1->type == info2->type &&
2144 	       info1->index_type == info2->index_type &&
2145 	       info1->nelems == info2->nelems;
2146 }
2147 
2148 /*
2149  * Check structural compatibility of two ARRAYs, ignoring referenced type
2150  * IDs. This check is performed during type graph equivalence check and
2151  * referenced types equivalence is checked separately.
2152  */
2153 static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
2154 {
2155 	if (!btf_equal_common(t1, t2))
2156 		return false;
2157 
2158 	return btf_array(t1)->nelems == btf_array(t2)->nelems;
2159 }
2160 
2161 /*
2162  * Calculate type signature hash of FUNC_PROTO, including referenced type IDs,
2163  * under assumption that they were already resolved to canonical type IDs and
2164  * are not going to change.
2165  */
2166 static long btf_hash_fnproto(struct btf_type *t)
2167 {
2168 	const struct btf_param *member = btf_params(t);
2169 	__u16 vlen = btf_vlen(t);
2170 	long h = btf_hash_common(t);
2171 	int i;
2172 
2173 	for (i = 0; i < vlen; i++) {
2174 		h = hash_combine(h, member->name_off);
2175 		h = hash_combine(h, member->type);
2176 		member++;
2177 	}
2178 	return h;
2179 }
2180 
2181 /*
2182  * Check exact equality of two FUNC_PROTOs, taking into account referenced
2183  * type IDs, under assumption that they were already resolved to canonical
2184  * type IDs and are not going to change.
2185  * This function is called during reference types deduplication to compare
2186  * FUNC_PROTO to potential canonical representative.
2187  */
2188 static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
2189 {
2190 	const struct btf_param *m1, *m2;
2191 	__u16 vlen;
2192 	int i;
2193 
2194 	if (!btf_equal_common(t1, t2))
2195 		return false;
2196 
2197 	vlen = btf_vlen(t1);
2198 	m1 = btf_params(t1);
2199 	m2 = btf_params(t2);
2200 	for (i = 0; i < vlen; i++) {
2201 		if (m1->name_off != m2->name_off || m1->type != m2->type)
2202 			return false;
2203 		m1++;
2204 		m2++;
2205 	}
2206 	return true;
2207 }
2208 
2209 /*
2210  * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
2211  * IDs. This check is performed during type graph equivalence check and
2212  * referenced types equivalence is checked separately.
2213  */
2214 static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
2215 {
2216 	const struct btf_param *m1, *m2;
2217 	__u16 vlen;
2218 	int i;
2219 
2220 	/* skip return type ID */
2221 	if (t1->name_off != t2->name_off || t1->info != t2->info)
2222 		return false;
2223 
2224 	vlen = btf_vlen(t1);
2225 	m1 = btf_params(t1);
2226 	m2 = btf_params(t2);
2227 	for (i = 0; i < vlen; i++) {
2228 		if (m1->name_off != m2->name_off)
2229 			return false;
2230 		m1++;
2231 		m2++;
2232 	}
2233 	return true;
2234 }
2235 
2236 /*
2237  * Deduplicate primitive types that can't reference other types, by calculating
2238  * their type signature hash and comparing them with any possible canonical
2239  * candidate. If no canonical candidate matches, the type itself is marked as
2240  * canonical and is added into `btf_dedup->dedup_table` as another candidate.
2241  */
2242 static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
2243 {
2244 	struct btf_type *t = btf_type_by_id(d->btf, type_id);
2245 	struct hashmap_entry *hash_entry;
2246 	struct btf_type *cand;
2247 	/* if we don't find an equivalent type, then we are canonical */
2248 	__u32 new_id = type_id;
2249 	__u32 cand_id;
2250 	long h;
2251 
2252 	switch (btf_kind(t)) {
2253 	case BTF_KIND_CONST:
2254 	case BTF_KIND_VOLATILE:
2255 	case BTF_KIND_RESTRICT:
2256 	case BTF_KIND_PTR:
2257 	case BTF_KIND_TYPEDEF:
2258 	case BTF_KIND_ARRAY:
2259 	case BTF_KIND_STRUCT:
2260 	case BTF_KIND_UNION:
2261 	case BTF_KIND_FUNC:
2262 	case BTF_KIND_FUNC_PROTO:
2263 	case BTF_KIND_VAR:
2264 	case BTF_KIND_DATASEC:
2265 		return 0;
2266 
2267 	case BTF_KIND_INT:
2268 		h = btf_hash_int(t);
2269 		for_each_dedup_cand(d, hash_entry, h) {
2270 			cand_id = (__u32)(long)hash_entry->value;
2271 			cand = btf_type_by_id(d->btf, cand_id);
2272 			if (btf_equal_int(t, cand)) {
2273 				new_id = cand_id;
2274 				break;
2275 			}
2276 		}
2277 		break;
2278 
2279 	case BTF_KIND_ENUM:
2280 		h = btf_hash_enum(t);
2281 		for_each_dedup_cand(d, hash_entry, h) {
2282 			cand_id = (__u32)(long)hash_entry->value;
2283 			cand = btf_type_by_id(d->btf, cand_id);
2284 			if (btf_equal_enum(t, cand)) {
2285 				new_id = cand_id;
2286 				break;
2287 			}
2288 			if (d->opts.dont_resolve_fwds)
2289 				continue;
2290 			if (btf_compat_enum(t, cand)) {
2291 				if (btf_is_enum_fwd(t)) {
2292 					/* resolve fwd to full enum */
2293 					new_id = cand_id;
2294 					break;
2295 				}
2296 				/* resolve canonical enum fwd to full enum */
2297 				d->map[cand_id] = type_id;
2298 			}
2299 		}
2300 		break;
2301 
2302 	case BTF_KIND_FWD:
2303 		h = btf_hash_common(t);
2304 		for_each_dedup_cand(d, hash_entry, h) {
2305 			cand_id = (__u32)(long)hash_entry->value;
2306 			cand = btf_type_by_id(d->btf, cand_id);
2307 			if (btf_equal_common(t, cand)) {
2308 				new_id = cand_id;
2309 				break;
2310 			}
2311 		}
2312 		break;
2313 
2314 	default:
2315 		return -EINVAL;
2316 	}
2317 
2318 	d->map[type_id] = new_id;
2319 	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
2320 		return -ENOMEM;
2321 
2322 	return 0;
2323 }
2324 
2325 static int btf_dedup_prim_types(struct btf_dedup *d)
2326 {
2327 	int i, err;
2328 
2329 	for (i = 1; i <= d->btf->nr_types; i++) {
2330 		err = btf_dedup_prim_type(d, i);
2331 		if (err)
2332 			return err;
2333 	}
2334 	return 0;
2335 }
2336 
2337 /*
2338  * Check whether type is already mapped into canonical one (could be to itself).
2339  */
2340 static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
2341 {
2342 	return d->map[type_id] <= BTF_MAX_NR_TYPES;
2343 }
2344 
2345 /*
2346  * Resolve type ID into its canonical type ID, if any; otherwise return original
2347  * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow
2348  * STRUCT/UNION link and resolve it into canonical type ID as well.
2349  */
2350 static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id)
2351 {
2352 	while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
2353 		type_id = d->map[type_id];
2354 	return type_id;
2355 }
2356 
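/*
 * For example, with d->map[5] == 3 and d->map[3] == 2 == d->map[2],
 * resolve_type_id(d, 5) above follows 5 -> 3 -> 2 and returns 2, the
 * canonical type ID (mapping values are hypothetical).
 */
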
2357 /*
2358  * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original
2359  * type ID.
2360  */
2361 static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
2362 {
2363 	__u32 orig_type_id = type_id;
2364 
2365 	if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
2366 		return type_id;
2367 
2368 	while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
2369 		type_id = d->map[type_id];
2370 
2371 	if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
2372 		return type_id;
2373 
2374 	return orig_type_id;
2375 }
2376 
2377 
2378 static inline __u16 btf_fwd_kind(struct btf_type *t)
2379 {
2380 	return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
2381 }
2382 
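/*
 * For reference, BTF_KIND_FWD uses the kind flag to record which concrete
 * kind the forward declaration stands for. A sketch (demo names are
 * illustrative):
 */
#if 0 /* example, not compiled */
struct demo_s;	/* BTF_KIND_FWD, kflag == 0: btf_fwd_kind() == BTF_KIND_STRUCT */
union demo_u;	/* BTF_KIND_FWD, kflag == 1: btf_fwd_kind() == BTF_KIND_UNION */
#endif
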
2383 /*
2384  * Check equivalence of BTF type graph formed by candidate struct/union (we'll
2385  * call it "candidate graph" in this description for brevity) to a type graph
2386  * formed by (potential) canonical struct/union ("canonical graph" for brevity
2387  * here, though keep in mind that not all types in canonical graph are
2388  * necessarily canonical representatives themselves, some of them might be
2389  * duplicates or their uniqueness might not have been established yet).
2390  * Returns:
2391  *  - >0, if type graphs are equivalent;
2392  *  -  0, if not equivalent;
2393  *  - <0, on error.
2394  *
2395  * Algorithm performs side-by-side DFS traversal of both type graphs and checks
2396  * equivalence of BTF types at each step. If at any point BTF types in candidate
2397  * and canonical graphs are not compatible structurally, whole graphs are
2398  * incompatible. If types are structurally equivalent (i.e., all information
2399  * except referenced type IDs is exactly the same), a mapping from `canon_id` to
2400  * a `cand_id` is recorded in the hypothetical mapping (`btf_dedup->hypot_map`).
2401  * If a type references other types, then those referenced types are checked
2402  * for equivalence recursively.
2403  *
2404  * During DFS traversal, if we find that for the current `canon_id` type we
2405  * already have some mapping in the hypothetical map, we check for two possible
2406  * situations:
2407  *   - `canon_id` is mapped to exactly the same type as `cand_id`. This will
2408  *     happen when type graphs have cycles. In this case we assume those two
2409  *     types are equivalent.
2410  *   - `canon_id` is mapped to a different type. This is a contradiction in
2411  *     our hypothetical mapping, because the same type in the canonical graph
2412  *     would correspond to two different types in the candidate graph, which
2413  *     shouldn't happen for equivalent type graphs. This condition terminates
2414  *     the equivalence check with a negative result.
2415  *
2416  * If type graph traversal exhausts all types to check and finds no contradiction,
2417  * then type graphs are equivalent.
2418  *
2419  * When checking types for equivalence, there is one special case: FWD types.
2420  * If FWD type resolution is allowed and one of the types (either from canonical
2421  * or candidate graph) is FWD and the other is STRUCT/UNION (depending on FWD's
2422  * kind flag) and their names match, the hypothetical mapping is updated to point
2423  * from FWD to STRUCT/UNION. If the graphs are later determined to be equivalent,
2424  * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently.
2425  *
2426  * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution,
2427  * if there are two identically named (or anonymous) structs/unions that are
2428  * structurally compatible, one of which has a FWD field while the other's
2429  * corresponding field is a concrete STRUCT/UNION, but according to C sources
2430  * they are different structs/unions referencing different types with the same
2431  * name. This is extremely unlikely to happen, but the btf_dedup API allows
2432  * disabling FWD resolution if this logic causes problems.
2433  *
2434  * Doing FWD resolution means that both candidate and canonical graphs can
2435  * consist of portions of the type graph that come from multiple compilation units.
2436  * This is due to the fact that types within a single compilation unit are
2437  * always deduplicated and FWDs are already resolved, if the referenced
2438  * struct/union definition is available. So, if we have an unresolved FWD and
2439  * find a corresponding STRUCT/UNION, they will be from different compilation
2440  * units. This consequently means that when we "link" a FWD to its
2441  * corresponding STRUCT/UNION, the type graph will likely have at least two
2442  * different BTF types that describe the same type (e.g., most probably there
2443  * will be two different BTF types for the same 'int' primitive type) and could
2444  * even have "overlapping" parts of the type graph that describe the same subset of types.
2445  *
2446  * This in turn means that our assumption that each type in canonical graph
2447  * must correspond to exactly one type in candidate graph might not hold
2448  * anymore and will make it harder to detect contradictions using the
2449  * hypothetical map. To handle this problem, we follow FWD -> STRUCT/UNION
2450  * resolution only in the canonical graph. FWDs in candidate graphs are never
2451  * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs
2452  * that can occur:
2453  *   - Both types in canonical and candidate graphs are FWDs. If they are
2454  *     structurally equivalent, then they can either be both resolved to the
2455  *     same STRUCT/UNION or not resolved at all. In both cases they are
2456  *     equivalent and there is no need to resolve FWD on candidate side.
2457  *   - Both types in canonical and candidate graphs are concrete STRUCT/UNION,
2458  *     so there is nothing to resolve either; the algorithm checks equivalence anyway.
2459  *   - Type in canonical graph is FWD, while type in candidate is concrete
2460  *     STRUCT/UNION. In this case candidate graph comes from single compilation
2461  *     unit, so there is exactly one BTF type for each unique C type. After
2462  *     resolving FWD into STRUCT/UNION, there might be more than one BTF type
2463  *     in canonical graph mapping to single BTF type in candidate graph, but
2464  *     because hypothetical mapping maps from canonical to candidate types, it's
2465  *     alright, and we still maintain the property of having a single `canon_id`
2466  *     mapping to a single `cand_id` (there could be two different `canon_id`s
2467  *     mapped to the same `cand_id`, but it's not contradictory).
2468  *   - Type in canonical graph is concrete STRUCT/UNION, while type in candidate
2469  *     graph is FWD. In this case we are just going to check compatibility of
2470  *     STRUCT/UNION and corresponding FWD, and if they are compatible, we'll
2471  *     assume that whatever STRUCT/UNION FWD resolves to must be equivalent to
2472  *     a concrete STRUCT/UNION from canonical graph. If the rest of type graphs
2473  *     turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from
2474  *     canonical graph.
2475  */
2476 static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
2477 			      __u32 canon_id)
2478 {
2479 	struct btf_type *cand_type;
2480 	struct btf_type *canon_type;
2481 	__u32 hypot_type_id;
2482 	__u16 cand_kind;
2483 	__u16 canon_kind;
2484 	int i, eq;
2485 
2486 	/* if both resolve to the same canonical, they must be equivalent */
2487 	if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id))
2488 		return 1;
2489 
2490 	canon_id = resolve_fwd_id(d, canon_id);
2491 
2492 	hypot_type_id = d->hypot_map[canon_id];
2493 	if (hypot_type_id <= BTF_MAX_NR_TYPES)
2494 		return hypot_type_id == cand_id;
2495 
2496 	if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
2497 		return -ENOMEM;
2498 
2499 	cand_type = btf_type_by_id(d->btf, cand_id);
2500 	canon_type = btf_type_by_id(d->btf, canon_id);
2501 	cand_kind = btf_kind(cand_type);
2502 	canon_kind = btf_kind(canon_type);
2503 
2504 	if (cand_type->name_off != canon_type->name_off)
2505 		return 0;
2506 
2507 	/* FWD <--> STRUCT/UNION equivalence check, if enabled */
2508 	if (!d->opts.dont_resolve_fwds
2509 	    && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
2510 	    && cand_kind != canon_kind) {
2511 		__u16 real_kind;
2512 		__u16 fwd_kind;
2513 
2514 		if (cand_kind == BTF_KIND_FWD) {
2515 			real_kind = canon_kind;
2516 			fwd_kind = btf_fwd_kind(cand_type);
2517 		} else {
2518 			real_kind = cand_kind;
2519 			fwd_kind = btf_fwd_kind(canon_type);
2520 		}
2521 		return fwd_kind == real_kind;
2522 	}
2523 
2524 	if (cand_kind != canon_kind)
2525 		return 0;
2526 
2527 	switch (cand_kind) {
2528 	case BTF_KIND_INT:
2529 		return btf_equal_int(cand_type, canon_type);
2530 
2531 	case BTF_KIND_ENUM:
2532 		if (d->opts.dont_resolve_fwds)
2533 			return btf_equal_enum(cand_type, canon_type);
2534 		else
2535 			return btf_compat_enum(cand_type, canon_type);
2536 
2537 	case BTF_KIND_FWD:
2538 		return btf_equal_common(cand_type, canon_type);
2539 
2540 	case BTF_KIND_CONST:
2541 	case BTF_KIND_VOLATILE:
2542 	case BTF_KIND_RESTRICT:
2543 	case BTF_KIND_PTR:
2544 	case BTF_KIND_TYPEDEF:
2545 	case BTF_KIND_FUNC:
2546 		if (cand_type->info != canon_type->info)
2547 			return 0;
2548 		return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
2549 
2550 	case BTF_KIND_ARRAY: {
2551 		const struct btf_array *cand_arr, *canon_arr;
2552 
2553 		if (!btf_compat_array(cand_type, canon_type))
2554 			return 0;
2555 		cand_arr = btf_array(cand_type);
2556 		canon_arr = btf_array(canon_type);
2557 		eq = btf_dedup_is_equiv(d,
2558 			cand_arr->index_type, canon_arr->index_type);
2559 		if (eq <= 0)
2560 			return eq;
2561 		return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
2562 	}
2563 
2564 	case BTF_KIND_STRUCT:
2565 	case BTF_KIND_UNION: {
2566 		const struct btf_member *cand_m, *canon_m;
2567 		__u16 vlen;
2568 
2569 		if (!btf_shallow_equal_struct(cand_type, canon_type))
2570 			return 0;
2571 		vlen = btf_vlen(cand_type);
2572 		cand_m = btf_members(cand_type);
2573 		canon_m = btf_members(canon_type);
2574 		for (i = 0; i < vlen; i++) {
2575 			eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
2576 			if (eq <= 0)
2577 				return eq;
2578 			cand_m++;
2579 			canon_m++;
2580 		}
2581 
2582 		return 1;
2583 	}
2584 
2585 	case BTF_KIND_FUNC_PROTO: {
2586 		const struct btf_param *cand_p, *canon_p;
2587 		__u16 vlen;
2588 
2589 		if (!btf_compat_fnproto(cand_type, canon_type))
2590 			return 0;
2591 		eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
2592 		if (eq <= 0)
2593 			return eq;
2594 		vlen = btf_vlen(cand_type);
2595 		cand_p = btf_params(cand_type);
2596 		canon_p = btf_params(canon_type);
2597 		for (i = 0; i < vlen; i++) {
2598 			eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type);
2599 			if (eq <= 0)
2600 				return eq;
2601 			cand_p++;
2602 			canon_p++;
2603 		}
2604 		return 1;
2605 	}
2606 
2607 	default:
2608 		return -EINVAL;
2609 	}
2610 	return 0;
2611 }
2612 
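/*
 * For reference, the hypot_map check above is what keeps self-referential
 * types from recursing forever. A sketch (demo name is illustrative): when
 * two copies of the struct below, coming from different CUs, are compared,
 * checking the `next` member re-enters btf_dedup_is_equiv() for the same
 * pair, hits the existing hypot_map entry and returns 1.
 */
#if 0 /* example, not compiled */
struct demo_list {
	struct demo_list *next;	/* PTR back to the struct: a type graph cycle */
	int payload;
};
#endif
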
2613 /*
2614  * Use hypothetical mapping, produced by successful type graph equivalence
2615  * check, to augment existing struct/union canonical mapping, where possible.
2616  *
2617  * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record
2618  * FWD -> STRUCT/UNION correspondence. FWD resolution is bidirectional:
2619  * it doesn't matter if FWD type was part of canonical graph or candidate one,
2620  * we record the mapping anyway. Unlike the carefulness required for the
2621  * struct/union correspondence mapping (described below), this is safe for FWD
2622  * resolution, because by the time FWD types (reference types) are
2623  * deduplicated, all structs/unions will have been deduped already anyway.
2624  *
2625  * Recording STRUCT/UNION mapping is purely a performance optimization and is
2626  * not required for correctness. It needs to be done carefully to ensure that
2627  * struct/union from candidate's type graph is not mapped into corresponding
2628  * struct/union from canonical type graph that itself hasn't been resolved into
2629  * canonical representative. The only guarantee we have is that canonical
2630  * struct/union was determined as canonical and that won't change. But any
2631  * types referenced through that struct/union's fields might not have been
2632  * resolved yet, so in a case like that it's too early to establish any kind of
2633  * correspondence between structs/unions.
2634  *
2635  * No canonical correspondence is derived for primitive types (they are
2636  * already completely deduplicated anyway) or reference types (they rely on
2637  * stability of struct/union canonical relationship for equivalence checks).
2638  */
2639 static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
2640 {
2641 	__u32 cand_type_id, targ_type_id;
2642 	__u16 t_kind, c_kind;
2643 	__u32 t_id, c_id;
2644 	int i;
2645 
2646 	for (i = 0; i < d->hypot_cnt; i++) {
2647 		cand_type_id = d->hypot_list[i];
2648 		targ_type_id = d->hypot_map[cand_type_id];
2649 		t_id = resolve_type_id(d, targ_type_id);
2650 		c_id = resolve_type_id(d, cand_type_id);
2651 		t_kind = btf_kind(btf__type_by_id(d->btf, t_id));
2652 		c_kind = btf_kind(btf__type_by_id(d->btf, c_id));
2653 		/*
2654 		 * Resolve FWD into STRUCT/UNION.
2655 		 * It's ok to resolve FWD into STRUCT/UNION that's not yet
2656 		 * mapped to canonical representative (as opposed to
2657 		 * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because
2658 		 * eventually that struct is going to be mapped and all resolved
2659 		 * FWDs will automatically resolve to the correct canonical
2660 		 * representative. This will happen before ref type deduping,
2661 		 * which critically depends on stability of these mappings. This
2662 		 * stability is not a requirement for STRUCT/UNION equivalence
2663 		 * checks, though.
2664 		 */
2665 		if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
2666 			d->map[c_id] = t_id;
2667 		else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
2668 			d->map[t_id] = c_id;
2669 
2670 		if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
2671 		    c_kind != BTF_KIND_FWD &&
2672 		    is_type_mapped(d, c_id) &&
2673 		    !is_type_mapped(d, t_id)) {
2674 			/*
2675 			 * as a perf optimization, we can map struct/union
2676 			 * that's part of type graph we just verified for
2677 			 * equivalence. We can do that for struct/union that has
2678 			 * canonical representative only, though.
2679 			 */
2680 			d->map[t_id] = c_id;
2681 		}
2682 	}
2683 }
2684 
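/*
 * For example (hypothetical IDs): if the equivalence check recorded
 * hypot_map[7] == 3, where type #7 resolves to a FWD and type #3 to a
 * STRUCT, the first branch above records d->map[7] = 3, permanently
 * resolving that FWD.
 */
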
2685 /*
2686  * Deduplicate struct/union types.
2687  *
2688  * For each struct/union type its type signature hash is calculated, taking
2689  * into account the type's name, size, number, order and names of fields, but
2690  * ignoring type IDs referenced from fields, because they might not be fully
2691  * deduped until after the reference types deduplication phase. This type hash
2692  * is used to iterate over all potential canonical types sharing the same hash.
2693  * For each canonical candidate we check whether type graphs that they form
2694  * (through referenced types in fields and so on) are equivalent using algorithm
2695  * implemented in `btf_dedup_is_equiv`. If such equivalence is found and
2696  * BTF_KIND_FWD resolution is allowed, then hypothetical mapping
2697  * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence
2698  * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to
2699  * potentially map other structs/unions to their canonical representatives,
2700  * if such a relationship hasn't yet been established. This speeds up the algorithm
2701  * by eliminating some of the duplicate work.
2702  *
2703  * If no matching canonical representative was found, struct/union is marked
2704  * as canonical for itself and is added into btf_dedup->dedup_table hash map
2705  * for further look ups.
2706  * for further lookups.
2707 static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
2708 {
2709 	struct btf_type *cand_type, *t;
2710 	struct hashmap_entry *hash_entry;
2711 	/* if we don't find an equivalent type, then we are canonical */
2712 	__u32 new_id = type_id;
2713 	__u16 kind;
2714 	long h;
2715 
2716 	/* already deduped or is in the process of deduping (loop detected) */
2717 	if (d->map[type_id] <= BTF_MAX_NR_TYPES)
2718 		return 0;
2719 
2720 	t = btf_type_by_id(d->btf, type_id);
2721 	kind = btf_kind(t);
2722 
2723 	if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
2724 		return 0;
2725 
2726 	h = btf_hash_struct(t);
2727 	for_each_dedup_cand(d, hash_entry, h) {
2728 		__u32 cand_id = (__u32)(long)hash_entry->value;
2729 		int eq;
2730 
2731 		/*
2732 		 * Even though btf_dedup_is_equiv() checks for
2733 		 * btf_shallow_equal_struct() internally when checking two
2734 		 * structs (unions) for equivalence, we need to guard here
2735 		 * from picking matching FWD type as a dedup candidate.
2736 		 * This can happen due to hash collision. In such case just
2737 		 * relying on btf_dedup_is_equiv() would lead to potentially
2738 		 * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
2739 		 * FWD and compatible STRUCT/UNION are considered equivalent.
2740 		 */
2741 		cand_type = btf_type_by_id(d->btf, cand_id);
2742 		if (!btf_shallow_equal_struct(t, cand_type))
2743 			continue;
2744 
2745 		btf_dedup_clear_hypot_map(d);
2746 		eq = btf_dedup_is_equiv(d, type_id, cand_id);
2747 		if (eq < 0)
2748 			return eq;
2749 		if (!eq)
2750 			continue;
2751 		new_id = cand_id;
2752 		btf_dedup_merge_hypot_map(d);
2753 		break;
2754 	}
2755 
2756 	d->map[type_id] = new_id;
2757 	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
2758 		return -ENOMEM;
2759 
2760 	return 0;
2761 }
2762 
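/*
 * For reference, this is the phase that merges identical struct/union
 * definitions coming from different compilation units. A sketch (demo name
 * is illustrative):
 */
#if 0 /* example, not compiled */
/* CU #1 */ struct demo_pt { int x, y; };
/* CU #2 */ struct demo_pt { int x, y; };	/* same hash, shallow-equal, equivalent graph: deduped */
#endif
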
2763 static int btf_dedup_struct_types(struct btf_dedup *d)
2764 {
2765 	int i, err;
2766 
2767 	for (i = 1; i <= d->btf->nr_types; i++) {
2768 		err = btf_dedup_struct_type(d, i);
2769 		if (err)
2770 			return err;
2771 	}
2772 	return 0;
2773 }
2774 
2775 /*
2776  * Deduplicate reference type.
2777  *
2778  * Once all primitive and struct/union types got deduplicated, we can easily
2779  * Once all primitive and struct/union types are deduplicated, we can easily
2780  *
2781  * 1. Resolve all referenced type IDs into their canonical type IDs. This
2782  * resolution can be done either immediately for primitive or struct/union types
2783  * (because they were deduped in previous two phases) or recursively for
2784  * reference types. Recursion will always terminate at either primitive or
2785  * struct/union type, at which point we can "unwind" the chain of reference types
2786  * one by one. There is no danger of encountering cycles because in C type
2787  * system the only way to form type cycle is through struct/union, so any chain
2788  * of reference types, even those taking part in a type cycle, will inevitably
2789  * reach struct/union at some point.
2790  *
2791  * 2. Once all referenced type IDs are resolved into canonical ones, BTF type
2792  * becomes "stable", in the sense that no further deduplication will cause
2793  * any changes to it. With that, it's now possible to calculate type's signature
2794  * hash (this time taking into account referenced type IDs) and loop over all
2795  * potential canonical representatives. If no match was found, current type
2796  * will become canonical representative of itself and will be added into
2797  * btf_dedup->dedup_table as another possible canonical representative.
2798  */
2799 static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
2800 {
2801 	struct hashmap_entry *hash_entry;
2802 	/* if we don't find an equivalent type, we are the representative type */
2803 	__u32 new_id = type_id, cand_id;
2804 	struct btf_type *t, *cand;
2805 	int ref_type_id;
2806 	long h;
2807 
2808 	if (d->map[type_id] == BTF_IN_PROGRESS_ID)
2809 		return -ELOOP;
2810 	if (d->map[type_id] <= BTF_MAX_NR_TYPES)
2811 		return resolve_type_id(d, type_id);
2812 
2813 	t = btf_type_by_id(d->btf, type_id);
2814 	d->map[type_id] = BTF_IN_PROGRESS_ID;
2815 
2816 	switch (btf_kind(t)) {
2817 	case BTF_KIND_CONST:
2818 	case BTF_KIND_VOLATILE:
2819 	case BTF_KIND_RESTRICT:
2820 	case BTF_KIND_PTR:
2821 	case BTF_KIND_TYPEDEF:
2822 	case BTF_KIND_FUNC:
2823 		ref_type_id = btf_dedup_ref_type(d, t->type);
2824 		if (ref_type_id < 0)
2825 			return ref_type_id;
2826 		t->type = ref_type_id;
2827 
2828 		h = btf_hash_common(t);
2829 		for_each_dedup_cand(d, hash_entry, h) {
2830 			cand_id = (__u32)(long)hash_entry->value;
2831 			cand = btf_type_by_id(d->btf, cand_id);
2832 			if (btf_equal_common(t, cand)) {
2833 				new_id = cand_id;
2834 				break;
2835 			}
2836 		}
2837 		break;
2838 
2839 	case BTF_KIND_ARRAY: {
2840 		struct btf_array *info = btf_array(t);
2841 
2842 		ref_type_id = btf_dedup_ref_type(d, info->type);
2843 		if (ref_type_id < 0)
2844 			return ref_type_id;
2845 		info->type = ref_type_id;
2846 
2847 		ref_type_id = btf_dedup_ref_type(d, info->index_type);
2848 		if (ref_type_id < 0)
2849 			return ref_type_id;
2850 		info->index_type = ref_type_id;
2851 
2852 		h = btf_hash_array(t);
2853 		for_each_dedup_cand(d, hash_entry, h) {
2854 			cand_id = (__u32)(long)hash_entry->value;
2855 			cand = btf_type_by_id(d->btf, cand_id);
2856 			if (btf_equal_array(t, cand)) {
2857 				new_id = cand_id;
2858 				break;
2859 			}
2860 		}
2861 		break;
2862 	}
2863 
2864 	case BTF_KIND_FUNC_PROTO: {
2865 		struct btf_param *param;
2866 		__u16 vlen;
2867 		int i;
2868 
2869 		ref_type_id = btf_dedup_ref_type(d, t->type);
2870 		if (ref_type_id < 0)
2871 			return ref_type_id;
2872 		t->type = ref_type_id;
2873 
2874 		vlen = btf_vlen(t);
2875 		param = btf_params(t);
2876 		for (i = 0; i < vlen; i++) {
2877 			ref_type_id = btf_dedup_ref_type(d, param->type);
2878 			if (ref_type_id < 0)
2879 				return ref_type_id;
2880 			param->type = ref_type_id;
2881 			param++;
2882 		}
2883 
2884 		h = btf_hash_fnproto(t);
2885 		for_each_dedup_cand(d, hash_entry, h) {
2886 			cand_id = (__u32)(long)hash_entry->value;
2887 			cand = btf_type_by_id(d->btf, cand_id);
2888 			if (btf_equal_fnproto(t, cand)) {
2889 				new_id = cand_id;
2890 				break;
2891 			}
2892 		}
2893 		break;
2894 	}
2895 
2896 	default:
2897 		return -EINVAL;
2898 	}
2899 
2900 	d->map[type_id] = new_id;
2901 	if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
2902 		return -ENOMEM;
2903 
2904 	return new_id;
2905 }
2906 
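/*
 * For reference, a typical reference-type chain (demo names are illustrative):
 * deduplicating the PTR below recurses through TYPEDEF and CONST, terminates
 * at INT (already deduped in the primitive phase), then unwinds, canonicalizing
 * each link on the way back.
 */
#if 0 /* example, not compiled */
typedef const int demo_cint;	/* TYPEDEF -> CONST -> INT */
demo_cint *demo_ptr;		/* PTR -> TYPEDEF -> CONST -> INT */
#endif
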
2907 static int btf_dedup_ref_types(struct btf_dedup *d)
2908 {
2909 	int i, err;
2910 
2911 	for (i = 1; i <= d->btf->nr_types; i++) {
2912 		err = btf_dedup_ref_type(d, i);
2913 		if (err < 0)
2914 			return err;
2915 	}
2916 	/* we won't need d->dedup_table anymore */
2917 	hashmap__free(d->dedup_table);
2918 	d->dedup_table = NULL;
2919 	return 0;
2920 }
2921 
2922 /*
2923  * Compact types.
2924  *
2925  * After we have established each type's corresponding canonical representative
2926  * type, we can now eliminate types that are not canonical and leave only
2927  * canonical ones laid out sequentially in memory by copying them over
2928  * duplicates. During compaction btf_dedup->hypot_map array is reused to store
2929  * a map from original type ID to a new compacted type ID, which will be used
2930  * during next phase to "fix up" type IDs, referenced from struct/union and
2931  * reference types.
2932  */
2933 static int btf_dedup_compact_types(struct btf_dedup *d)
2934 {
2935 	__u32 *new_offs;
2936 	__u32 next_type_id = 1;
2937 	void *p;
2938 	int i, len;
2939 
2940 	/* we are going to reuse hypot_map to store compaction remapping */
2941 	d->hypot_map[0] = 0;
2942 	for (i = 1; i <= d->btf->nr_types; i++)
2943 		d->hypot_map[i] = BTF_UNPROCESSED_ID;
2944 
2945 	p = d->btf->types_data;
2946 
2947 	for (i = 1; i <= d->btf->nr_types; i++) {
2948 		if (d->map[i] != i)
2949 			continue;
2950 
2951 		len = btf_type_size(btf__type_by_id(d->btf, i));
2952 		if (len < 0)
2953 			return len;
2954 
2955 		memmove(p, btf__type_by_id(d->btf, i), len);
2956 		d->hypot_map[i] = next_type_id;
2957 		d->btf->type_offs[next_type_id] = p - d->btf->types_data;
2958 		p += len;
2959 		next_type_id++;
2960 	}
2961 
2962 	/* shrink struct btf's internal types index and update btf_header */
2963 	d->btf->nr_types = next_type_id - 1;
2964 	d->btf->type_offs_cap = d->btf->nr_types + 1;
2965 	d->btf->hdr->type_len = p - d->btf->types_data;
2966 	new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
2967 				       sizeof(*new_offs));
2968 	if (!new_offs)
2969 		return -ENOMEM;
2970 	d->btf->type_offs = new_offs;
2971 
2972 	/* make sure string section follows type information without gaps */
2973 	d->btf->hdr->str_off = p - d->btf->nohdr_data;
2974 	memmove(p, d->btf->strings, d->btf->hdr->str_len);
2975 	d->btf->strings = p;
2976 	p += d->btf->hdr->str_len;
2977 
2978 	d->btf->data_size = p - d->btf->data;
2979 	return 0;
2980 }
2981 
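/*
 * For example (hypothetical IDs): with five types and d->map == {1:1, 2:1,
 * 3:3, 4:3, 5:5}, only types 1, 3 and 5 survive compaction and hypot_map
 * becomes {1 -> 1, 3 -> 2, 5 -> 3}; the remap phase below then rewrites a
 * reference to type 4 as resolve_type_id() -> 3, then hypot_map[3] -> 2.
 */
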
2982 /*
2983  * Figure out final (deduplicated and compacted) type ID for provided original
2984  * `type_id` by first resolving it into corresponding canonical type ID and
2985  * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
2986  * which is populated during compaction phase.
2987  */
2988 static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
2989 {
2990 	__u32 resolved_type_id, new_type_id;
2991 
2992 	resolved_type_id = resolve_type_id(d, type_id);
2993 	new_type_id = d->hypot_map[resolved_type_id];
2994 	if (new_type_id > BTF_MAX_NR_TYPES)
2995 		return -EINVAL;
2996 	return new_type_id;
2997 }
2998 
2999 /*
3000  * Remap referenced type IDs into deduped type IDs.
3001  *
3002  * After BTF types are deduplicated and compacted, their final type IDs may
3003  * differ from original ones. The map from original to a corresponding
3004  * deduped type ID is stored in btf_dedup->hypot_map and is populated during
3005  * compaction phase. During remapping phase we are rewriting all type IDs
3006  * referenced from any BTF type (e.g., struct fields, func proto args, etc) to
3007  * their final deduped type IDs.
3008  */
3009 static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
3010 {
3011 	struct btf_type *t = btf_type_by_id(d->btf, type_id);
3012 	int i, r;
3013 
3014 	switch (btf_kind(t)) {
3015 	case BTF_KIND_INT:
3016 	case BTF_KIND_ENUM:
3017 		break;
3018 
3019 	case BTF_KIND_FWD:
3020 	case BTF_KIND_CONST:
3021 	case BTF_KIND_VOLATILE:
3022 	case BTF_KIND_RESTRICT:
3023 	case BTF_KIND_PTR:
3024 	case BTF_KIND_TYPEDEF:
3025 	case BTF_KIND_FUNC:
3026 	case BTF_KIND_VAR:
3027 		r = btf_dedup_remap_type_id(d, t->type);
3028 		if (r < 0)
3029 			return r;
3030 		t->type = r;
3031 		break;
3032 
3033 	case BTF_KIND_ARRAY: {
3034 		struct btf_array *arr_info = btf_array(t);
3035 
3036 		r = btf_dedup_remap_type_id(d, arr_info->type);
3037 		if (r < 0)
3038 			return r;
3039 		arr_info->type = r;
3040 		r = btf_dedup_remap_type_id(d, arr_info->index_type);
3041 		if (r < 0)
3042 			return r;
3043 		arr_info->index_type = r;
3044 		break;
3045 	}
3046 
3047 	case BTF_KIND_STRUCT:
3048 	case BTF_KIND_UNION: {
3049 		struct btf_member *member = btf_members(t);
3050 		__u16 vlen = btf_vlen(t);
3051 
3052 		for (i = 0; i < vlen; i++) {
3053 			r = btf_dedup_remap_type_id(d, member->type);
3054 			if (r < 0)
3055 				return r;
3056 			member->type = r;
3057 			member++;
3058 		}
3059 		break;
3060 	}
3061 
3062 	case BTF_KIND_FUNC_PROTO: {
3063 		struct btf_param *param = btf_params(t);
3064 		__u16 vlen = btf_vlen(t);
3065 
3066 		r = btf_dedup_remap_type_id(d, t->type);
3067 		if (r < 0)
3068 			return r;
3069 		t->type = r;
3070 
3071 		for (i = 0; i < vlen; i++) {
3072 			r = btf_dedup_remap_type_id(d, param->type);
3073 			if (r < 0)
3074 				return r;
3075 			param->type = r;
3076 			param++;
3077 		}
3078 		break;
3079 	}
3080 
3081 	case BTF_KIND_DATASEC: {
3082 		struct btf_var_secinfo *var = btf_var_secinfos(t);
3083 		__u16 vlen = btf_vlen(t);
3084 
3085 		for (i = 0; i < vlen; i++) {
3086 			r = btf_dedup_remap_type_id(d, var->type);
3087 			if (r < 0)
3088 				return r;
3089 			var->type = r;
3090 			var++;
3091 		}
3092 		break;
3093 	}
3094 
3095 	default:
3096 		return -EINVAL;
3097 	}
3098 
3099 	return 0;
3100 }
3101 
3102 static int btf_dedup_remap_types(struct btf_dedup *d)
3103 {
3104 	int i, r;
3105 
3106 	for (i = 1; i <= d->btf->nr_types; i++) {
3107 		r = btf_dedup_remap_type(d, i);
3108 		if (r < 0)
3109 			return r;
3110 	}
3111 	return 0;
3112 }
3113 
3114 /*
3115  * Probe a few well-known locations for the vmlinux kernel image and try to
3116  * load BTF data out of it, for use as the target BTF.
3117  */
3118 struct btf *libbpf_find_kernel_btf(void)
3119 {
3120 	struct {
3121 		const char *path_fmt;
3122 		bool raw_btf;
3123 	} locations[] = {
3124 		/* try canonical vmlinux BTF through sysfs first */
3125 		{ "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
3126 		/* fall back to trying to find vmlinux ELF on disk otherwise */
3127 		{ "/boot/vmlinux-%1$s" },
3128 		{ "/lib/modules/%1$s/vmlinux-%1$s" },
3129 		{ "/lib/modules/%1$s/build/vmlinux" },
3130 		{ "/usr/lib/modules/%1$s/kernel/vmlinux" },
3131 		{ "/usr/lib/debug/boot/vmlinux-%1$s" },
3132 		{ "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
3133 		{ "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
3134 	};
3135 	char path[PATH_MAX + 1];
3136 	struct utsname buf;
3137 	struct btf *btf;
3138 	int i;
3139 
3140 	uname(&buf);
3141 
3142 	for (i = 0; i < ARRAY_SIZE(locations); i++) {
3143 		snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
3144 
3145 		if (access(path, R_OK))
3146 			continue;
3147 
3148 		if (locations[i].raw_btf)
3149 			btf = btf__parse_raw(path);
3150 		else
3151 			btf = btf__parse_elf(path, NULL);
3152 
3153 		pr_debug("loading kernel BTF '%s': %ld\n",
3154 			 path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
3155 		if (IS_ERR(btf))
3156 			continue;
3157 
3158 		return btf;
3159 	}
3160 
3161 	pr_warn("failed to find valid kernel BTF\n");
3162 	return ERR_PTR(-ESRCH);
3163 }
3164
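/*
 * Usage sketch for the above (the demo function name is illustrative). Note
 * that at this revision the function reports failure via an ERR_PTR-encoded
 * error, not a NULL pointer:
 */
#if 0 /* example, not compiled */
static int demo_load_vmlinux_btf(void)
{
	struct btf *btf = libbpf_find_kernel_btf();

	if (IS_ERR(btf))
		return PTR_ERR(btf);	/* e.g. -ESRCH if no location matched */

	/* ... inspect types, e.g. via btf__get_nr_types()/btf__type_by_id() ... */
	btf__free(btf);
	return 0;
}
#endif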