btf.c (c81ed6d81e0560713ceb94917ff1981848d0614e) btf.c (88a82c2a9ab5b2ce533c3de3f4517853c2f67f53)
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2/* Copyright (c) 2018 Facebook */
3
4#include <byteswap.h>
5#include <endian.h>
6#include <stdio.h>
7#include <stdlib.h>
8#include <string.h>

--- 76 unchanged lines hidden (view full) ---

85
86 void *strs_data;
87 size_t strs_data_cap; /* used size stored in hdr->str_len */
88
89 /* lookup index for each unique string in strings section */
90 struct hashmap *strs_hash;
91 /* whether strings are already deduplicated */
92 bool strs_deduped;
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2/* Copyright (c) 2018 Facebook */
3
4#include <byteswap.h>
5#include <endian.h>
6#include <stdio.h>
7#include <stdlib.h>
8#include <string.h>

--- 76 unchanged lines hidden (view full) ---

85
86 void *strs_data;
87 size_t strs_data_cap; /* used size stored in hdr->str_len */
88
89 /* lookup index for each unique string in strings section */
90 struct hashmap *strs_hash;
91 /* whether strings are already deduplicated */
92 bool strs_deduped;
93 /* extra indirection layer to make strings hashmap work with stable
94 * string offsets and ability to transparently choose between
95 * btf->strs_data or btf_dedup->strs_data as a source of strings.
96 * This is used for BTF strings dedup to transfer deduplicated strings
97 * data back to struct btf without re-building strings index.
98 */
99 void **strs_data_ptr;
100
93 /* BTF object FD, if loaded into kernel */
94 int fd;
95
96 /* Pointer size (in bytes) for a target architecture of this BTF */
97 int ptr_sz;
98};
99
100static inline __u64 ptr_to_u64(const void *ptr)

--- 1257 unchanged lines hidden (view full) ---

1358 *key_type_id = key->type;
1359 *value_type_id = value->type;
1360
1361 return 0;
1362}
1363
1364static size_t strs_hash_fn(const void *key, void *ctx)
1365{
101 /* BTF object FD, if loaded into kernel */
102 int fd;
103
104 /* Pointer size (in bytes) for a target architecture of this BTF */
105 int ptr_sz;
106};
107
108static inline __u64 ptr_to_u64(const void *ptr)

--- 1257 unchanged lines hidden (view full) ---

1366 *key_type_id = key->type;
1367 *value_type_id = value->type;
1368
1369 return 0;
1370}
1371
1372static size_t strs_hash_fn(const void *key, void *ctx)
1373{
1366 struct btf *btf = ctx;
1367 const char *str = btf->strs_data + (long)key;
1374 const struct btf *btf = ctx;
1375 const char *strs = *btf->strs_data_ptr;
1376 const char *str = strs + (long)key;
1368
1369 return str_hash(str);
1370}
1371
1372static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
1373{
1377
1378 return str_hash(str);
1379}
1380
1381static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
1382{
1374 struct btf *btf = ctx;
1375 const char *str1 = btf->strs_data + (long)key1;
1376 const char *str2 = btf->strs_data + (long)key2;
1383 const struct btf *btf = ctx;
1384 const char *strs = *btf->strs_data_ptr;
1385 const char *str1 = strs + (long)key1;
1386 const char *str2 = strs + (long)key2;
1377
1378 return strcmp(str1, str2) == 0;
1379}
1380
1381static void btf_invalidate_raw_data(struct btf *btf)
1382{
1383 if (btf->raw_data) {
1384 free(btf->raw_data);

--- 28 unchanged lines hidden (view full) ---

1413 strs = malloc(btf->hdr->str_len);
1414 if (!hdr || !types || !strs)
1415 goto err_out;
1416
1417 memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
1418 memcpy(types, btf->types_data, btf->hdr->type_len);
1419 memcpy(strs, btf->strs_data, btf->hdr->str_len);
1420
1387
1388 return strcmp(str1, str2) == 0;
1389}
1390
1391static void btf_invalidate_raw_data(struct btf *btf)
1392{
1393 if (btf->raw_data) {
1394 free(btf->raw_data);

--- 28 unchanged lines hidden (view full) ---

1423 strs = malloc(btf->hdr->str_len);
1424 if (!hdr || !types || !strs)
1425 goto err_out;
1426
1427 memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
1428 memcpy(types, btf->types_data, btf->hdr->type_len);
1429 memcpy(strs, btf->strs_data, btf->hdr->str_len);
1430
1431 /* make hashmap below use btf->strs_data as a source of strings */
1432 btf->strs_data_ptr = &btf->strs_data;
1433
1421 /* build lookup index for all strings */
1422 hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
1423 if (IS_ERR(hash)) {
1424 err = PTR_ERR(hash);
1425 hash = NULL;
1426 goto err_out;
1427 }
1428

--- 1390 unchanged lines hidden (view full) ---

2819 __u32 *map;
2820 /* Hypothetical mapping, used during type graph equivalence checks */
2821 __u32 *hypot_map;
2822 __u32 *hypot_list;
2823 size_t hypot_cnt;
2824 size_t hypot_cap;
2825 /* Various option modifying behavior of algorithm */
2826 struct btf_dedup_opts opts;
1434 /* build lookup index for all strings */
1435 hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
1436 if (IS_ERR(hash)) {
1437 err = PTR_ERR(hash);
1438 hash = NULL;
1439 goto err_out;
1440 }
1441

--- 1390 unchanged lines hidden (view full) ---

2832 __u32 *map;
2833 /* Hypothetical mapping, used during type graph equivalence checks */
2834 __u32 *hypot_map;
2835 __u32 *hypot_list;
2836 size_t hypot_cnt;
2837 size_t hypot_cap;
2838 /* Various option modifying behavior of algorithm */
2839 struct btf_dedup_opts opts;
2840 /* temporary strings deduplication state */
2841 void *strs_data;
2842 size_t strs_cap;
2843 size_t strs_len;
2844 struct hashmap* strs_hash;
2827};
2828
2845};
2846
2829struct btf_str_ptr {
2830 const char *str;
2831 __u32 new_off;
2832 bool used;
2833};
2834
2835struct btf_str_ptrs {
2836 struct btf_str_ptr *ptrs;
2837 const char *data;
2838 __u32 cnt;
2839 __u32 cap;
2840};
2841
2842static long hash_combine(long h, long value)
2843{
2844 return h * 31 + value;
2845}
2846
2847#define for_each_dedup_cand(d, node, hash) \
2848 hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
2849

--- 208 unchanged lines hidden (view full) ---

3058 return r;
3059 line_data_cur += rec_size;
3060 }
3061 }
3062
3063 return 0;
3064}
3065
2847static long hash_combine(long h, long value)
2848{
2849 return h * 31 + value;
2850}
2851
2852#define for_each_dedup_cand(d, node, hash) \
2853 hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
2854

--- 208 unchanged lines hidden (view full) ---

3063 return r;
3064 line_data_cur += rec_size;
3065 }
3066 }
3067
3068 return 0;
3069}
3070
3066static int str_sort_by_content(const void *a1, const void *a2)
3071static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
3067{
3072{
3068 const struct btf_str_ptr *p1 = a1;
3069 const struct btf_str_ptr *p2 = a2;
3073 struct btf_dedup *d = ctx;
3074 long old_off, new_off, len;
3075 const char *s;
3076 void *p;
3077 int err;
3070
3078
3071 return strcmp(p1->str, p2->str);
3072}
3073
3074static int str_sort_by_offset(const void *a1, const void *a2)
3075{
3076 const struct btf_str_ptr *p1 = a1;
3077 const struct btf_str_ptr *p2 = a2;
3078
3079 if (p1->str != p2->str)
3080 return p1->str < p2->str ? -1 : 1;
3081 return 0;
3082}
3083
3084static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
3085{
3086 const struct btf_str_ptr *p = pelem;
3087
3088 if (str_ptr != p->str)
3089 return (const char *)str_ptr < p->str ? -1 : 1;
3090 return 0;
3091}
3092
3093static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
3094{
3095 struct btf_str_ptrs *strs;
3096 struct btf_str_ptr *s;
3097
3098 if (*str_off_ptr == 0)
3099 return 0;
3100
3079 if (*str_off_ptr == 0)
3080 return 0;
3081
3101 strs = ctx;
3102 s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
3103 sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
3104 if (!s)
3105 return -EINVAL;
3106 s->used = true;
3107 return 0;
3108}
3082 s = btf__str_by_offset(d->btf, *str_off_ptr);
3083 len = strlen(s) + 1;
3109
3084
3110static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
3111{
3112 struct btf_str_ptrs *strs;
3113 struct btf_str_ptr *s;
3085 new_off = d->strs_len;
3086 p = btf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len);
3087 if (!p)
3088 return -ENOMEM;
3114
3089
3115 if (*str_off_ptr == 0)
3116 return 0;
3090 memcpy(p, s, len);
3117
3091
3118 strs = ctx;
3119 s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
3120 sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
3121 if (!s)
3122 return -EINVAL;
3123 *str_off_ptr = s->new_off;
3092 /* Now attempt to add the string, but only if the string with the same
3093 * contents doesn't exist already (HASHMAP_ADD strategy). If such
3094 * string exists, we'll get its offset in old_off (that's old_key).
3095 */
3096 err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off,
3097 HASHMAP_ADD, (const void **)&old_off, NULL);
3098 if (err == -EEXIST) {
3099 *str_off_ptr = old_off;
3100 } else if (err) {
3101 return err;
3102 } else {
3103 *str_off_ptr = new_off;
3104 d->strs_len += len;
3105 }
3124 return 0;
3125}
3126
3127/*
3128 * Dedup string and filter out those that are not referenced from either .BTF
3129 * or .BTF.ext (if provided) sections.
3130 *
3131 * This is done by building index of all strings in BTF's string section,
3132 * then iterating over all entities that can reference strings (e.g., type
3133 * names, struct field names, .BTF.ext line info, etc) and marking corresponding
3134 * strings as used. After that all used strings are deduped and compacted into
3135 * sequential blob of memory and new offsets are calculated. Then all the string
3136 * references are iterated again and rewritten using new offsets.
3137 */
3138static int btf_dedup_strings(struct btf_dedup *d)
3139{
3106 return 0;
3107}
3108
3109/*
3110 * Dedup string and filter out those that are not referenced from either .BTF
3111 * or .BTF.ext (if provided) sections.
3112 *
3113 * This is done by building index of all strings in BTF's string section,
3114 * then iterating over all entities that can reference strings (e.g., type
3115 * names, struct field names, .BTF.ext line info, etc) and marking corresponding
3116 * strings as used. After that all used strings are deduped and compacted into
3117 * sequential blob of memory and new offsets are calculated. Then all the string
3118 * references are iterated again and rewritten using new offsets.
3119 */
3120static int btf_dedup_strings(struct btf_dedup *d)
3121{
3140 char *start = d->btf->strs_data;
3141 char *end = start + d->btf->hdr->str_len;
3142 char *p = start, *tmp_strs = NULL;
3143 struct btf_str_ptrs strs = {
3144 .cnt = 0,
3145 .cap = 0,
3146 .ptrs = NULL,
3147 .data = start,
3148 };
3149 int i, j, err = 0, grp_idx;
3150 bool grp_used;
3122 char *s;
3123 int err;
3151
3152 if (d->btf->strs_deduped)
3153 return 0;
3154
3124
3125 if (d->btf->strs_deduped)
3126 return 0;
3127
3155 /* build index of all strings */
3156 while (p < end) {
3157 if (strs.cnt + 1 > strs.cap) {
3158 struct btf_str_ptr *new_ptrs;
3128 s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
3129 if (!s)
3130 return -ENOMEM;
3131 /* initial empty string */
3132 s[0] = 0;
3133 d->strs_len = 1;
3159
3134
3160 strs.cap += max(strs.cnt / 2, 16U);
3161 new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
3162 if (!new_ptrs) {
3163 err = -ENOMEM;
3164 goto done;
3165 }
3166 strs.ptrs = new_ptrs;
3167 }
3135 /* temporarily switch to use btf_dedup's strs_data for strings for hash
3136 * functions; later we'll just transfer hashmap to struct btf as is,
3137 * along the strs_data
3138 */
3139 d->btf->strs_data_ptr = &d->strs_data;
3168
3140
3169 strs.ptrs[strs.cnt].str = p;
3170 strs.ptrs[strs.cnt].used = false;
3171
3172 p += strlen(p) + 1;
3173 strs.cnt++;
3141 d->strs_hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, d->btf);
3142 if (IS_ERR(d->strs_hash)) {
3143 err = PTR_ERR(d->strs_hash);
3144 d->strs_hash = NULL;
3145 goto err_out;
3174 }
3175
3146 }
3147
3176 /* temporary storage for deduplicated strings */
3177 tmp_strs = malloc(d->btf->hdr->str_len);
3178 if (!tmp_strs) {
3179 err = -ENOMEM;
3180 goto done;
3181 }
3182
3183 /* mark all used strings */
3184 strs.ptrs[0].used = true;
3185 err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
3148 /* insert empty string; we won't be looking it up during strings
3149 * dedup, but it's good to have it for generic BTF string lookups
3150 */
3151 err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
3152 HASHMAP_ADD, NULL, NULL);
3186 if (err)
3153 if (err)
3187 goto done;
3154 goto err_out;
3188
3155
3189 /* sort strings by context, so that we can identify duplicates */
3190 qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
3191
3192 /*
3193 * iterate groups of equal strings and if any instance in a group was
3194 * referenced, emit single instance and remember new offset
3195 */
3196 p = tmp_strs;
3197 grp_idx = 0;
3198 grp_used = strs.ptrs[0].used;
3199 /* iterate past end to avoid code duplication after loop */
3200 for (i = 1; i <= strs.cnt; i++) {
3201 /*
3202 * when i == strs.cnt, we want to skip string comparison and go
3203 * straight to handling last group of strings (otherwise we'd
3204 * need to handle last group after the loop w/ duplicated code)
3205 */
3206 if (i < strs.cnt &&
3207 !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
3208 grp_used = grp_used || strs.ptrs[i].used;
3209 continue;
3210 }
3211
3212 /*
3213 * this check would have been required after the loop to handle
3214 * last group of strings, but due to <= condition in a loop
3215 * we avoid that duplication
3216 */
3217 if (grp_used) {
3218 int new_off = p - tmp_strs;
3219 __u32 len = strlen(strs.ptrs[grp_idx].str);
3220
3221 memmove(p, strs.ptrs[grp_idx].str, len + 1);
3222 for (j = grp_idx; j < i; j++)
3223 strs.ptrs[j].new_off = new_off;
3224 p += len + 1;
3225 }
3226
3227 if (i < strs.cnt) {
3228 grp_idx = i;
3229 grp_used = strs.ptrs[i].used;
3230 }
3231 }
3232
3233 /* replace original strings with deduped ones */
3234 d->btf->hdr->str_len = p - tmp_strs;
3235 memmove(start, tmp_strs, d->btf->hdr->str_len);
3236 end = start + d->btf->hdr->str_len;
3237
3238 /* restore original order for further binary search lookups */
3239 qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
3240
3241 /* remap string offsets */
3156 /* remap string offsets */
3242 err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
3157 err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d);
3243 if (err)
3158 if (err)
3244 goto done;
3159 goto err_out;
3245
3160
3246 d->btf->hdr->str_len = end - start;
3161 /* replace BTF string data and hash with deduped ones */
3162 free(d->btf->strs_data);
3163 hashmap__free(d->btf->strs_hash);
3164 d->btf->strs_data = d->strs_data;
3165 d->btf->strs_data_cap = d->strs_cap;
3166 d->btf->hdr->str_len = d->strs_len;
3167 d->btf->strs_hash = d->strs_hash;
3168 /* now point strs_data_ptr back to btf->strs_data */
3169 d->btf->strs_data_ptr = &d->btf->strs_data;
3170
3171 d->strs_data = d->strs_hash = NULL;
3172 d->strs_len = d->strs_cap = 0;
3247 d->btf->strs_deduped = true;
3173 d->btf->strs_deduped = true;
3174 return 0;
3248
3175
3249done:
3250 free(tmp_strs);
3251 free(strs.ptrs);
3176err_out:
3177 free(d->strs_data);
3178 hashmap__free(d->strs_hash);
3179 d->strs_data = d->strs_hash = NULL;
3180 d->strs_len = d->strs_cap = 0;
3181
3182 /* restore strings pointer for existing d->btf->strs_hash back */
3183 d->btf->strs_data_ptr = &d->strs_data;
3184
3252 return err;
3253}
3254
3255static long btf_hash_common(struct btf_type *t)
3256{
3257 long h;
3258
3259 h = hash_combine(0, t->name_off);

--- 1178 unchanged lines hidden ---
3185 return err;
3186}
3187
3188static long btf_hash_common(struct btf_type *t)
3189{
3190 long h;
3191
3192 h = hash_combine(0, t->name_off);

--- 1178 unchanged lines hidden ---