btf.c (c81ed6d81e0560713ceb94917ff1981848d0614e) | btf.c (88a82c2a9ab5b2ce533c3de3f4517853c2f67f53) |
---|---|
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2/* Copyright (c) 2018 Facebook */ 3 4#include <byteswap.h> 5#include <endian.h> 6#include <stdio.h> 7#include <stdlib.h> 8#include <string.h> --- 76 unchanged lines hidden (view full) --- 85 86 void *strs_data; 87 size_t strs_data_cap; /* used size stored in hdr->str_len */ 88 89 /* lookup index for each unique string in strings section */ 90 struct hashmap *strs_hash; 91 /* whether strings are already deduplicated */ 92 bool strs_deduped; | 1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2/* Copyright (c) 2018 Facebook */ 3 4#include <byteswap.h> 5#include <endian.h> 6#include <stdio.h> 7#include <stdlib.h> 8#include <string.h> --- 76 unchanged lines hidden (view full) --- 85 86 void *strs_data; 87 size_t strs_data_cap; /* used size stored in hdr->str_len */ 88 89 /* lookup index for each unique string in strings section */ 90 struct hashmap *strs_hash; 91 /* whether strings are already deduplicated */ 92 bool strs_deduped; |
93 /* extra indirection layer to make strings hashmap work with stable 94 * string offsets and ability to transparently choose between 95 * btf->strs_data or btf_dedup->strs_data as a source of strings. 96 * This is used for BTF strings dedup to transfer deduplicated strings 97 * data back to struct btf without re-building strings index. 98 */ 99 void **strs_data_ptr; 100 |
|
93 /* BTF object FD, if loaded into kernel */ 94 int fd; 95 96 /* Pointer size (in bytes) for a target architecture of this BTF */ 97 int ptr_sz; 98}; 99 100static inline __u64 ptr_to_u64(const void *ptr) --- 1257 unchanged lines hidden (view full) --- 1358 *key_type_id = key->type; 1359 *value_type_id = value->type; 1360 1361 return 0; 1362} 1363 1364static size_t strs_hash_fn(const void *key, void *ctx) 1365{ | 101 /* BTF object FD, if loaded into kernel */ 102 int fd; 103 104 /* Pointer size (in bytes) for a target architecture of this BTF */ 105 int ptr_sz; 106}; 107 108static inline __u64 ptr_to_u64(const void *ptr) --- 1257 unchanged lines hidden (view full) --- 1366 *key_type_id = key->type; 1367 *value_type_id = value->type; 1368 1369 return 0; 1370} 1371 1372static size_t strs_hash_fn(const void *key, void *ctx) 1373{ |
1366 struct btf *btf = ctx; 1367 const char *str = btf->strs_data + (long)key; | 1374 const struct btf *btf = ctx; 1375 const char *strs = *btf->strs_data_ptr; 1376 const char *str = strs + (long)key; |
1368 1369 return str_hash(str); 1370} 1371 1372static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx) 1373{ | 1377 1378 return str_hash(str); 1379} 1380 1381static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx) 1382{ |
1374 struct btf *btf = ctx; 1375 const char *str1 = btf->strs_data + (long)key1; 1376 const char *str2 = btf->strs_data + (long)key2; | 1383 const struct btf *btf = ctx; 1384 const char *strs = *btf->strs_data_ptr; 1385 const char *str1 = strs + (long)key1; 1386 const char *str2 = strs + (long)key2; |
1377 1378 return strcmp(str1, str2) == 0; 1379} 1380 1381static void btf_invalidate_raw_data(struct btf *btf) 1382{ 1383 if (btf->raw_data) { 1384 free(btf->raw_data); --- 28 unchanged lines hidden (view full) --- 1413 strs = malloc(btf->hdr->str_len); 1414 if (!hdr || !types || !strs) 1415 goto err_out; 1416 1417 memcpy(hdr, btf->hdr, btf->hdr->hdr_len); 1418 memcpy(types, btf->types_data, btf->hdr->type_len); 1419 memcpy(strs, btf->strs_data, btf->hdr->str_len); 1420 | 1387 1388 return strcmp(str1, str2) == 0; 1389} 1390 1391static void btf_invalidate_raw_data(struct btf *btf) 1392{ 1393 if (btf->raw_data) { 1394 free(btf->raw_data); --- 28 unchanged lines hidden (view full) --- 1423 strs = malloc(btf->hdr->str_len); 1424 if (!hdr || !types || !strs) 1425 goto err_out; 1426 1427 memcpy(hdr, btf->hdr, btf->hdr->hdr_len); 1428 memcpy(types, btf->types_data, btf->hdr->type_len); 1429 memcpy(strs, btf->strs_data, btf->hdr->str_len); 1430 |
1431 /* make hashmap below use btf->strs_data as a source of strings */ 1432 btf->strs_data_ptr = &btf->strs_data; 1433 |
|
1421 /* build lookup index for all strings */ 1422 hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf); 1423 if (IS_ERR(hash)) { 1424 err = PTR_ERR(hash); 1425 hash = NULL; 1426 goto err_out; 1427 } 1428 --- 1390 unchanged lines hidden (view full) --- 2819 __u32 *map; 2820 /* Hypothetical mapping, used during type graph equivalence checks */ 2821 __u32 *hypot_map; 2822 __u32 *hypot_list; 2823 size_t hypot_cnt; 2824 size_t hypot_cap; 2825 /* Various option modifying behavior of algorithm */ 2826 struct btf_dedup_opts opts; | 1434 /* build lookup index for all strings */ 1435 hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf); 1436 if (IS_ERR(hash)) { 1437 err = PTR_ERR(hash); 1438 hash = NULL; 1439 goto err_out; 1440 } 1441 --- 1390 unchanged lines hidden (view full) --- 2832 __u32 *map; 2833 /* Hypothetical mapping, used during type graph equivalence checks */ 2834 __u32 *hypot_map; 2835 __u32 *hypot_list; 2836 size_t hypot_cnt; 2837 size_t hypot_cap; 2838 /* Various option modifying behavior of algorithm */ 2839 struct btf_dedup_opts opts; |
2840 /* temporary strings deduplication state */ 2841 void *strs_data; 2842 size_t strs_cap; 2843 size_t strs_len; 2844 struct hashmap* strs_hash; |
|
2827}; 2828 | 2845}; 2846 |
2829struct btf_str_ptr { 2830 const char *str; 2831 __u32 new_off; 2832 bool used; 2833}; 2834 2835struct btf_str_ptrs { 2836 struct btf_str_ptr *ptrs; 2837 const char *data; 2838 __u32 cnt; 2839 __u32 cap; 2840}; 2841 | |
2842static long hash_combine(long h, long value) 2843{ 2844 return h * 31 + value; 2845} 2846 2847#define for_each_dedup_cand(d, node, hash) \ 2848 hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash) 2849 --- 208 unchanged lines hidden (view full) --- 3058 return r; 3059 line_data_cur += rec_size; 3060 } 3061 } 3062 3063 return 0; 3064} 3065 | 2847static long hash_combine(long h, long value) 2848{ 2849 return h * 31 + value; 2850} 2851 2852#define for_each_dedup_cand(d, node, hash) \ 2853 hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash) 2854 --- 208 unchanged lines hidden (view full) --- 3063 return r; 3064 line_data_cur += rec_size; 3065 } 3066 } 3067 3068 return 0; 3069} 3070 |
3066static int str_sort_by_content(const void *a1, const void *a2) | 3071static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx) |
3067{ | 3072{ |
3068 const struct btf_str_ptr *p1 = a1; 3069 const struct btf_str_ptr *p2 = a2; | 3073 struct btf_dedup *d = ctx; 3074 long old_off, new_off, len; 3075 const char *s; 3076 void *p; 3077 int err; |
3070 | 3078 |
3071 return strcmp(p1->str, p2->str); 3072} 3073 3074static int str_sort_by_offset(const void *a1, const void *a2) 3075{ 3076 const struct btf_str_ptr *p1 = a1; 3077 const struct btf_str_ptr *p2 = a2; 3078 3079 if (p1->str != p2->str) 3080 return p1->str < p2->str ? -1 : 1; 3081 return 0; 3082} 3083 3084static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem) 3085{ 3086 const struct btf_str_ptr *p = pelem; 3087 3088 if (str_ptr != p->str) 3089 return (const char *)str_ptr < p->str ? -1 : 1; 3090 return 0; 3091} 3092 3093static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx) 3094{ 3095 struct btf_str_ptrs *strs; 3096 struct btf_str_ptr *s; 3097 | |
3098 if (*str_off_ptr == 0) 3099 return 0; 3100 | 3079 if (*str_off_ptr == 0) 3080 return 0; 3081 |
3101 strs = ctx; 3102 s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, 3103 sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); 3104 if (!s) 3105 return -EINVAL; 3106 s->used = true; 3107 return 0; 3108} | 3082 s = btf__str_by_offset(d->btf, *str_off_ptr); 3083 len = strlen(s) + 1; |
3109 | 3084 |
3110static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) 3111{ 3112 struct btf_str_ptrs *strs; 3113 struct btf_str_ptr *s; | 3085 new_off = d->strs_len; 3086 p = btf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len); 3087 if (!p) 3088 return -ENOMEM; |
3114 | 3089 |
3115 if (*str_off_ptr == 0) 3116 return 0; | 3090 memcpy(p, s, len); |
3117 | 3091 |
3118 strs = ctx; 3119 s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, 3120 sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); 3121 if (!s) 3122 return -EINVAL; 3123 *str_off_ptr = s->new_off; | 3092 /* Now attempt to add the string, but only if the string with the same 3093 * contents doesn't exist already (HASHMAP_ADD strategy). If such 3094 * string exists, we'll get its offset in old_off (that's old_key). 3095 */ 3096 err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off, 3097 HASHMAP_ADD, (const void **)&old_off, NULL); 3098 if (err == -EEXIST) { 3099 *str_off_ptr = old_off; 3100 } else if (err) { 3101 return err; 3102 } else { 3103 *str_off_ptr = new_off; 3104 d->strs_len += len; 3105 } |
3124 return 0; 3125} 3126 3127/* 3128 * Dedup string and filter out those that are not referenced from either .BTF 3129 * or .BTF.ext (if provided) sections. 3130 * 3131 * This is done by building index of all strings in BTF's string section, 3132 * then iterating over all entities that can reference strings (e.g., type 3133 * names, struct field names, .BTF.ext line info, etc) and marking corresponding 3134 * strings as used. After that all used strings are deduped and compacted into 3135 * sequential blob of memory and new offsets are calculated. Then all the string 3136 * references are iterated again and rewritten using new offsets. 3137 */ 3138static int btf_dedup_strings(struct btf_dedup *d) 3139{ | 3106 return 0; 3107} 3108 3109/* 3110 * Dedup string and filter out those that are not referenced from either .BTF 3111 * or .BTF.ext (if provided) sections. 3112 * 3113 * This is done by building index of all strings in BTF's string section, 3114 * then iterating over all entities that can reference strings (e.g., type 3115 * names, struct field names, .BTF.ext line info, etc) and marking corresponding 3116 * strings as used. After that all used strings are deduped and compacted into 3117 * sequential blob of memory and new offsets are calculated. Then all the string 3118 * references are iterated again and rewritten using new offsets. 3119 */ 3120static int btf_dedup_strings(struct btf_dedup *d) 3121{ |
3140 char *start = d->btf->strs_data; 3141 char *end = start + d->btf->hdr->str_len; 3142 char *p = start, *tmp_strs = NULL; 3143 struct btf_str_ptrs strs = { 3144 .cnt = 0, 3145 .cap = 0, 3146 .ptrs = NULL, 3147 .data = start, 3148 }; 3149 int i, j, err = 0, grp_idx; 3150 bool grp_used; | 3122 char *s; 3123 int err; |
3151 3152 if (d->btf->strs_deduped) 3153 return 0; 3154 | 3124 3125 if (d->btf->strs_deduped) 3126 return 0; 3127 |
3155 /* build index of all strings */ 3156 while (p < end) { 3157 if (strs.cnt + 1 > strs.cap) { 3158 struct btf_str_ptr *new_ptrs; | 3128 s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1); 3129 if (!s) 3130 return -ENOMEM; 3131 /* initial empty string */ 3132 s[0] = 0; 3133 d->strs_len = 1; |
3159 | 3134 |
3160 strs.cap += max(strs.cnt / 2, 16U); 3161 new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0])); 3162 if (!new_ptrs) { 3163 err = -ENOMEM; 3164 goto done; 3165 } 3166 strs.ptrs = new_ptrs; 3167 } | 3135 /* temporarily switch to use btf_dedup's strs_data for strings for hash 3136 * functions; later we'll just transfer hashmap to struct btf as is, 3137 * along the strs_data 3138 */ 3139 d->btf->strs_data_ptr = &d->strs_data; |
3168 | 3140 |
3169 strs.ptrs[strs.cnt].str = p; 3170 strs.ptrs[strs.cnt].used = false; 3171 3172 p += strlen(p) + 1; 3173 strs.cnt++; | 3141 d->strs_hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, d->btf); 3142 if (IS_ERR(d->strs_hash)) { 3143 err = PTR_ERR(d->strs_hash); 3144 d->strs_hash = NULL; 3145 goto err_out; |
3174 } 3175 | 3146 } 3147 |
3176 /* temporary storage for deduplicated strings */ 3177 tmp_strs = malloc(d->btf->hdr->str_len); 3178 if (!tmp_strs) { 3179 err = -ENOMEM; 3180 goto done; 3181 } 3182 3183 /* mark all used strings */ 3184 strs.ptrs[0].used = true; 3185 err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs); | 3148 /* insert empty string; we won't be looking it up during strings 3149 * dedup, but it's good to have it for generic BTF string lookups 3150 */ 3151 err = hashmap__insert(d->strs_hash, (void *)0, (void *)0, 3152 HASHMAP_ADD, NULL, NULL); |
3186 if (err) | 3153 if (err) |
3187 goto done; | 3154 goto err_out; |
3188 | 3155 |
3189 /* sort strings by context, so that we can identify duplicates */ 3190 qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content); 3191 3192 /* 3193 * iterate groups of equal strings and if any instance in a group was 3194 * referenced, emit single instance and remember new offset 3195 */ 3196 p = tmp_strs; 3197 grp_idx = 0; 3198 grp_used = strs.ptrs[0].used; 3199 /* iterate past end to avoid code duplication after loop */ 3200 for (i = 1; i <= strs.cnt; i++) { 3201 /* 3202 * when i == strs.cnt, we want to skip string comparison and go 3203 * straight to handling last group of strings (otherwise we'd 3204 * need to handle last group after the loop w/ duplicated code) 3205 */ 3206 if (i < strs.cnt && 3207 !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) { 3208 grp_used = grp_used || strs.ptrs[i].used; 3209 continue; 3210 } 3211 3212 /* 3213 * this check would have been required after the loop to handle 3214 * last group of strings, but due to <= condition in a loop 3215 * we avoid that duplication 3216 */ 3217 if (grp_used) { 3218 int new_off = p - tmp_strs; 3219 __u32 len = strlen(strs.ptrs[grp_idx].str); 3220 3221 memmove(p, strs.ptrs[grp_idx].str, len + 1); 3222 for (j = grp_idx; j < i; j++) 3223 strs.ptrs[j].new_off = new_off; 3224 p += len + 1; 3225 } 3226 3227 if (i < strs.cnt) { 3228 grp_idx = i; 3229 grp_used = strs.ptrs[i].used; 3230 } 3231 } 3232 3233 /* replace original strings with deduped ones */ 3234 d->btf->hdr->str_len = p - tmp_strs; 3235 memmove(start, tmp_strs, d->btf->hdr->str_len); 3236 end = start + d->btf->hdr->str_len; 3237 3238 /* restore original order for further binary search lookups */ 3239 qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset); 3240 | |
3241 /* remap string offsets */ | 3156 /* remap string offsets */ |
3242 err = btf_for_each_str_off(d, btf_str_remap_offset, &strs); | 3157 err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d); |
3243 if (err) | 3158 if (err) |
3244 goto done; | 3159 goto err_out; |
3245 | 3160 |
3246 d->btf->hdr->str_len = end - start; | 3161 /* replace BTF string data and hash with deduped ones */ 3162 free(d->btf->strs_data); 3163 hashmap__free(d->btf->strs_hash); 3164 d->btf->strs_data = d->strs_data; 3165 d->btf->strs_data_cap = d->strs_cap; 3166 d->btf->hdr->str_len = d->strs_len; 3167 d->btf->strs_hash = d->strs_hash; 3168 /* now point strs_data_ptr back to btf->strs_data */ 3169 d->btf->strs_data_ptr = &d->btf->strs_data; 3170 3171 d->strs_data = d->strs_hash = NULL; 3172 d->strs_len = d->strs_cap = 0; |
3247 d->btf->strs_deduped = true; | 3173 d->btf->strs_deduped = true; |
3174 return 0; |
|
3248 | 3175 |
3249done: 3250 free(tmp_strs); 3251 free(strs.ptrs); | 3176err_out: 3177 free(d->strs_data); 3178 hashmap__free(d->strs_hash); 3179 d->strs_data = d->strs_hash = NULL; 3180 d->strs_len = d->strs_cap = 0; 3181 3182 /* restore strings pointer for existing d->btf->strs_hash back */ 3183 d->btf->strs_data_ptr = &d->strs_data; 3184 |
3252 return err; 3253} 3254 3255static long btf_hash_common(struct btf_type *t) 3256{ 3257 long h; 3258 3259 h = hash_combine(0, t->name_off); --- 1178 unchanged lines hidden --- | 3185 return err; 3186} 3187 3188static long btf_hash_common(struct btf_type *t) 3189{ 3190 long h; 3191 3192 h = hash_combine(0, t->name_off); --- 1178 unchanged lines hidden --- |