1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 3 /* 4 * BTF-to-C type converter. 5 * 6 * Copyright (c) 2019 Facebook 7 */ 8 9 #include <stdbool.h> 10 #include <stddef.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <errno.h> 14 #include <linux/err.h> 15 #include <linux/btf.h> 16 #include "btf.h" 17 #include "hashmap.h" 18 #include "libbpf.h" 19 #include "libbpf_internal.h" 20 21 static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; 22 static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; 23 24 static const char *pfx(int lvl) 25 { 26 return lvl >= PREFIX_CNT ? PREFIXES : &PREFIXES[PREFIX_CNT - lvl]; 27 } 28 29 enum btf_dump_type_order_state { 30 NOT_ORDERED, 31 ORDERING, 32 ORDERED, 33 }; 34 35 enum btf_dump_type_emit_state { 36 NOT_EMITTED, 37 EMITTING, 38 EMITTED, 39 }; 40 41 /* per-type auxiliary state */ 42 struct btf_dump_type_aux_state { 43 /* topological sorting state */ 44 enum btf_dump_type_order_state order_state: 2; 45 /* emitting state used to determine the need for forward declaration */ 46 enum btf_dump_type_emit_state emit_state: 2; 47 /* whether forward declaration was already emitted */ 48 __u8 fwd_emitted: 1; 49 /* whether unique non-duplicate name was already assigned */ 50 __u8 name_resolved: 1; 51 }; 52 53 struct btf_dump { 54 const struct btf *btf; 55 const struct btf_ext *btf_ext; 56 btf_dump_printf_fn_t printf_fn; 57 struct btf_dump_opts opts; 58 59 /* per-type auxiliary state */ 60 struct btf_dump_type_aux_state *type_states; 61 /* per-type optional cached unique name, must be freed, if present */ 62 const char **cached_names; 63 64 /* topo-sorted list of dependent type definitions */ 65 __u32 *emit_queue; 66 int emit_queue_cap; 67 int emit_queue_cnt; 68 69 /* 70 * stack of type declarations (e.g., chain of modifiers, arrays, 71 * funcs, etc) 72 */ 73 __u32 *decl_stack; 74 int decl_stack_cap; 75 int decl_stack_cnt; 76 77 /* maps struct/union/enum name to a number of name occurrences */ 78 struct hashmap *type_names; 79 /* 80 * maps typedef identifiers and enum value names to a number of such 81 * name occurrences 82 */ 83 struct hashmap *ident_names; 84 }; 85 86 static size_t str_hash_fn(const void *key, void *ctx) 87 { 88 const char *s = key; 89 size_t h = 0; 90 91 while (*s) { 92 h = h * 31 + *s; 93 s++; 94 } 95 return h; 96 } 97 98 static bool str_equal_fn(const void *a, const void *b, void *ctx) 99 { 100 return strcmp(a, b) == 0; 101 } 102 103 static __u16 btf_kind_of(const struct btf_type *t) 104 { 105 return BTF_INFO_KIND(t->info); 106 } 107 108 static __u16 btf_vlen_of(const struct btf_type *t) 109 { 110 return BTF_INFO_VLEN(t->info); 111 } 112 113 static bool btf_kflag_of(const struct btf_type *t) 114 { 115 return BTF_INFO_KFLAG(t->info); 116 } 117 118 static const char *btf_name_of(const struct btf_dump *d, __u32 name_off) 119 { 120 return btf__name_by_offset(d->btf, name_off); 121 } 122 123 static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) 124 { 125 va_list args; 126 127 va_start(args, fmt); 128 d->printf_fn(d->opts.ctx, fmt, args); 129 va_end(args); 130 } 131 132 struct btf_dump *btf_dump__new(const struct btf *btf, 133 const struct btf_ext *btf_ext, 134 const struct btf_dump_opts *opts, 135 btf_dump_printf_fn_t printf_fn) 136 { 137 struct btf_dump *d; 138 int err; 139 140 d = calloc(1, sizeof(struct btf_dump)); 141 if (!d) 142 return ERR_PTR(-ENOMEM); 143 144 d->btf = btf; 145 d->btf_ext = btf_ext; 146 d->printf_fn = printf_fn; 147 d->opts.ctx = opts ? opts->ctx : NULL; 148 149 d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); 150 if (IS_ERR(d->type_names)) { 151 err = PTR_ERR(d->type_names); 152 d->type_names = NULL; 153 btf_dump__free(d); 154 return ERR_PTR(err); 155 } 156 d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); 157 if (IS_ERR(d->ident_names)) { 158 err = PTR_ERR(d->ident_names); 159 d->ident_names = NULL; 160 btf_dump__free(d); 161 return ERR_PTR(err); 162 } 163 164 return d; 165 } 166 167 void btf_dump__free(struct btf_dump *d) 168 { 169 int i, cnt; 170 171 if (!d) 172 return; 173 174 free(d->type_states); 175 if (d->cached_names) { 176 /* any set cached name is owned by us and should be freed */ 177 for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) { 178 if (d->cached_names[i]) 179 free((void *)d->cached_names[i]); 180 } 181 } 182 free(d->cached_names); 183 free(d->emit_queue); 184 free(d->decl_stack); 185 hashmap__free(d->type_names); 186 hashmap__free(d->ident_names); 187 188 free(d); 189 } 190 191 static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr); 192 static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id); 193 194 /* 195 * Dump BTF type in a compilable C syntax, including all the necessary 196 * dependent types, necessary for compilation. If some of the dependent types 197 * were already emitted as part of previous btf_dump__dump_type() invocation 198 * for another type, they won't be emitted again. This API allows callers to 199 * filter out BTF types according to user-defined criterias and emitted only 200 * minimal subset of types, necessary to compile everything. Full struct/union 201 * definitions will still be emitted, even if the only usage is through 202 * pointer and could be satisfied with just a forward declaration. 203 * 204 * Dumping is done in two high-level passes: 205 * 1. Topologically sort type definitions to satisfy C rules of compilation. 206 * 2. Emit type definitions in C syntax. 207 * 208 * Returns 0 on success; <0, otherwise. 209 */ 210 int btf_dump__dump_type(struct btf_dump *d, __u32 id) 211 { 212 int err, i; 213 214 if (id > btf__get_nr_types(d->btf)) 215 return -EINVAL; 216 217 /* type states are lazily allocated, as they might not be needed */ 218 if (!d->type_states) { 219 d->type_states = calloc(1 + btf__get_nr_types(d->btf), 220 sizeof(d->type_states[0])); 221 if (!d->type_states) 222 return -ENOMEM; 223 d->cached_names = calloc(1 + btf__get_nr_types(d->btf), 224 sizeof(d->cached_names[0])); 225 if (!d->cached_names) 226 return -ENOMEM; 227 228 /* VOID is special */ 229 d->type_states[0].order_state = ORDERED; 230 d->type_states[0].emit_state = EMITTED; 231 } 232 233 d->emit_queue_cnt = 0; 234 err = btf_dump_order_type(d, id, false); 235 if (err < 0) 236 return err; 237 238 for (i = 0; i < d->emit_queue_cnt; i++) 239 btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/); 240 241 return 0; 242 } 243 244 static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id) 245 { 246 __u32 *new_queue; 247 size_t new_cap; 248 249 if (d->emit_queue_cnt >= d->emit_queue_cap) { 250 new_cap = max(16, d->emit_queue_cap * 3 / 2); 251 new_queue = realloc(d->emit_queue, 252 new_cap * sizeof(new_queue[0])); 253 if (!new_queue) 254 return -ENOMEM; 255 d->emit_queue = new_queue; 256 d->emit_queue_cap = new_cap; 257 } 258 259 d->emit_queue[d->emit_queue_cnt++] = id; 260 return 0; 261 } 262 263 /* 264 * Determine order of emitting dependent types and specified type to satisfy 265 * C compilation rules. This is done through topological sorting with an 266 * additional complication which comes from C rules. The main idea for C is 267 * that if some type is "embedded" into a struct/union, it's size needs to be 268 * known at the time of definition of containing type. E.g., for: 269 * 270 * struct A {}; 271 * struct B { struct A x; } 272 * 273 * struct A *HAS* to be defined before struct B, because it's "embedded", 274 * i.e., it is part of struct B layout. But in the following case: 275 * 276 * struct A; 277 * struct B { struct A *x; } 278 * struct A {}; 279 * 280 * it's enough to just have a forward declaration of struct A at the time of 281 * struct B definition, as struct B has a pointer to struct A, so the size of 282 * field x is known without knowing struct A size: it's sizeof(void *). 283 * 284 * Unfortunately, there are some trickier cases we need to handle, e.g.: 285 * 286 * struct A {}; // if this was forward-declaration: compilation error 287 * struct B { 288 * struct { // anonymous struct 289 * struct A y; 290 * } *x; 291 * }; 292 * 293 * In this case, struct B's field x is a pointer, so it's size is known 294 * regardless of the size of (anonymous) struct it points to. But because this 295 * struct is anonymous and thus defined inline inside struct B, *and* it 296 * embeds struct A, compiler requires full definition of struct A to be known 297 * before struct B can be defined. This creates a transitive dependency 298 * between struct A and struct B. If struct A was forward-declared before 299 * struct B definition and fully defined after struct B definition, that would 300 * trigger compilation error. 301 * 302 * All this means that while we are doing topological sorting on BTF type 303 * graph, we need to determine relationships between different types (graph 304 * nodes): 305 * - weak link (relationship) between X and Y, if Y *CAN* be 306 * forward-declared at the point of X definition; 307 * - strong link, if Y *HAS* to be fully-defined before X can be defined. 308 * 309 * The rule is as follows. Given a chain of BTF types from X to Y, if there is 310 * BTF_KIND_PTR type in the chain and at least one non-anonymous type 311 * Z (excluding X, including Y), then link is weak. Otherwise, it's strong. 312 * Weak/strong relationship is determined recursively during DFS traversal and 313 * is returned as a result from btf_dump_order_type(). 314 * 315 * btf_dump_order_type() is trying to avoid unnecessary forward declarations, 316 * but it is not guaranteeing that no extraneous forward declarations will be 317 * emitted. 318 * 319 * To avoid extra work, algorithm marks some of BTF types as ORDERED, when 320 * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT, 321 * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the 322 * entire graph path, so depending where from one came to that BTF type, it 323 * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM, 324 * once they are processed, there is no need to do it again, so they are 325 * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces 326 * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But 327 * in any case, once those are processed, no need to do it again, as the 328 * result won't change. 329 * 330 * Returns: 331 * - 1, if type is part of strong link (so there is strong topological 332 * ordering requirements); 333 * - 0, if type is part of weak link (so can be satisfied through forward 334 * declaration); 335 * - <0, on error (e.g., unsatisfiable type loop detected). 336 */ 337 static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) 338 { 339 /* 340 * Order state is used to detect strong link cycles, but only for BTF 341 * kinds that are or could be an independent definition (i.e., 342 * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays, 343 * func_protos, modifiers are just means to get to these definitions. 344 * Int/void don't need definitions, they are assumed to be always 345 * properly defined. We also ignore datasec, var, and funcs for now. 346 * So for all non-defining kinds, we never even set ordering state, 347 * for defining kinds we set ORDERING and subsequently ORDERED if it 348 * forms a strong link. 349 */ 350 struct btf_dump_type_aux_state *tstate = &d->type_states[id]; 351 const struct btf_type *t; 352 __u16 kind, vlen; 353 int err, i; 354 355 /* return true, letting typedefs know that it's ok to be emitted */ 356 if (tstate->order_state == ORDERED) 357 return 1; 358 359 t = btf__type_by_id(d->btf, id); 360 kind = btf_kind_of(t); 361 362 if (tstate->order_state == ORDERING) { 363 /* type loop, but resolvable through fwd declaration */ 364 if ((kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION) && 365 through_ptr && t->name_off != 0) 366 return 0; 367 pr_warning("unsatisfiable type cycle, id:[%u]\n", id); 368 return -ELOOP; 369 } 370 371 switch (kind) { 372 case BTF_KIND_INT: 373 tstate->order_state = ORDERED; 374 return 0; 375 376 case BTF_KIND_PTR: 377 err = btf_dump_order_type(d, t->type, true); 378 tstate->order_state = ORDERED; 379 return err; 380 381 case BTF_KIND_ARRAY: { 382 const struct btf_array *a = (void *)(t + 1); 383 384 return btf_dump_order_type(d, a->type, through_ptr); 385 } 386 case BTF_KIND_STRUCT: 387 case BTF_KIND_UNION: { 388 const struct btf_member *m = (void *)(t + 1); 389 /* 390 * struct/union is part of strong link, only if it's embedded 391 * (so no ptr in a path) or it's anonymous (so has to be 392 * defined inline, even if declared through ptr) 393 */ 394 if (through_ptr && t->name_off != 0) 395 return 0; 396 397 tstate->order_state = ORDERING; 398 399 vlen = btf_vlen_of(t); 400 for (i = 0; i < vlen; i++, m++) { 401 err = btf_dump_order_type(d, m->type, false); 402 if (err < 0) 403 return err; 404 } 405 406 if (t->name_off != 0) { 407 err = btf_dump_add_emit_queue_id(d, id); 408 if (err < 0) 409 return err; 410 } 411 412 tstate->order_state = ORDERED; 413 return 1; 414 } 415 case BTF_KIND_ENUM: 416 case BTF_KIND_FWD: 417 if (t->name_off != 0) { 418 err = btf_dump_add_emit_queue_id(d, id); 419 if (err) 420 return err; 421 } 422 tstate->order_state = ORDERED; 423 return 1; 424 425 case BTF_KIND_TYPEDEF: { 426 int is_strong; 427 428 is_strong = btf_dump_order_type(d, t->type, through_ptr); 429 if (is_strong < 0) 430 return is_strong; 431 432 /* typedef is similar to struct/union w.r.t. fwd-decls */ 433 if (through_ptr && !is_strong) 434 return 0; 435 436 /* typedef is always a named definition */ 437 err = btf_dump_add_emit_queue_id(d, id); 438 if (err) 439 return err; 440 441 d->type_states[id].order_state = ORDERED; 442 return 1; 443 } 444 case BTF_KIND_VOLATILE: 445 case BTF_KIND_CONST: 446 case BTF_KIND_RESTRICT: 447 return btf_dump_order_type(d, t->type, through_ptr); 448 449 case BTF_KIND_FUNC_PROTO: { 450 const struct btf_param *p = (void *)(t + 1); 451 bool is_strong; 452 453 err = btf_dump_order_type(d, t->type, through_ptr); 454 if (err < 0) 455 return err; 456 is_strong = err > 0; 457 458 vlen = btf_vlen_of(t); 459 for (i = 0; i < vlen; i++, p++) { 460 err = btf_dump_order_type(d, p->type, through_ptr); 461 if (err < 0) 462 return err; 463 if (err > 0) 464 is_strong = true; 465 } 466 return is_strong; 467 } 468 case BTF_KIND_FUNC: 469 case BTF_KIND_VAR: 470 case BTF_KIND_DATASEC: 471 d->type_states[id].order_state = ORDERED; 472 return 0; 473 474 default: 475 return -EINVAL; 476 } 477 } 478 479 static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, 480 const struct btf_type *t); 481 static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id, 482 const struct btf_type *t, int lvl); 483 484 static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, 485 const struct btf_type *t); 486 static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, 487 const struct btf_type *t, int lvl); 488 489 static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, 490 const struct btf_type *t); 491 492 static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, 493 const struct btf_type *t, int lvl); 494 495 /* a local view into a shared stack */ 496 struct id_stack { 497 const __u32 *ids; 498 int cnt; 499 }; 500 501 static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, 502 const char *fname, int lvl); 503 static void btf_dump_emit_type_chain(struct btf_dump *d, 504 struct id_stack *decl_stack, 505 const char *fname, int lvl); 506 507 static const char *btf_dump_type_name(struct btf_dump *d, __u32 id); 508 static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id); 509 static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, 510 const char *orig_name); 511 512 static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id) 513 { 514 const struct btf_type *t = btf__type_by_id(d->btf, id); 515 516 /* __builtin_va_list is a compiler built-in, which causes compilation 517 * errors, when compiling w/ different compiler, then used to compile 518 * original code (e.g., GCC to compile kernel, Clang to use generated 519 * C header from BTF). As it is built-in, it should be already defined 520 * properly internally in compiler. 521 */ 522 if (t->name_off == 0) 523 return false; 524 return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0; 525 } 526 527 /* 528 * Emit C-syntax definitions of types from chains of BTF types. 529 * 530 * High-level handling of determining necessary forward declarations are handled 531 * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type 532 * declarations/definitions in C syntax are handled by a combo of 533 * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to 534 * corresponding btf_dump_emit_*_{def,fwd}() functions. 535 * 536 * We also keep track of "containing struct/union type ID" to determine when 537 * we reference it from inside and thus can avoid emitting unnecessary forward 538 * declaration. 539 * 540 * This algorithm is designed in such a way, that even if some error occurs 541 * (either technical, e.g., out of memory, or logical, i.e., malformed BTF 542 * that doesn't comply to C rules completely), algorithm will try to proceed 543 * and produce as much meaningful output as possible. 544 */ 545 static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) 546 { 547 struct btf_dump_type_aux_state *tstate = &d->type_states[id]; 548 bool top_level_def = cont_id == 0; 549 const struct btf_type *t; 550 __u16 kind; 551 552 if (tstate->emit_state == EMITTED) 553 return; 554 555 t = btf__type_by_id(d->btf, id); 556 kind = btf_kind_of(t); 557 558 if (top_level_def && t->name_off == 0) { 559 pr_warning("unexpected nameless definition, id:[%u]\n", id); 560 return; 561 } 562 563 if (tstate->emit_state == EMITTING) { 564 if (tstate->fwd_emitted) 565 return; 566 567 switch (kind) { 568 case BTF_KIND_STRUCT: 569 case BTF_KIND_UNION: 570 /* 571 * if we are referencing a struct/union that we are 572 * part of - then no need for fwd declaration 573 */ 574 if (id == cont_id) 575 return; 576 if (t->name_off == 0) { 577 pr_warning("anonymous struct/union loop, id:[%u]\n", 578 id); 579 return; 580 } 581 btf_dump_emit_struct_fwd(d, id, t); 582 btf_dump_printf(d, ";\n\n"); 583 tstate->fwd_emitted = 1; 584 break; 585 case BTF_KIND_TYPEDEF: 586 /* 587 * for typedef fwd_emitted means typedef definition 588 * was emitted, but it can be used only for "weak" 589 * references through pointer only, not for embedding 590 */ 591 if (!btf_dump_is_blacklisted(d, id)) { 592 btf_dump_emit_typedef_def(d, id, t, 0); 593 btf_dump_printf(d, ";\n\n"); 594 }; 595 tstate->fwd_emitted = 1; 596 break; 597 default: 598 break; 599 } 600 601 return; 602 } 603 604 switch (kind) { 605 case BTF_KIND_INT: 606 tstate->emit_state = EMITTED; 607 break; 608 case BTF_KIND_ENUM: 609 if (top_level_def) { 610 btf_dump_emit_enum_def(d, id, t, 0); 611 btf_dump_printf(d, ";\n\n"); 612 } 613 tstate->emit_state = EMITTED; 614 break; 615 case BTF_KIND_PTR: 616 case BTF_KIND_VOLATILE: 617 case BTF_KIND_CONST: 618 case BTF_KIND_RESTRICT: 619 btf_dump_emit_type(d, t->type, cont_id); 620 break; 621 case BTF_KIND_ARRAY: { 622 const struct btf_array *a = (void *)(t + 1); 623 624 btf_dump_emit_type(d, a->type, cont_id); 625 break; 626 } 627 case BTF_KIND_FWD: 628 btf_dump_emit_fwd_def(d, id, t); 629 btf_dump_printf(d, ";\n\n"); 630 tstate->emit_state = EMITTED; 631 break; 632 case BTF_KIND_TYPEDEF: 633 tstate->emit_state = EMITTING; 634 btf_dump_emit_type(d, t->type, id); 635 /* 636 * typedef can server as both definition and forward 637 * declaration; at this stage someone depends on 638 * typedef as a forward declaration (refers to it 639 * through pointer), so unless we already did it, 640 * emit typedef as a forward declaration 641 */ 642 if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) { 643 btf_dump_emit_typedef_def(d, id, t, 0); 644 btf_dump_printf(d, ";\n\n"); 645 } 646 tstate->emit_state = EMITTED; 647 break; 648 case BTF_KIND_STRUCT: 649 case BTF_KIND_UNION: 650 tstate->emit_state = EMITTING; 651 /* if it's a top-level struct/union definition or struct/union 652 * is anonymous, then in C we'll be emitting all fields and 653 * their types (as opposed to just `struct X`), so we need to 654 * make sure that all types, referenced from struct/union 655 * members have necessary forward-declarations, where 656 * applicable 657 */ 658 if (top_level_def || t->name_off == 0) { 659 const struct btf_member *m = (void *)(t + 1); 660 __u16 vlen = btf_vlen_of(t); 661 int i, new_cont_id; 662 663 new_cont_id = t->name_off == 0 ? cont_id : id; 664 for (i = 0; i < vlen; i++, m++) 665 btf_dump_emit_type(d, m->type, new_cont_id); 666 } else if (!tstate->fwd_emitted && id != cont_id) { 667 btf_dump_emit_struct_fwd(d, id, t); 668 btf_dump_printf(d, ";\n\n"); 669 tstate->fwd_emitted = 1; 670 } 671 672 if (top_level_def) { 673 btf_dump_emit_struct_def(d, id, t, 0); 674 btf_dump_printf(d, ";\n\n"); 675 tstate->emit_state = EMITTED; 676 } else { 677 tstate->emit_state = NOT_EMITTED; 678 } 679 break; 680 case BTF_KIND_FUNC_PROTO: { 681 const struct btf_param *p = (void *)(t + 1); 682 __u16 vlen = btf_vlen_of(t); 683 int i; 684 685 btf_dump_emit_type(d, t->type, cont_id); 686 for (i = 0; i < vlen; i++, p++) 687 btf_dump_emit_type(d, p->type, cont_id); 688 689 break; 690 } 691 default: 692 break; 693 } 694 } 695 696 static int btf_align_of(const struct btf *btf, __u32 id) 697 { 698 const struct btf_type *t = btf__type_by_id(btf, id); 699 __u16 kind = btf_kind_of(t); 700 701 switch (kind) { 702 case BTF_KIND_INT: 703 case BTF_KIND_ENUM: 704 return min(sizeof(void *), t->size); 705 case BTF_KIND_PTR: 706 return sizeof(void *); 707 case BTF_KIND_TYPEDEF: 708 case BTF_KIND_VOLATILE: 709 case BTF_KIND_CONST: 710 case BTF_KIND_RESTRICT: 711 return btf_align_of(btf, t->type); 712 case BTF_KIND_ARRAY: { 713 const struct btf_array *a = (void *)(t + 1); 714 715 return btf_align_of(btf, a->type); 716 } 717 case BTF_KIND_STRUCT: 718 case BTF_KIND_UNION: { 719 const struct btf_member *m = (void *)(t + 1); 720 __u16 vlen = btf_vlen_of(t); 721 int i, align = 1; 722 723 for (i = 0; i < vlen; i++, m++) 724 align = max(align, btf_align_of(btf, m->type)); 725 726 return align; 727 } 728 default: 729 pr_warning("unsupported BTF_KIND:%u\n", btf_kind_of(t)); 730 return 1; 731 } 732 } 733 734 static bool btf_is_struct_packed(const struct btf *btf, __u32 id, 735 const struct btf_type *t) 736 { 737 const struct btf_member *m; 738 int align, i, bit_sz; 739 __u16 vlen; 740 bool kflag; 741 742 align = btf_align_of(btf, id); 743 /* size of a non-packed struct has to be a multiple of its alignment*/ 744 if (t->size % align) 745 return true; 746 747 m = (void *)(t + 1); 748 kflag = btf_kflag_of(t); 749 vlen = btf_vlen_of(t); 750 /* all non-bitfield fields have to be naturally aligned */ 751 for (i = 0; i < vlen; i++, m++) { 752 align = btf_align_of(btf, m->type); 753 bit_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; 754 if (bit_sz == 0 && m->offset % (8 * align) != 0) 755 return true; 756 } 757 758 /* 759 * if original struct was marked as packed, but its layout is 760 * naturally aligned, we'll detect that it's not packed 761 */ 762 return false; 763 } 764 765 static int chip_away_bits(int total, int at_most) 766 { 767 return total % at_most ? : at_most; 768 } 769 770 static void btf_dump_emit_bit_padding(const struct btf_dump *d, 771 int cur_off, int m_off, int m_bit_sz, 772 int align, int lvl) 773 { 774 int off_diff = m_off - cur_off; 775 int ptr_bits = sizeof(void *) * 8; 776 777 if (off_diff <= 0) 778 /* no gap */ 779 return; 780 if (m_bit_sz == 0 && off_diff < align * 8) 781 /* natural padding will take care of a gap */ 782 return; 783 784 while (off_diff > 0) { 785 const char *pad_type; 786 int pad_bits; 787 788 if (ptr_bits > 32 && off_diff > 32) { 789 pad_type = "long"; 790 pad_bits = chip_away_bits(off_diff, ptr_bits); 791 } else if (off_diff > 16) { 792 pad_type = "int"; 793 pad_bits = chip_away_bits(off_diff, 32); 794 } else if (off_diff > 8) { 795 pad_type = "short"; 796 pad_bits = chip_away_bits(off_diff, 16); 797 } else { 798 pad_type = "char"; 799 pad_bits = chip_away_bits(off_diff, 8); 800 } 801 btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); 802 off_diff -= pad_bits; 803 } 804 } 805 806 static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, 807 const struct btf_type *t) 808 { 809 btf_dump_printf(d, "%s %s", 810 btf_kind_of(t) == BTF_KIND_STRUCT ? "struct" : "union", 811 btf_dump_type_name(d, id)); 812 } 813 814 static void btf_dump_emit_struct_def(struct btf_dump *d, 815 __u32 id, 816 const struct btf_type *t, 817 int lvl) 818 { 819 const struct btf_member *m = (void *)(t + 1); 820 bool kflag = btf_kflag_of(t), is_struct; 821 int align, i, packed, off = 0; 822 __u16 vlen = btf_vlen_of(t); 823 824 is_struct = btf_kind_of(t) == BTF_KIND_STRUCT; 825 packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; 826 align = packed ? 1 : btf_align_of(d->btf, id); 827 828 btf_dump_printf(d, "%s%s%s {", 829 is_struct ? "struct" : "union", 830 t->name_off ? " " : "", 831 btf_dump_type_name(d, id)); 832 833 for (i = 0; i < vlen; i++, m++) { 834 const char *fname; 835 int m_off, m_sz; 836 837 fname = btf_name_of(d, m->name_off); 838 m_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; 839 m_off = kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset; 840 align = packed ? 1 : btf_align_of(d->btf, m->type); 841 842 btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); 843 btf_dump_printf(d, "\n%s", pfx(lvl + 1)); 844 btf_dump_emit_type_decl(d, m->type, fname, lvl + 1); 845 846 if (m_sz) { 847 btf_dump_printf(d, ": %d", m_sz); 848 off = m_off + m_sz; 849 } else { 850 m_sz = max(0, btf__resolve_size(d->btf, m->type)); 851 off = m_off + m_sz * 8; 852 } 853 btf_dump_printf(d, ";"); 854 } 855 856 if (vlen) 857 btf_dump_printf(d, "\n"); 858 btf_dump_printf(d, "%s}", pfx(lvl)); 859 if (packed) 860 btf_dump_printf(d, " __attribute__((packed))"); 861 } 862 863 static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, 864 const struct btf_type *t) 865 { 866 btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id)); 867 } 868 869 static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, 870 const struct btf_type *t, 871 int lvl) 872 { 873 const struct btf_enum *v = (void *)(t+1); 874 __u16 vlen = btf_vlen_of(t); 875 const char *name; 876 size_t dup_cnt; 877 int i; 878 879 btf_dump_printf(d, "enum%s%s", 880 t->name_off ? " " : "", 881 btf_dump_type_name(d, id)); 882 883 if (vlen) { 884 btf_dump_printf(d, " {"); 885 for (i = 0; i < vlen; i++, v++) { 886 name = btf_name_of(d, v->name_off); 887 /* enumerators share namespace with typedef idents */ 888 dup_cnt = btf_dump_name_dups(d, d->ident_names, name); 889 if (dup_cnt > 1) { 890 btf_dump_printf(d, "\n%s%s___%zu = %d,", 891 pfx(lvl + 1), name, dup_cnt, 892 (__s32)v->val); 893 } else { 894 btf_dump_printf(d, "\n%s%s = %d,", 895 pfx(lvl + 1), name, 896 (__s32)v->val); 897 } 898 } 899 btf_dump_printf(d, "\n%s}", pfx(lvl)); 900 } 901 } 902 903 static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, 904 const struct btf_type *t) 905 { 906 const char *name = btf_dump_type_name(d, id); 907 908 if (btf_kflag_of(t)) 909 btf_dump_printf(d, "union %s", name); 910 else 911 btf_dump_printf(d, "struct %s", name); 912 } 913 914 static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, 915 const struct btf_type *t, int lvl) 916 { 917 const char *name = btf_dump_ident_name(d, id); 918 919 btf_dump_printf(d, "typedef "); 920 btf_dump_emit_type_decl(d, t->type, name, lvl); 921 } 922 923 static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id) 924 { 925 __u32 *new_stack; 926 size_t new_cap; 927 928 if (d->decl_stack_cnt >= d->decl_stack_cap) { 929 new_cap = max(16, d->decl_stack_cap * 3 / 2); 930 new_stack = realloc(d->decl_stack, 931 new_cap * sizeof(new_stack[0])); 932 if (!new_stack) 933 return -ENOMEM; 934 d->decl_stack = new_stack; 935 d->decl_stack_cap = new_cap; 936 } 937 938 d->decl_stack[d->decl_stack_cnt++] = id; 939 940 return 0; 941 } 942 943 /* 944 * Emit type declaration (e.g., field type declaration in a struct or argument 945 * declaration in function prototype) in correct C syntax. 946 * 947 * For most types it's trivial, but there are few quirky type declaration 948 * cases worth mentioning: 949 * - function prototypes (especially nesting of function prototypes); 950 * - arrays; 951 * - const/volatile/restrict for pointers vs other types. 952 * 953 * For a good discussion of *PARSING* C syntax (as a human), see 954 * Peter van der Linden's "Expert C Programming: Deep C Secrets", 955 * Ch.3 "Unscrambling Declarations in C". 956 * 957 * It won't help with BTF to C conversion much, though, as it's an opposite 958 * problem. So we came up with this algorithm in reverse to van der Linden's 959 * parsing algorithm. It goes from structured BTF representation of type 960 * declaration to a valid compilable C syntax. 961 * 962 * For instance, consider this C typedef: 963 * typedef const int * const * arr[10] arr_t; 964 * It will be represented in BTF with this chain of BTF types: 965 * [typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int] 966 * 967 * Notice how [const] modifier always goes before type it modifies in BTF type 968 * graph, but in C syntax, const/volatile/restrict modifiers are written to 969 * the right of pointers, but to the left of other types. There are also other 970 * quirks, like function pointers, arrays of them, functions returning other 971 * functions, etc. 972 * 973 * We handle that by pushing all the types to a stack, until we hit "terminal" 974 * type (int/enum/struct/union/fwd). Then depending on the kind of a type on 975 * top of a stack, modifiers are handled differently. Array/function pointers 976 * have also wildly different syntax and how nesting of them are done. See 977 * code for authoritative definition. 978 * 979 * To avoid allocating new stack for each independent chain of BTF types, we 980 * share one bigger stack, with each chain working only on its own local view 981 * of a stack frame. Some care is required to "pop" stack frames after 982 * processing type declaration chain. 983 */ 984 static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, 985 const char *fname, int lvl) 986 { 987 struct id_stack decl_stack; 988 const struct btf_type *t; 989 int err, stack_start; 990 __u16 kind; 991 992 stack_start = d->decl_stack_cnt; 993 for (;;) { 994 err = btf_dump_push_decl_stack_id(d, id); 995 if (err < 0) { 996 /* 997 * if we don't have enough memory for entire type decl 998 * chain, restore stack, emit warning, and try to 999 * proceed nevertheless 1000 */ 1001 pr_warning("not enough memory for decl stack:%d", err); 1002 d->decl_stack_cnt = stack_start; 1003 return; 1004 } 1005 1006 /* VOID */ 1007 if (id == 0) 1008 break; 1009 1010 t = btf__type_by_id(d->btf, id); 1011 kind = btf_kind_of(t); 1012 switch (kind) { 1013 case BTF_KIND_PTR: 1014 case BTF_KIND_VOLATILE: 1015 case BTF_KIND_CONST: 1016 case BTF_KIND_RESTRICT: 1017 case BTF_KIND_FUNC_PROTO: 1018 id = t->type; 1019 break; 1020 case BTF_KIND_ARRAY: { 1021 const struct btf_array *a = (void *)(t + 1); 1022 1023 id = a->type; 1024 break; 1025 } 1026 case BTF_KIND_INT: 1027 case BTF_KIND_ENUM: 1028 case BTF_KIND_FWD: 1029 case BTF_KIND_STRUCT: 1030 case BTF_KIND_UNION: 1031 case BTF_KIND_TYPEDEF: 1032 goto done; 1033 default: 1034 pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", 1035 kind, id); 1036 goto done; 1037 } 1038 } 1039 done: 1040 /* 1041 * We might be inside a chain of declarations (e.g., array of function 1042 * pointers returning anonymous (so inlined) structs, having another 1043 * array field). Each of those needs its own "stack frame" to handle 1044 * emitting of declarations. Those stack frames are non-overlapping 1045 * portions of shared btf_dump->decl_stack. To make it a bit nicer to 1046 * handle this set of nested stacks, we create a view corresponding to 1047 * our own "stack frame" and work with it as an independent stack. 1048 * We'll need to clean up after emit_type_chain() returns, though. 1049 */ 1050 decl_stack.ids = d->decl_stack + stack_start; 1051 decl_stack.cnt = d->decl_stack_cnt - stack_start; 1052 btf_dump_emit_type_chain(d, &decl_stack, fname, lvl); 1053 /* 1054 * emit_type_chain() guarantees that it will pop its entire decl_stack 1055 * frame before returning. But it works with a read-only view into 1056 * decl_stack, so it doesn't actually pop anything from the 1057 * perspective of shared btf_dump->decl_stack, per se. We need to 1058 * reset decl_stack state to how it was before us to avoid it growing 1059 * all the time. 1060 */ 1061 d->decl_stack_cnt = stack_start; 1062 } 1063 1064 static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack) 1065 { 1066 const struct btf_type *t; 1067 __u32 id; 1068 1069 while (decl_stack->cnt) { 1070 id = decl_stack->ids[decl_stack->cnt - 1]; 1071 t = btf__type_by_id(d->btf, id); 1072 1073 switch (btf_kind_of(t)) { 1074 case BTF_KIND_VOLATILE: 1075 btf_dump_printf(d, "volatile "); 1076 break; 1077 case BTF_KIND_CONST: 1078 btf_dump_printf(d, "const "); 1079 break; 1080 case BTF_KIND_RESTRICT: 1081 btf_dump_printf(d, "restrict "); 1082 break; 1083 default: 1084 return; 1085 } 1086 decl_stack->cnt--; 1087 } 1088 } 1089 1090 static bool btf_is_mod_kind(const struct btf *btf, __u32 id) 1091 { 1092 const struct btf_type *t = btf__type_by_id(btf, id); 1093 1094 switch (btf_kind_of(t)) { 1095 case BTF_KIND_VOLATILE: 1096 case BTF_KIND_CONST: 1097 case BTF_KIND_RESTRICT: 1098 return true; 1099 default: 1100 return false; 1101 } 1102 } 1103 1104 static void btf_dump_emit_name(const struct btf_dump *d, 1105 const char *name, bool last_was_ptr) 1106 { 1107 bool separate = name[0] && !last_was_ptr; 1108 1109 btf_dump_printf(d, "%s%s", separate ? " " : "", name); 1110 } 1111 1112 static void btf_dump_emit_type_chain(struct btf_dump *d, 1113 struct id_stack *decls, 1114 const char *fname, int lvl) 1115 { 1116 /* 1117 * last_was_ptr is used to determine if we need to separate pointer 1118 * asterisk (*) from previous part of type signature with space, so 1119 * that we get `int ***`, instead of `int * * *`. We default to true 1120 * for cases where we have single pointer in a chain. E.g., in ptr -> 1121 * func_proto case. func_proto will start a new emit_type_chain call 1122 * with just ptr, which should be emitted as (*) or (*<fname>), so we 1123 * don't want to prepend space for that last pointer. 1124 */ 1125 bool last_was_ptr = true; 1126 const struct btf_type *t; 1127 const char *name; 1128 __u16 kind; 1129 __u32 id; 1130 1131 while (decls->cnt) { 1132 id = decls->ids[--decls->cnt]; 1133 if (id == 0) { 1134 /* VOID is a special snowflake */ 1135 btf_dump_emit_mods(d, decls); 1136 btf_dump_printf(d, "void"); 1137 last_was_ptr = false; 1138 continue; 1139 } 1140 1141 t = btf__type_by_id(d->btf, id); 1142 kind = btf_kind_of(t); 1143 1144 switch (kind) { 1145 case BTF_KIND_INT: 1146 btf_dump_emit_mods(d, decls); 1147 name = btf_name_of(d, t->name_off); 1148 btf_dump_printf(d, "%s", name); 1149 break; 1150 case BTF_KIND_STRUCT: 1151 case BTF_KIND_UNION: 1152 btf_dump_emit_mods(d, decls); 1153 /* inline anonymous struct/union */ 1154 if (t->name_off == 0) 1155 btf_dump_emit_struct_def(d, id, t, lvl); 1156 else 1157 btf_dump_emit_struct_fwd(d, id, t); 1158 break; 1159 case BTF_KIND_ENUM: 1160 btf_dump_emit_mods(d, decls); 1161 /* inline anonymous enum */ 1162 if (t->name_off == 0) 1163 btf_dump_emit_enum_def(d, id, t, lvl); 1164 else 1165 btf_dump_emit_enum_fwd(d, id, t); 1166 break; 1167 case BTF_KIND_FWD: 1168 btf_dump_emit_mods(d, decls); 1169 btf_dump_emit_fwd_def(d, id, t); 1170 break; 1171 case BTF_KIND_TYPEDEF: 1172 btf_dump_emit_mods(d, decls); 1173 btf_dump_printf(d, "%s", btf_dump_ident_name(d, id)); 1174 break; 1175 case BTF_KIND_PTR: 1176 btf_dump_printf(d, "%s", last_was_ptr ? "*" : " *"); 1177 break; 1178 case BTF_KIND_VOLATILE: 1179 btf_dump_printf(d, " volatile"); 1180 break; 1181 case BTF_KIND_CONST: 1182 btf_dump_printf(d, " const"); 1183 break; 1184 case BTF_KIND_RESTRICT: 1185 btf_dump_printf(d, " restrict"); 1186 break; 1187 case BTF_KIND_ARRAY: { 1188 const struct btf_array *a = (void *)(t + 1); 1189 const struct btf_type *next_t; 1190 __u32 next_id; 1191 bool multidim; 1192 /* 1193 * GCC has a bug 1194 * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354) 1195 * which causes it to emit extra const/volatile 1196 * modifiers for an array, if array's element type has 1197 * const/volatile modifiers. Clang doesn't do that. 1198 * In general, it doesn't seem very meaningful to have 1199 * a const/volatile modifier for array, so we are 1200 * going to silently skip them here. 1201 */ 1202 while (decls->cnt) { 1203 next_id = decls->ids[decls->cnt - 1]; 1204 if (btf_is_mod_kind(d->btf, next_id)) 1205 decls->cnt--; 1206 else 1207 break; 1208 } 1209 1210 if (decls->cnt == 0) { 1211 btf_dump_emit_name(d, fname, last_was_ptr); 1212 btf_dump_printf(d, "[%u]", a->nelems); 1213 return; 1214 } 1215 1216 next_t = btf__type_by_id(d->btf, next_id); 1217 multidim = btf_kind_of(next_t) == BTF_KIND_ARRAY; 1218 /* we need space if we have named non-pointer */ 1219 if (fname[0] && !last_was_ptr) 1220 btf_dump_printf(d, " "); 1221 /* no parentheses for multi-dimensional array */ 1222 if (!multidim) 1223 btf_dump_printf(d, "("); 1224 btf_dump_emit_type_chain(d, decls, fname, lvl); 1225 if (!multidim) 1226 btf_dump_printf(d, ")"); 1227 btf_dump_printf(d, "[%u]", a->nelems); 1228 return; 1229 } 1230 case BTF_KIND_FUNC_PROTO: { 1231 const struct btf_param *p = (void *)(t + 1); 1232 __u16 vlen = btf_vlen_of(t); 1233 int i; 1234 1235 btf_dump_emit_mods(d, decls); 1236 if (decls->cnt) { 1237 btf_dump_printf(d, " ("); 1238 btf_dump_emit_type_chain(d, decls, fname, lvl); 1239 btf_dump_printf(d, ")"); 1240 } else { 1241 btf_dump_emit_name(d, fname, last_was_ptr); 1242 } 1243 btf_dump_printf(d, "("); 1244 /* 1245 * Clang for BPF target generates func_proto with no 1246 * args as a func_proto with a single void arg (e.g., 1247 * `int (*f)(void)` vs just `int (*f)()`). We are 1248 * going to pretend there are no args for such case. 1249 */ 1250 if (vlen == 1 && p->type == 0) { 1251 btf_dump_printf(d, ")"); 1252 return; 1253 } 1254 1255 for (i = 0; i < vlen; i++, p++) { 1256 if (i > 0) 1257 btf_dump_printf(d, ", "); 1258 1259 /* last arg of type void is vararg */ 1260 if (i == vlen - 1 && p->type == 0) { 1261 btf_dump_printf(d, "..."); 1262 break; 1263 } 1264 1265 name = btf_name_of(d, p->name_off); 1266 btf_dump_emit_type_decl(d, p->type, name, lvl); 1267 } 1268 1269 btf_dump_printf(d, ")"); 1270 return; 1271 } 1272 default: 1273 pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", 1274 kind, id); 1275 return; 1276 } 1277 1278 last_was_ptr = kind == BTF_KIND_PTR; 1279 } 1280 1281 btf_dump_emit_name(d, fname, last_was_ptr); 1282 } 1283 1284 /* return number of duplicates (occurrences) of a given name */ 1285 static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, 1286 const char *orig_name) 1287 { 1288 size_t dup_cnt = 0; 1289 1290 hashmap__find(name_map, orig_name, (void **)&dup_cnt); 1291 dup_cnt++; 1292 hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL); 1293 1294 return dup_cnt; 1295 } 1296 1297 static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, 1298 struct hashmap *name_map) 1299 { 1300 struct btf_dump_type_aux_state *s = &d->type_states[id]; 1301 const struct btf_type *t = btf__type_by_id(d->btf, id); 1302 const char *orig_name = btf_name_of(d, t->name_off); 1303 const char **cached_name = &d->cached_names[id]; 1304 size_t dup_cnt; 1305 1306 if (t->name_off == 0) 1307 return ""; 1308 1309 if (s->name_resolved) 1310 return *cached_name ? *cached_name : orig_name; 1311 1312 dup_cnt = btf_dump_name_dups(d, name_map, orig_name); 1313 if (dup_cnt > 1) { 1314 const size_t max_len = 256; 1315 char new_name[max_len]; 1316 1317 snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt); 1318 *cached_name = strdup(new_name); 1319 } 1320 1321 s->name_resolved = 1; 1322 return *cached_name ? *cached_name : orig_name; 1323 } 1324 1325 static const char *btf_dump_type_name(struct btf_dump *d, __u32 id) 1326 { 1327 return btf_dump_resolve_name(d, id, d->type_names); 1328 } 1329 1330 static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id) 1331 { 1332 return btf_dump_resolve_name(d, id, d->ident_names); 1333 } 1334