1fd045f6cSArd Biesheuvel /* 224af6c4eSArd Biesheuvel * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel@linaro.org> 3fd045f6cSArd Biesheuvel * 4fd045f6cSArd Biesheuvel * This program is free software; you can redistribute it and/or modify 5fd045f6cSArd Biesheuvel * it under the terms of the GNU General Public License version 2 as 6fd045f6cSArd Biesheuvel * published by the Free Software Foundation. 7fd045f6cSArd Biesheuvel */ 8fd045f6cSArd Biesheuvel 9fd045f6cSArd Biesheuvel #include <linux/elf.h> 10fd045f6cSArd Biesheuvel #include <linux/kernel.h> 11fd045f6cSArd Biesheuvel #include <linux/module.h> 12fd045f6cSArd Biesheuvel #include <linux/sort.h> 13fd045f6cSArd Biesheuvel 14fd045f6cSArd Biesheuvel struct plt_entry { 15fd045f6cSArd Biesheuvel /* 16fd045f6cSArd Biesheuvel * A program that conforms to the AArch64 Procedure Call Standard 17fd045f6cSArd Biesheuvel * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or 18fd045f6cSArd Biesheuvel * IP1 (x17) may be inserted at any branch instruction that is 19fd045f6cSArd Biesheuvel * exposed to a relocation that supports long branches. Since that 20fd045f6cSArd Biesheuvel * is exactly what we are dealing with here, we are free to use x16 21fd045f6cSArd Biesheuvel * as a scratch register in the PLT veneers. 22fd045f6cSArd Biesheuvel */ 23fd045f6cSArd Biesheuvel __le32 mov0; /* movn x16, #0x.... */ 24fd045f6cSArd Biesheuvel __le32 mov1; /* movk x16, #0x...., lsl #16 */ 25fd045f6cSArd Biesheuvel __le32 mov2; /* movk x16, #0x...., lsl #32 */ 26fd045f6cSArd Biesheuvel __le32 br; /* br x16 */ 27fd045f6cSArd Biesheuvel }; 28fd045f6cSArd Biesheuvel 2924af6c4eSArd Biesheuvel static bool in_init(const struct module *mod, void *loc) 30fd045f6cSArd Biesheuvel { 3124af6c4eSArd Biesheuvel return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; 32fd045f6cSArd Biesheuvel } 33fd045f6cSArd Biesheuvel 3424af6c4eSArd Biesheuvel u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, 3524af6c4eSArd Biesheuvel Elf64_Sym *sym) 3624af6c4eSArd Biesheuvel { 3724af6c4eSArd Biesheuvel struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : 3824af6c4eSArd Biesheuvel &mod->arch.init; 3924af6c4eSArd Biesheuvel struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr; 4024af6c4eSArd Biesheuvel int i = pltsec->plt_num_entries; 4124af6c4eSArd Biesheuvel u64 val = sym->st_value + rela->r_addend; 42fd045f6cSArd Biesheuvel 43fd045f6cSArd Biesheuvel /* 44fd045f6cSArd Biesheuvel * MOVK/MOVN/MOVZ opcode: 45fd045f6cSArd Biesheuvel * +--------+------------+--------+-----------+-------------+---------+ 46fd045f6cSArd Biesheuvel * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | 47fd045f6cSArd Biesheuvel * +--------+------------+--------+-----------+-------------+---------+ 48fd045f6cSArd Biesheuvel * 49fd045f6cSArd Biesheuvel * Rd := 0x10 (x16) 50fd045f6cSArd Biesheuvel * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) 51fd045f6cSArd Biesheuvel * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) 52fd045f6cSArd Biesheuvel * sf := 1 (64-bit variant) 53fd045f6cSArd Biesheuvel */ 54fd045f6cSArd Biesheuvel plt[i] = (struct plt_entry){ 55fd045f6cSArd Biesheuvel cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), 56fd045f6cSArd Biesheuvel cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), 57fd045f6cSArd Biesheuvel cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), 58fd045f6cSArd Biesheuvel cpu_to_le32(0xd61f0200) 59fd045f6cSArd Biesheuvel }; 60fd045f6cSArd Biesheuvel 6124af6c4eSArd Biesheuvel /* 6224af6c4eSArd Biesheuvel * Check if the entry we just created is a duplicate. Given that the 6324af6c4eSArd Biesheuvel * relocations are sorted, this will be the last entry we allocated. 6424af6c4eSArd Biesheuvel * (if one exists). 6524af6c4eSArd Biesheuvel */ 6624af6c4eSArd Biesheuvel if (i > 0 && 6724af6c4eSArd Biesheuvel plt[i].mov0 == plt[i - 1].mov0 && 6824af6c4eSArd Biesheuvel plt[i].mov1 == plt[i - 1].mov1 && 6924af6c4eSArd Biesheuvel plt[i].mov2 == plt[i - 1].mov2) 7024af6c4eSArd Biesheuvel return (u64)&plt[i - 1]; 7124af6c4eSArd Biesheuvel 7224af6c4eSArd Biesheuvel pltsec->plt_num_entries++; 7324af6c4eSArd Biesheuvel BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries); 74fd045f6cSArd Biesheuvel 75fd045f6cSArd Biesheuvel return (u64)&plt[i]; 76fd045f6cSArd Biesheuvel } 77fd045f6cSArd Biesheuvel 78fd045f6cSArd Biesheuvel #define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b)) 79fd045f6cSArd Biesheuvel 80fd045f6cSArd Biesheuvel static int cmp_rela(const void *a, const void *b) 81fd045f6cSArd Biesheuvel { 82fd045f6cSArd Biesheuvel const Elf64_Rela *x = a, *y = b; 83fd045f6cSArd Biesheuvel int i; 84fd045f6cSArd Biesheuvel 85fd045f6cSArd Biesheuvel /* sort by type, symbol index and addend */ 86fd045f6cSArd Biesheuvel i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info)); 87fd045f6cSArd Biesheuvel if (i == 0) 88fd045f6cSArd Biesheuvel i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info)); 89fd045f6cSArd Biesheuvel if (i == 0) 90fd045f6cSArd Biesheuvel i = cmp_3way(x->r_addend, y->r_addend); 91fd045f6cSArd Biesheuvel return i; 92fd045f6cSArd Biesheuvel } 93fd045f6cSArd Biesheuvel 94fd045f6cSArd Biesheuvel static bool duplicate_rel(const Elf64_Rela *rela, int num) 95fd045f6cSArd Biesheuvel { 96fd045f6cSArd Biesheuvel /* 97fd045f6cSArd Biesheuvel * Entries are sorted by type, symbol index and addend. That means 98fd045f6cSArd Biesheuvel * that, if a duplicate entry exists, it must be in the preceding 99fd045f6cSArd Biesheuvel * slot. 100fd045f6cSArd Biesheuvel */ 101fd045f6cSArd Biesheuvel return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0; 102fd045f6cSArd Biesheuvel } 103fd045f6cSArd Biesheuvel 10424af6c4eSArd Biesheuvel static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, 10524af6c4eSArd Biesheuvel Elf64_Word dstidx) 106fd045f6cSArd Biesheuvel { 107fd045f6cSArd Biesheuvel unsigned int ret = 0; 108fd045f6cSArd Biesheuvel Elf64_Sym *s; 109fd045f6cSArd Biesheuvel int i; 110fd045f6cSArd Biesheuvel 111fd045f6cSArd Biesheuvel for (i = 0; i < num; i++) { 112fd045f6cSArd Biesheuvel switch (ELF64_R_TYPE(rela[i].r_info)) { 113fd045f6cSArd Biesheuvel case R_AARCH64_JUMP26: 114fd045f6cSArd Biesheuvel case R_AARCH64_CALL26: 115fd045f6cSArd Biesheuvel /* 116fd045f6cSArd Biesheuvel * We only have to consider branch targets that resolve 11724af6c4eSArd Biesheuvel * to symbols that are defined in a different section. 11824af6c4eSArd Biesheuvel * This is not simply a heuristic, it is a fundamental 11924af6c4eSArd Biesheuvel * limitation, since there is no guaranteed way to emit 12024af6c4eSArd Biesheuvel * PLT entries sufficiently close to the branch if the 12124af6c4eSArd Biesheuvel * section size exceeds the range of a branch 12224af6c4eSArd Biesheuvel * instruction. So ignore relocations against defined 12324af6c4eSArd Biesheuvel * symbols if they live in the same section as the 12424af6c4eSArd Biesheuvel * relocation target. 125fd045f6cSArd Biesheuvel */ 126fd045f6cSArd Biesheuvel s = syms + ELF64_R_SYM(rela[i].r_info); 12724af6c4eSArd Biesheuvel if (s->st_shndx == dstidx) 128fd045f6cSArd Biesheuvel break; 129fd045f6cSArd Biesheuvel 130fd045f6cSArd Biesheuvel /* 131fd045f6cSArd Biesheuvel * Jump relocations with non-zero addends against 132fd045f6cSArd Biesheuvel * undefined symbols are supported by the ELF spec, but 133fd045f6cSArd Biesheuvel * do not occur in practice (e.g., 'jump n bytes past 134fd045f6cSArd Biesheuvel * the entry point of undefined function symbol f'). 135fd045f6cSArd Biesheuvel * So we need to support them, but there is no need to 136fd045f6cSArd Biesheuvel * take them into consideration when trying to optimize 137fd045f6cSArd Biesheuvel * this code. So let's only check for duplicates when 138fd045f6cSArd Biesheuvel * the addend is zero: this allows us to record the PLT 139fd045f6cSArd Biesheuvel * entry address in the symbol table itself, rather than 140fd045f6cSArd Biesheuvel * having to search the list for duplicates each time we 141fd045f6cSArd Biesheuvel * emit one. 142fd045f6cSArd Biesheuvel */ 143fd045f6cSArd Biesheuvel if (rela[i].r_addend != 0 || !duplicate_rel(rela, i)) 144fd045f6cSArd Biesheuvel ret++; 145fd045f6cSArd Biesheuvel break; 146fd045f6cSArd Biesheuvel } 147fd045f6cSArd Biesheuvel } 148fd045f6cSArd Biesheuvel return ret; 149fd045f6cSArd Biesheuvel } 150fd045f6cSArd Biesheuvel 151fd045f6cSArd Biesheuvel int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, 152fd045f6cSArd Biesheuvel char *secstrings, struct module *mod) 153fd045f6cSArd Biesheuvel { 15424af6c4eSArd Biesheuvel unsigned long core_plts = 0; 15524af6c4eSArd Biesheuvel unsigned long init_plts = 0; 156fd045f6cSArd Biesheuvel Elf64_Sym *syms = NULL; 157fd045f6cSArd Biesheuvel int i; 158fd045f6cSArd Biesheuvel 159fd045f6cSArd Biesheuvel /* 160fd045f6cSArd Biesheuvel * Find the empty .plt section so we can expand it to store the PLT 161fd045f6cSArd Biesheuvel * entries. Record the symtab address as well. 162fd045f6cSArd Biesheuvel */ 163fd045f6cSArd Biesheuvel for (i = 0; i < ehdr->e_shnum; i++) { 16424af6c4eSArd Biesheuvel if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) 16524af6c4eSArd Biesheuvel mod->arch.core.plt = sechdrs + i; 16624af6c4eSArd Biesheuvel else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) 16724af6c4eSArd Biesheuvel mod->arch.init.plt = sechdrs + i; 168fd045f6cSArd Biesheuvel else if (sechdrs[i].sh_type == SHT_SYMTAB) 169fd045f6cSArd Biesheuvel syms = (Elf64_Sym *)sechdrs[i].sh_addr; 170fd045f6cSArd Biesheuvel } 171fd045f6cSArd Biesheuvel 17224af6c4eSArd Biesheuvel if (!mod->arch.core.plt || !mod->arch.init.plt) { 17324af6c4eSArd Biesheuvel pr_err("%s: module PLT section(s) missing\n", mod->name); 174fd045f6cSArd Biesheuvel return -ENOEXEC; 175fd045f6cSArd Biesheuvel } 176fd045f6cSArd Biesheuvel if (!syms) { 177fd045f6cSArd Biesheuvel pr_err("%s: module symtab section missing\n", mod->name); 178fd045f6cSArd Biesheuvel return -ENOEXEC; 179fd045f6cSArd Biesheuvel } 180fd045f6cSArd Biesheuvel 181fd045f6cSArd Biesheuvel for (i = 0; i < ehdr->e_shnum; i++) { 182fd045f6cSArd Biesheuvel Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset; 183fd045f6cSArd Biesheuvel int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela); 184fd045f6cSArd Biesheuvel Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info; 185fd045f6cSArd Biesheuvel 186fd045f6cSArd Biesheuvel if (sechdrs[i].sh_type != SHT_RELA) 187fd045f6cSArd Biesheuvel continue; 188fd045f6cSArd Biesheuvel 189fd045f6cSArd Biesheuvel /* ignore relocations that operate on non-exec sections */ 190fd045f6cSArd Biesheuvel if (!(dstsec->sh_flags & SHF_EXECINSTR)) 191fd045f6cSArd Biesheuvel continue; 192fd045f6cSArd Biesheuvel 193fd045f6cSArd Biesheuvel /* sort by type, symbol index and addend */ 194fd045f6cSArd Biesheuvel sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL); 195fd045f6cSArd Biesheuvel 19624af6c4eSArd Biesheuvel if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0) 19724af6c4eSArd Biesheuvel core_plts += count_plts(syms, rels, numrels, 19824af6c4eSArd Biesheuvel sechdrs[i].sh_info); 19924af6c4eSArd Biesheuvel else 20024af6c4eSArd Biesheuvel init_plts += count_plts(syms, rels, numrels, 20124af6c4eSArd Biesheuvel sechdrs[i].sh_info); 202fd045f6cSArd Biesheuvel } 203fd045f6cSArd Biesheuvel 20424af6c4eSArd Biesheuvel mod->arch.core.plt->sh_type = SHT_NOBITS; 20524af6c4eSArd Biesheuvel mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; 20624af6c4eSArd Biesheuvel mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES; 20724af6c4eSArd Biesheuvel mod->arch.core.plt->sh_size = (core_plts + 1) * sizeof(struct plt_entry); 20824af6c4eSArd Biesheuvel mod->arch.core.plt_num_entries = 0; 20924af6c4eSArd Biesheuvel mod->arch.core.plt_max_entries = core_plts; 21024af6c4eSArd Biesheuvel 21124af6c4eSArd Biesheuvel mod->arch.init.plt->sh_type = SHT_NOBITS; 21224af6c4eSArd Biesheuvel mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; 21324af6c4eSArd Biesheuvel mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES; 21424af6c4eSArd Biesheuvel mod->arch.init.plt->sh_size = (init_plts + 1) * sizeof(struct plt_entry); 21524af6c4eSArd Biesheuvel mod->arch.init.plt_num_entries = 0; 21624af6c4eSArd Biesheuvel mod->arch.init.plt_max_entries = init_plts; 21724af6c4eSArd Biesheuvel 218fd045f6cSArd Biesheuvel return 0; 219fd045f6cSArd Biesheuvel } 220