1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel@linaro.org> 4 */ 5 6 #include <linux/elf.h> 7 #include <linux/ftrace.h> 8 #include <linux/kernel.h> 9 #include <linux/module.h> 10 #include <linux/sort.h> 11 12 static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc, 13 enum aarch64_insn_register reg) 14 { 15 u32 adrp, add; 16 17 adrp = aarch64_insn_gen_adr(pc, dst, reg, AARCH64_INSN_ADR_TYPE_ADRP); 18 add = aarch64_insn_gen_add_sub_imm(reg, reg, dst % SZ_4K, 19 AARCH64_INSN_VARIANT_64BIT, 20 AARCH64_INSN_ADSB_ADD); 21 22 return (struct plt_entry){ cpu_to_le32(adrp), cpu_to_le32(add) }; 23 } 24 25 struct plt_entry get_plt_entry(u64 dst, void *pc) 26 { 27 struct plt_entry plt; 28 static u32 br; 29 30 if (!br) 31 br = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_16, 32 AARCH64_INSN_BRANCH_NOLINK); 33 34 plt = __get_adrp_add_pair(dst, (u64)pc, AARCH64_INSN_REG_16); 35 plt.br = cpu_to_le32(br); 36 37 return plt; 38 } 39 40 bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b) 41 { 42 u64 p, q; 43 44 /* 45 * Check whether both entries refer to the same target: 46 * do the cheapest checks first. 47 * If the 'add' or 'br' opcodes are different, then the target 48 * cannot be the same. 49 */ 50 if (a->add != b->add || a->br != b->br) 51 return false; 52 53 p = ALIGN_DOWN((u64)a, SZ_4K); 54 q = ALIGN_DOWN((u64)b, SZ_4K); 55 56 /* 57 * If the 'adrp' opcodes are the same then we just need to check 58 * that they refer to the same 4k region. 59 */ 60 if (a->adrp == b->adrp && p == q) 61 return true; 62 63 return (p + aarch64_insn_adrp_get_offset(le32_to_cpu(a->adrp))) == 64 (q + aarch64_insn_adrp_get_offset(le32_to_cpu(b->adrp))); 65 } 66 67 static bool in_init(const struct module *mod, void *loc) 68 { 69 return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; 70 } 71 72 u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, 73 void *loc, const Elf64_Rela *rela, 74 Elf64_Sym *sym) 75 { 76 struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : 77 &mod->arch.init; 78 struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr; 79 int i = pltsec->plt_num_entries; 80 int j = i - 1; 81 u64 val = sym->st_value + rela->r_addend; 82 83 if (is_forbidden_offset_for_adrp(&plt[i].adrp)) 84 i++; 85 86 plt[i] = get_plt_entry(val, &plt[i]); 87 88 /* 89 * Check if the entry we just created is a duplicate. Given that the 90 * relocations are sorted, this will be the last entry we allocated. 91 * (if one exists). 92 */ 93 if (j >= 0 && plt_entries_equal(plt + i, plt + j)) 94 return (u64)&plt[j]; 95 96 pltsec->plt_num_entries += i - j; 97 if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries)) 98 return 0; 99 100 return (u64)&plt[i]; 101 } 102 103 #ifdef CONFIG_ARM64_ERRATUM_843419 104 u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs, 105 void *loc, u64 val) 106 { 107 struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : 108 &mod->arch.init; 109 struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr; 110 int i = pltsec->plt_num_entries++; 111 u32 br; 112 int rd; 113 114 if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries)) 115 return 0; 116 117 if (is_forbidden_offset_for_adrp(&plt[i].adrp)) 118 i = pltsec->plt_num_entries++; 119 120 /* get the destination register of the ADRP instruction */ 121 rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, 122 le32_to_cpup((__le32 *)loc)); 123 124 br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4, 125 AARCH64_INSN_BRANCH_NOLINK); 126 127 plt[i] = __get_adrp_add_pair(val, (u64)&plt[i], rd); 128 plt[i].br = cpu_to_le32(br); 129 130 return (u64)&plt[i]; 131 } 132 #endif 133 134 #define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b)) 135 136 static int cmp_rela(const void *a, const void *b) 137 { 138 const Elf64_Rela *x = a, *y = b; 139 int i; 140 141 /* sort by type, symbol index and addend */ 142 i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info)); 143 if (i == 0) 144 i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info)); 145 if (i == 0) 146 i = cmp_3way(x->r_addend, y->r_addend); 147 return i; 148 } 149 150 static bool duplicate_rel(const Elf64_Rela *rela, int num) 151 { 152 /* 153 * Entries are sorted by type, symbol index and addend. That means 154 * that, if a duplicate entry exists, it must be in the preceding 155 * slot. 156 */ 157 return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0; 158 } 159 160 static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, 161 Elf64_Word dstidx, Elf_Shdr *dstsec) 162 { 163 unsigned int ret = 0; 164 Elf64_Sym *s; 165 int i; 166 167 for (i = 0; i < num; i++) { 168 u64 min_align; 169 170 switch (ELF64_R_TYPE(rela[i].r_info)) { 171 case R_AARCH64_JUMP26: 172 case R_AARCH64_CALL26: 173 if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) 174 break; 175 176 /* 177 * We only have to consider branch targets that resolve 178 * to symbols that are defined in a different section. 179 * This is not simply a heuristic, it is a fundamental 180 * limitation, since there is no guaranteed way to emit 181 * PLT entries sufficiently close to the branch if the 182 * section size exceeds the range of a branch 183 * instruction. So ignore relocations against defined 184 * symbols if they live in the same section as the 185 * relocation target. 186 */ 187 s = syms + ELF64_R_SYM(rela[i].r_info); 188 if (s->st_shndx == dstidx) 189 break; 190 191 /* 192 * Jump relocations with non-zero addends against 193 * undefined symbols are supported by the ELF spec, but 194 * do not occur in practice (e.g., 'jump n bytes past 195 * the entry point of undefined function symbol f'). 196 * So we need to support them, but there is no need to 197 * take them into consideration when trying to optimize 198 * this code. So let's only check for duplicates when 199 * the addend is zero: this allows us to record the PLT 200 * entry address in the symbol table itself, rather than 201 * having to search the list for duplicates each time we 202 * emit one. 203 */ 204 if (rela[i].r_addend != 0 || !duplicate_rel(rela, i)) 205 ret++; 206 break; 207 case R_AARCH64_ADR_PREL_PG_HI21_NC: 208 case R_AARCH64_ADR_PREL_PG_HI21: 209 if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) || 210 !cpus_have_const_cap(ARM64_WORKAROUND_843419)) 211 break; 212 213 /* 214 * Determine the minimal safe alignment for this ADRP 215 * instruction: the section alignment at which it is 216 * guaranteed not to appear at a vulnerable offset. 217 * 218 * This comes down to finding the least significant zero 219 * bit in bits [11:3] of the section offset, and 220 * increasing the section's alignment so that the 221 * resulting address of this instruction is guaranteed 222 * to equal the offset in that particular bit (as well 223 * as all less signficant bits). This ensures that the 224 * address modulo 4 KB != 0xfff8 or 0xfffc (which would 225 * have all ones in bits [11:3]) 226 */ 227 min_align = 2ULL << ffz(rela[i].r_offset | 0x7); 228 229 /* 230 * Allocate veneer space for each ADRP that may appear 231 * at a vulnerable offset nonetheless. At relocation 232 * time, some of these will remain unused since some 233 * ADRP instructions can be patched to ADR instructions 234 * instead. 235 */ 236 if (min_align > SZ_4K) 237 ret++; 238 else 239 dstsec->sh_addralign = max(dstsec->sh_addralign, 240 min_align); 241 break; 242 } 243 } 244 245 if (IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) && 246 cpus_have_const_cap(ARM64_WORKAROUND_843419)) 247 /* 248 * Add some slack so we can skip PLT slots that may trigger 249 * the erratum due to the placement of the ADRP instruction. 250 */ 251 ret += DIV_ROUND_UP(ret, (SZ_4K / sizeof(struct plt_entry))); 252 253 return ret; 254 } 255 256 static bool branch_rela_needs_plt(Elf64_Sym *syms, Elf64_Rela *rela, 257 Elf64_Word dstidx) 258 { 259 260 Elf64_Sym *s = syms + ELF64_R_SYM(rela->r_info); 261 262 if (s->st_shndx == dstidx) 263 return false; 264 265 return ELF64_R_TYPE(rela->r_info) == R_AARCH64_JUMP26 || 266 ELF64_R_TYPE(rela->r_info) == R_AARCH64_CALL26; 267 } 268 269 /* Group branch PLT relas at the front end of the array. */ 270 static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela, 271 int numrels, Elf64_Word dstidx) 272 { 273 int i = 0, j = numrels - 1; 274 275 if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) 276 return 0; 277 278 while (i < j) { 279 if (branch_rela_needs_plt(syms, &rela[i], dstidx)) 280 i++; 281 else if (branch_rela_needs_plt(syms, &rela[j], dstidx)) 282 swap(rela[i], rela[j]); 283 else 284 j--; 285 } 286 287 return i; 288 } 289 290 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, 291 char *secstrings, struct module *mod) 292 { 293 unsigned long core_plts = 0; 294 unsigned long init_plts = 0; 295 Elf64_Sym *syms = NULL; 296 Elf_Shdr *pltsec, *tramp = NULL; 297 int i; 298 299 /* 300 * Find the empty .plt section so we can expand it to store the PLT 301 * entries. Record the symtab address as well. 302 */ 303 for (i = 0; i < ehdr->e_shnum; i++) { 304 if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) 305 mod->arch.core.plt_shndx = i; 306 else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) 307 mod->arch.init.plt_shndx = i; 308 else if (!strcmp(secstrings + sechdrs[i].sh_name, 309 ".text.ftrace_trampoline")) 310 tramp = sechdrs + i; 311 else if (sechdrs[i].sh_type == SHT_SYMTAB) 312 syms = (Elf64_Sym *)sechdrs[i].sh_addr; 313 } 314 315 if (!mod->arch.core.plt_shndx || !mod->arch.init.plt_shndx) { 316 pr_err("%s: module PLT section(s) missing\n", mod->name); 317 return -ENOEXEC; 318 } 319 if (!syms) { 320 pr_err("%s: module symtab section missing\n", mod->name); 321 return -ENOEXEC; 322 } 323 324 for (i = 0; i < ehdr->e_shnum; i++) { 325 Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset; 326 int nents, numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela); 327 Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info; 328 329 if (sechdrs[i].sh_type != SHT_RELA) 330 continue; 331 332 /* ignore relocations that operate on non-exec sections */ 333 if (!(dstsec->sh_flags & SHF_EXECINSTR)) 334 continue; 335 336 /* 337 * sort branch relocations requiring a PLT by type, symbol index 338 * and addend 339 */ 340 nents = partition_branch_plt_relas(syms, rels, numrels, 341 sechdrs[i].sh_info); 342 if (nents) 343 sort(rels, nents, sizeof(Elf64_Rela), cmp_rela, NULL); 344 345 if (!str_has_prefix(secstrings + dstsec->sh_name, ".init")) 346 core_plts += count_plts(syms, rels, numrels, 347 sechdrs[i].sh_info, dstsec); 348 else 349 init_plts += count_plts(syms, rels, numrels, 350 sechdrs[i].sh_info, dstsec); 351 } 352 353 pltsec = sechdrs + mod->arch.core.plt_shndx; 354 pltsec->sh_type = SHT_NOBITS; 355 pltsec->sh_flags = SHF_EXECINSTR | SHF_ALLOC; 356 pltsec->sh_addralign = L1_CACHE_BYTES; 357 pltsec->sh_size = (core_plts + 1) * sizeof(struct plt_entry); 358 mod->arch.core.plt_num_entries = 0; 359 mod->arch.core.plt_max_entries = core_plts; 360 361 pltsec = sechdrs + mod->arch.init.plt_shndx; 362 pltsec->sh_type = SHT_NOBITS; 363 pltsec->sh_flags = SHF_EXECINSTR | SHF_ALLOC; 364 pltsec->sh_addralign = L1_CACHE_BYTES; 365 pltsec->sh_size = (init_plts + 1) * sizeof(struct plt_entry); 366 mod->arch.init.plt_num_entries = 0; 367 mod->arch.init.plt_max_entries = init_plts; 368 369 if (tramp) { 370 tramp->sh_type = SHT_NOBITS; 371 tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; 372 tramp->sh_addralign = __alignof__(struct plt_entry); 373 tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry); 374 } 375 376 return 0; 377 } 378