11a59d1b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 2675a0813SHarvey Harrison /* 3675a0813SHarvey Harrison * Flexible mmap layout support 4675a0813SHarvey Harrison * 5675a0813SHarvey Harrison * Based on code by Ingo Molnar and Andi Kleen, copyrighted 6675a0813SHarvey Harrison * as follows: 7675a0813SHarvey Harrison * 88f47e163SIngo Molnar * Copyright 2003-2009 Red Hat Inc. 9675a0813SHarvey Harrison * All Rights Reserved. 10675a0813SHarvey Harrison * Copyright 2005 Andi Kleen, SUSE Labs. 11675a0813SHarvey Harrison * Copyright 2007 Jiri Kosina, SUSE Labs. 12675a0813SHarvey Harrison */ 13675a0813SHarvey Harrison 14675a0813SHarvey Harrison #include <linux/personality.h> 15675a0813SHarvey Harrison #include <linux/mm.h> 16675a0813SHarvey Harrison #include <linux/random.h> 17675a0813SHarvey Harrison #include <linux/limits.h> 183f07c014SIngo Molnar #include <linux/sched/signal.h> 1901042607SIngo Molnar #include <linux/sched/mm.h> 20e13b73ddSDmitry Safonov #include <linux/compat.h> 21*60abfd08SBenjamin Thiel #include <linux/elf-randomize.h> 2280938332SMichal Hocko #include <asm/elf.h> 23*60abfd08SBenjamin Thiel #include <asm/io.h> 2480938332SMichal Hocko 25be62a320SCraig Bergstrom #include "physaddr.h" 26be62a320SCraig Bergstrom 27cc99535eSJan-Simon Möller struct va_alignment __read_mostly va_align = { 289387f774SBorislav Petkov .flags = -1, 299387f774SBorislav Petkov }; 309387f774SBorislav Petkov 31e8f01a8dSKirill A. Shutemov unsigned long task_size_32bit(void) 328f3e474fSDmitry Safonov { 338f3e474fSDmitry Safonov return IA32_PAGE_OFFSET; 348f3e474fSDmitry Safonov } 358f3e474fSDmitry Safonov 36b569bab7SKirill A. Shutemov unsigned long task_size_64bit(int full_addr_space) 371b028f78SDmitry Safonov { 38b569bab7SKirill A. Shutemov return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW; 391b028f78SDmitry Safonov } 401b028f78SDmitry Safonov 418f3e474fSDmitry Safonov static unsigned long stack_maxrandom_size(unsigned long task_size) 4280938332SMichal Hocko { 434e7c22d4SHector Marco-Gisbert unsigned long max = 0; 4401578e36SOleg Nesterov if (current->flags & PF_RANDOMIZE) { 45e8f01a8dSKirill A. Shutemov max = (-1UL) & __STACK_RND_MASK(task_size == task_size_32bit()); 468f3e474fSDmitry Safonov max <<= PAGE_SHIFT; 4780938332SMichal Hocko } 4880938332SMichal Hocko 4980938332SMichal Hocko return max; 5080938332SMichal Hocko } 5180938332SMichal Hocko 526a0b41d1SDmitry Safonov #ifdef CONFIG_COMPAT 536a0b41d1SDmitry Safonov # define mmap32_rnd_bits mmap_rnd_compat_bits 546a0b41d1SDmitry Safonov # define mmap64_rnd_bits mmap_rnd_bits 556a0b41d1SDmitry Safonov #else 566a0b41d1SDmitry Safonov # define mmap32_rnd_bits mmap_rnd_bits 576a0b41d1SDmitry Safonov # define mmap64_rnd_bits mmap_rnd_bits 586a0b41d1SDmitry Safonov #endif 596a0b41d1SDmitry Safonov 608f3e474fSDmitry Safonov #define SIZE_128M (128 * 1024 * 1024UL) 618f3e474fSDmitry Safonov 62675a0813SHarvey Harrison static int mmap_is_legacy(void) 63675a0813SHarvey Harrison { 64675a0813SHarvey Harrison if (current->personality & ADDR_COMPAT_LAYOUT) 65675a0813SHarvey Harrison return 1; 66675a0813SHarvey Harrison 67675a0813SHarvey Harrison return sysctl_legacy_va_layout; 68675a0813SHarvey Harrison } 69675a0813SHarvey Harrison 706a0b41d1SDmitry Safonov static unsigned long arch_rnd(unsigned int rndbits) 716a0b41d1SDmitry Safonov { 7247ac5484SOleg Nesterov if (!(current->flags & PF_RANDOMIZE)) 7347ac5484SOleg Nesterov return 0; 746a0b41d1SDmitry Safonov return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT; 756a0b41d1SDmitry Safonov } 766a0b41d1SDmitry Safonov 772b68f6caSKees Cook unsigned long arch_mmap_rnd(void) 78675a0813SHarvey Harrison { 796a0b41d1SDmitry Safonov return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits); 80675a0813SHarvey Harrison } 81675a0813SHarvey Harrison 828f2af155SKees Cook static unsigned long mmap_base(unsigned long rnd, unsigned long task_size, 838f2af155SKees Cook struct rlimit *rlim_stack) 84675a0813SHarvey Harrison { 858f2af155SKees Cook unsigned long gap = rlim_stack->rlim_cur; 86c204d21fSRik van Riel unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap; 878f3e474fSDmitry Safonov unsigned long gap_min, gap_max; 88675a0813SHarvey Harrison 89c204d21fSRik van Riel /* Values close to RLIM_INFINITY can overflow. */ 90c204d21fSRik van Riel if (gap + pad > gap) 91c204d21fSRik van Riel gap += pad; 92c204d21fSRik van Riel 938f3e474fSDmitry Safonov /* 948f3e474fSDmitry Safonov * Top of mmap area (just below the process stack). 958f3e474fSDmitry Safonov * Leave an at least ~128 MB hole with possible stack randomization. 968f3e474fSDmitry Safonov */ 97c204d21fSRik van Riel gap_min = SIZE_128M; 988f3e474fSDmitry Safonov gap_max = (task_size / 6) * 5; 99675a0813SHarvey Harrison 1008f3e474fSDmitry Safonov if (gap < gap_min) 1018f3e474fSDmitry Safonov gap = gap_min; 1028f3e474fSDmitry Safonov else if (gap > gap_max) 1038f3e474fSDmitry Safonov gap = gap_max; 1048f3e474fSDmitry Safonov 1058f3e474fSDmitry Safonov return PAGE_ALIGN(task_size - gap - rnd); 1068f3e474fSDmitry Safonov } 1078f3e474fSDmitry Safonov 1088f3e474fSDmitry Safonov static unsigned long mmap_legacy_base(unsigned long rnd, 1098f3e474fSDmitry Safonov unsigned long task_size) 1108f3e474fSDmitry Safonov { 1118f3e474fSDmitry Safonov return __TASK_UNMAPPED_BASE(task_size) + rnd; 112675a0813SHarvey Harrison } 113675a0813SHarvey Harrison 114675a0813SHarvey Harrison /* 115675a0813SHarvey Harrison * This function, called very early during the creation of a new 116675a0813SHarvey Harrison * process VM image, sets up which VM layout function to use: 117675a0813SHarvey Harrison */ 1181b028f78SDmitry Safonov static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base, 1198f2af155SKees Cook unsigned long random_factor, unsigned long task_size, 1208f2af155SKees Cook struct rlimit *rlim_stack) 1211b028f78SDmitry Safonov { 1221b028f78SDmitry Safonov *legacy_base = mmap_legacy_base(random_factor, task_size); 1231b028f78SDmitry Safonov if (mmap_is_legacy()) 1241b028f78SDmitry Safonov *base = *legacy_base; 1251b028f78SDmitry Safonov else 1268f2af155SKees Cook *base = mmap_base(random_factor, task_size, rlim_stack); 1271b028f78SDmitry Safonov } 1281b028f78SDmitry Safonov 1298f2af155SKees Cook void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) 130675a0813SHarvey Harrison { 1311b028f78SDmitry Safonov if (mmap_is_legacy()) 132675a0813SHarvey Harrison mm->get_unmapped_area = arch_get_unmapped_area; 1331b028f78SDmitry Safonov else 134675a0813SHarvey Harrison mm->get_unmapped_area = arch_get_unmapped_area_topdown; 1351b028f78SDmitry Safonov 1361b028f78SDmitry Safonov arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base, 1378f2af155SKees Cook arch_rnd(mmap64_rnd_bits), task_size_64bit(0), 1388f2af155SKees Cook rlim_stack); 1391b028f78SDmitry Safonov 1401b028f78SDmitry Safonov #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES 1411b028f78SDmitry Safonov /* 1421b028f78SDmitry Safonov * The mmap syscall mapping base decision depends solely on the 1431b028f78SDmitry Safonov * syscall type (64-bit or compat). This applies for 64bit 1441b028f78SDmitry Safonov * applications and 32bit applications. The 64bit syscall uses 1451b028f78SDmitry Safonov * mmap_base, the compat syscall uses mmap_compat_base. 1461b028f78SDmitry Safonov */ 1471b028f78SDmitry Safonov arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base, 1488f2af155SKees Cook arch_rnd(mmap32_rnd_bits), task_size_32bit(), 1498f2af155SKees Cook rlim_stack); 1501b028f78SDmitry Safonov #endif 151675a0813SHarvey Harrison } 152a8965276SKirill A. Shutemov 153e13b73ddSDmitry Safonov unsigned long get_mmap_base(int is_legacy) 154e13b73ddSDmitry Safonov { 155e13b73ddSDmitry Safonov struct mm_struct *mm = current->mm; 156e13b73ddSDmitry Safonov 157e13b73ddSDmitry Safonov #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES 158a846446bSDmitry Safonov if (in_32bit_syscall()) { 159e13b73ddSDmitry Safonov return is_legacy ? mm->mmap_compat_legacy_base 160e13b73ddSDmitry Safonov : mm->mmap_compat_base; 161e13b73ddSDmitry Safonov } 162e13b73ddSDmitry Safonov #endif 163e13b73ddSDmitry Safonov return is_legacy ? mm->mmap_legacy_base : mm->mmap_base; 164e13b73ddSDmitry Safonov } 165e13b73ddSDmitry Safonov 166a8965276SKirill A. Shutemov const char *arch_vma_name(struct vm_area_struct *vma) 167a8965276SKirill A. Shutemov { 168a8965276SKirill A. Shutemov return NULL; 169a8965276SKirill A. Shutemov } 1701e0f25dbSKirill A. Shutemov 1711e0f25dbSKirill A. Shutemov /** 1721e0f25dbSKirill A. Shutemov * mmap_address_hint_valid - Validate the address hint of mmap 1731e0f25dbSKirill A. Shutemov * @addr: Address hint 1741e0f25dbSKirill A. Shutemov * @len: Mapping length 1751e0f25dbSKirill A. Shutemov * 1761e0f25dbSKirill A. Shutemov * Check whether @addr and @addr + @len result in a valid mapping. 1771e0f25dbSKirill A. Shutemov * 1781e0f25dbSKirill A. Shutemov * On 32bit this only checks whether @addr + @len is <= TASK_SIZE. 1791e0f25dbSKirill A. Shutemov * 1801e0f25dbSKirill A. Shutemov * On 64bit with 5-level page tables another sanity check is required 1811e0f25dbSKirill A. Shutemov * because mappings requested by mmap(@addr, 0) which cross the 47-bit 1821e0f25dbSKirill A. Shutemov * virtual address boundary can cause the following theoretical issue: 1831e0f25dbSKirill A. Shutemov * 1841e0f25dbSKirill A. Shutemov * An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr 1851e0f25dbSKirill A. Shutemov * is below the border of the 47-bit address space and @addr + @len is 1861e0f25dbSKirill A. Shutemov * above the border. 1871e0f25dbSKirill A. Shutemov * 1881e0f25dbSKirill A. Shutemov * With 4-level paging this request succeeds, but the resulting mapping 1891e0f25dbSKirill A. Shutemov * address will always be within the 47-bit virtual address space, because 1901e0f25dbSKirill A. Shutemov * the hint address does not result in a valid mapping and is 1911e0f25dbSKirill A. Shutemov * ignored. Hence applications which are not prepared to handle virtual 1921e0f25dbSKirill A. Shutemov * addresses above 47-bit work correctly. 1931e0f25dbSKirill A. Shutemov * 1941e0f25dbSKirill A. Shutemov * With 5-level paging this request would be granted and result in a 1951e0f25dbSKirill A. Shutemov * mapping which crosses the border of the 47-bit virtual address 1961e0f25dbSKirill A. Shutemov * space. If the application cannot handle addresses above 47-bit this 1971e0f25dbSKirill A. Shutemov * will lead to misbehaviour and hard to diagnose failures. 1981e0f25dbSKirill A. Shutemov * 1991e0f25dbSKirill A. Shutemov * Therefore ignore address hints which would result in a mapping crossing 2001e0f25dbSKirill A. Shutemov * the 47-bit virtual address boundary. 2011e0f25dbSKirill A. Shutemov * 2021e0f25dbSKirill A. Shutemov * Note, that in the same scenario with MAP_FIXED the behaviour is 2031e0f25dbSKirill A. Shutemov * different. The request with @addr < 47-bit and @addr + @len > 47-bit 2041e0f25dbSKirill A. Shutemov * fails on a 4-level paging machine but succeeds on a 5-level paging 2051e0f25dbSKirill A. Shutemov * machine. It is reasonable to expect that an application does not rely on 2061e0f25dbSKirill A. Shutemov * the failure of such a fixed mapping request, so the restriction is not 2071e0f25dbSKirill A. Shutemov * applied. 2081e0f25dbSKirill A. Shutemov */ 2091e0f25dbSKirill A. Shutemov bool mmap_address_hint_valid(unsigned long addr, unsigned long len) 2101e0f25dbSKirill A. Shutemov { 2111e0f25dbSKirill A. Shutemov if (TASK_SIZE - len < addr) 2121e0f25dbSKirill A. Shutemov return false; 2131e0f25dbSKirill A. Shutemov 2141e0f25dbSKirill A. Shutemov return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW); 2151e0f25dbSKirill A. Shutemov } 216be62a320SCraig Bergstrom 217be62a320SCraig Bergstrom /* Can we access it for direct reading/writing? Must be RAM: */ 218be62a320SCraig Bergstrom int valid_phys_addr_range(phys_addr_t addr, size_t count) 219be62a320SCraig Bergstrom { 22092c77f7cSRalph Campbell return addr + count - 1 <= __pa(high_memory - 1); 221be62a320SCraig Bergstrom } 222be62a320SCraig Bergstrom 223be62a320SCraig Bergstrom /* Can we access it through mmap? Must be a valid physical address: */ 224be62a320SCraig Bergstrom int valid_mmap_phys_addr_range(unsigned long pfn, size_t count) 225be62a320SCraig Bergstrom { 226be62a320SCraig Bergstrom phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT; 227be62a320SCraig Bergstrom 228be62a320SCraig Bergstrom return phys_addr_valid(addr + count - 1); 229be62a320SCraig Bergstrom } 23042e4089cSAndi Kleen 23142e4089cSAndi Kleen /* 23242e4089cSAndi Kleen * Only allow root to set high MMIO mappings to PROT_NONE. 23342e4089cSAndi Kleen * This prevents an unpriv. user to set them to PROT_NONE and invert 23442e4089cSAndi Kleen * them, then pointing to valid memory for L1TF speculation. 23542e4089cSAndi Kleen * 23642e4089cSAndi Kleen * Note: for locked down kernels may want to disable the root override. 23742e4089cSAndi Kleen */ 23842e4089cSAndi Kleen bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) 23942e4089cSAndi Kleen { 24042e4089cSAndi Kleen if (!boot_cpu_has_bug(X86_BUG_L1TF)) 24142e4089cSAndi Kleen return true; 24242e4089cSAndi Kleen if (!__pte_needs_invert(pgprot_val(prot))) 24342e4089cSAndi Kleen return true; 24442e4089cSAndi Kleen /* If it's real memory always allow */ 24542e4089cSAndi Kleen if (pfn_valid(pfn)) 24642e4089cSAndi Kleen return true; 247b0a182f8SVlastimil Babka if (pfn >= l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN)) 24842e4089cSAndi Kleen return false; 24942e4089cSAndi Kleen return true; 25042e4089cSAndi Kleen } 251