// SPDX-License-Identifier: GPL-2.0-only
/*
 * arch/arm64/mm/hugetlbpage.c
 *
 * Copyright (C) 2013 Linaro Ltd.
 *
 * Based on arch/x86/mm/hugetlbpage.c.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	size_t pagesize = huge_page_size(h);

	switch (pagesize) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case PMD_SIZE:
	case CONT_PMD_SIZE:
	case CONT_PTE_SIZE:
		return true;
	}
	pr_warn("%s: unrecognized huge page size 0x%lx\n",
		__func__, pagesize);
	return false;
}
#endif

/* An entry is huge when it is not a table entry: the table bit is clear. */
int pmd_huge(pmd_t pmd)
{
	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
}

int pud_huge(pud_t pud)
{
#ifndef __PAGETABLE_PMD_FOLDED
	return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
#else
	return 0;
#endif
}

/*
 * Select all bits except the pfn
 */
static inline pgprot_t pte_pgprot(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);

	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}

/*
 * Walk the page tables for addr to work out whether ptep refers to a
 * PMD-level or a PTE-level entry, and return the number of contiguous
 * entries at that level along with their size.
 */
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, size_t *pgsize)
{
	pgd_t *pgdp = pgd_offset(mm, addr);
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;

	*pgsize = PAGE_SIZE;
	p4dp = p4d_offset(pgdp, addr);
	pudp = pud_offset(p4dp, addr);
	pmdp = pmd_offset(pudp, addr);
	if ((pte_t *)pmdp == ptep) {
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}
	return CONT_PTES;
}

/* Map a huge page size onto the size and count of the entries backing it. */
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
	int contig_ptes = 0;

	*pgsize = size;

	switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case PMD_SIZE:
		contig_ptes = 1;
		break;
	case CONT_PMD_SIZE:
		*pgsize = PMD_SIZE;
		contig_ptes = CONT_PMDS;
		break;
	case CONT_PTE_SIZE:
		*pgsize = PAGE_SIZE;
		contig_ptes = CONT_PTES;
		break;
	}

	return contig_ptes;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	pte_t orig_pte = huge_ptep_get(ptep);
	bool valid = pte_valid(orig_pte);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty or accessed bit for any page in the set,
		 * so check them all.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}

	if (valid) {
		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
		flush_tlb_range(&vma, saddr, addr);
	}
	return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step for use cases where the
 * original pte is not needed.
 */
static void clear_flush(struct mm_struct *mm,
			unsigned long addr,
			pte_t *ptep,
			unsigned long pgsize,
			unsigned long ncontig)
{
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);

	flush_tlb_range(&vma, saddr, addr);
}
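/*
 * Install a huge pte. A contiguous set of entries must be written
 * under Break-Before-Make: clear and flush the whole set first, then
 * write each entry with its pfn and the (contiguous) attributes.
 */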
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	size_t pgsize;
	int i;
	int ncontig;
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;

	/*
	 * Code needs to be expanded to handle huge swap and migration
	 * entries. Needed for HUGETLB and MEMORY_FAILURE.
	 */
	WARN_ON(!pte_present(pte));

	if (!pte_cont(pte)) {
		set_pte_at(mm, addr, ptep, pte);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn = pte_pfn(pte);
	dpfn = pgsize >> PAGE_SHIFT;
	hugeprot = pte_pgprot(pte);

	clear_flush(mm, addr, ptep, pgsize, ncontig);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
			  pte_t *ptep, pte_t pte, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, ptep++)
		set_pte(ptep, pte);
}

pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep = NULL;

	pgdp = pgd_offset(mm, addr);
	p4dp = p4d_offset(pgdp, addr);
	pudp = pud_alloc(mm, p4dp, addr);
	if (!pudp)
		return NULL;

	if (sz == PUD_SIZE) {
		ptep = (pte_t *)pudp;
	} else if (sz == (CONT_PTE_SIZE)) {
		pmdp = pmd_alloc(mm, pudp, addr);
		if (!pmdp)
			return NULL;

		WARN_ON(addr & (sz - 1));
		/*
		 * Note that if this code were ever ported to the
		 * 32-bit arm platform then it will cause trouble in
		 * the case where CONFIG_HIGHPTE is set, since there
		 * will be no pte_unmap() to correspond with this
		 * pte_alloc_map().
		 */
		ptep = pte_alloc_map(mm, pmdp, addr);
	} else if (sz == PMD_SIZE) {
		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
		    pud_none(READ_ONCE(*pudp)))
			ptep = huge_pmd_share(mm, addr, pudp);
		else
			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
	} else if (sz == (CONT_PMD_SIZE)) {
		pmdp = pmd_alloc(mm, pudp, addr);
		WARN_ON(addr & (sz - 1));
		return (pte_t *)pmdp;
	}

	return ptep;
}
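/*
 * Return a pointer to the entry backing a huge page of the given size
 * at addr, or NULL if none is mapped. Non-present (swap or migration)
 * entries are returned as well, since callers need to see them.
 */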
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;

	pgdp = pgd_offset(mm, addr);
	if (!pgd_present(READ_ONCE(*pgdp)))
		return NULL;

	p4dp = p4d_offset(pgdp, addr);
	if (!p4d_present(READ_ONCE(*p4dp)))
		return NULL;

	pudp = pud_offset(p4dp, addr);
	pud = READ_ONCE(*pudp);
	if (sz != PUD_SIZE && pud_none(pud))
		return NULL;
	/* hugepage or swap? */
	if (pud_huge(pud) || !pud_present(pud))
		return (pte_t *)pudp;
	/* table; check the next level */

	if (sz == CONT_PMD_SIZE)
		addr &= CONT_PMD_MASK;

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
	    pmd_none(pmd))
		return NULL;
	if (pmd_huge(pmd) || !pmd_present(pmd))
		return (pte_t *)pmdp;

	if (sz == CONT_PTE_SIZE)
		return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));

	return NULL;
}

pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
			 struct page *page, int writable)
{
	size_t pagesize = huge_page_size(hstate_vma(vma));

	if (pagesize == CONT_PTE_SIZE) {
		entry = pte_mkcont(entry);
	} else if (pagesize == CONT_PMD_SIZE) {
		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
		pr_warn("%s: unrecognized huge page size 0x%lx\n",
			__func__, pagesize);
	}
	return entry;
}

void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	int ncontig;
	size_t pgsize;
	pte_t orig_pte = huge_ptep_get(ptep);

	if (!pte_cont(orig_pte))
		return ptep_get_and_clear(mm, addr, ptep);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);

	return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
}

/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range, write permission only needs to be
 * checked on the first pte of the set; dirty and young, however, may
 * differ on any of the contiguous ptes, so check them all.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
	int i;

	if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
		return 1;

	for (i = 0; i < ncontig; i++) {
		pte_t orig_pte = huge_ptep_get(ptep + i);

		if (pte_dirty(pte) != pte_dirty(orig_pte))
			return 1;

		if (pte_young(pte) != pte_young(orig_pte))
			return 1;
	}

	return 0;
}

int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);

	/* Make sure we don't lose the dirty or young state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

	return 1;
}
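/*
 * Write-protect a huge pte. A contiguous range again requires
 * Break-Before-Make: clear and flush the set, drop the write bit from
 * the accumulated pte, then rewrite every entry.
 */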
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;
	int ncontig, i;
	size_t pgsize;
	pte_t pte;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	hugeprot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

void huge_ptep_clear_flush(struct vm_area_struct *vma,
			   unsigned long addr, pte_t *ptep)
{
	size_t pgsize;
	int ncontig;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_clear_flush(vma, addr, ptep);
		return;
	}

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
}

static int __init hugetlbpage_init(void)
{
#ifdef CONFIG_ARM64_4K_PAGES
	hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
#endif
	hugetlb_add_hstate((CONT_PMD_SHIFT + PMD_SHIFT) - PAGE_SHIFT);
	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	hugetlb_add_hstate((CONT_PTE_SHIFT + PAGE_SHIFT) - PAGE_SHIFT);

	return 0;
}
arch_initcall(hugetlbpage_init);

bool __init arch_hugetlb_valid_size(unsigned long size)
{
	switch (size) {
#ifdef CONFIG_ARM64_4K_PAGES
	case PUD_SIZE:
#endif
	case CONT_PMD_SIZE:
	case PMD_SIZE:
	case CONT_PTE_SIZE:
		return true;
	}

	return false;
}