// SPDX-License-Identifier: GPL-2.0
#include <linux/hugetlb.h>
#include <linux/err.h>

#ifdef CONFIG_RISCV_ISA_SVNAPOT
/*
 * For a present NAPOT mapping, fold the dirty and young bits of every
 * constituent PTE into the value read from the first entry.
 */
pte_t huge_ptep_get(pte_t *ptep)
{
	unsigned long pte_num;
	int i;
	pte_t orig_pte = ptep_get(ptep);

	if (!pte_present(orig_pte) || !pte_napot(orig_pte))
		return orig_pte;

	pte_num = napot_pte_num(napot_cont_order(orig_pte));

	for (i = 0; i < pte_num; i++, ptep++) {
		pte_t pte = ptep_get(ptep);

		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}

	return orig_pte;
}

pte_t *huge_pte_alloc(struct mm_struct *mm,
		      struct vm_area_struct *vma,
		      unsigned long addr,
		      unsigned long sz)
{
	unsigned long order;
	pte_t *pte = NULL;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return NULL;

	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return NULL;

	if (sz == PUD_SIZE) {
		pte = (pte_t *)pud;
		goto out;
	}

	if (sz == PMD_SIZE) {
		if (want_pmd_share(vma, addr) && pud_none(pudp_get(pud)))
			pte = huge_pmd_share(mm, vma, addr, pud);
		else
			pte = (pte_t *)pmd_alloc(mm, pud, addr);
		goto out;
	}

	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return NULL;

	for_each_napot_order(order) {
		if (napot_cont_size(order) == sz) {
			pte = pte_alloc_huge(mm, pmd, addr & napot_cont_mask(order));
			break;
		}
	}

out:
	if (pte) {
		pte_t pteval = ptep_get_lockless(pte);

		WARN_ON_ONCE(pte_present(pteval) && !pte_huge(pteval));
	}
	return pte;
}

pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr,
		       unsigned long sz)
{
	unsigned long order;
	pte_t *pte = NULL;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	if (!pgd_present(pgdp_get(pgd)))
		return NULL;

	p4d = p4d_offset(pgd, addr);
	if (!p4d_present(p4dp_get(p4d)))
		return NULL;

	pud = pud_offset(p4d, addr);
	if (sz == PUD_SIZE)
		/* must be pud huge, non-present or none */
		return (pte_t *)pud;

	if (!pud_present(pudp_get(pud)))
		return NULL;

	pmd = pmd_offset(pud, addr);
	if (sz == PMD_SIZE)
		/* must be pmd huge, non-present or none */
		return (pte_t *)pmd;

	if (!pmd_present(pmdp_get(pmd)))
		return NULL;

	for_each_napot_order(order) {
		if (napot_cont_size(order) == sz) {
			pte = pte_offset_huge(pmd, addr & napot_cont_mask(order));
			break;
		}
	}
	return pte;
}

unsigned long hugetlb_mask_last_page(struct hstate *h)
{
	unsigned long hp_size = huge_page_size(h);

	switch (hp_size) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		return P4D_SIZE - PUD_SIZE;
#endif
	case PMD_SIZE:
		return PUD_SIZE - PMD_SIZE;
	case napot_cont_size(NAPOT_CONT64KB_ORDER):
		return PMD_SIZE - napot_cont_size(NAPOT_CONT64KB_ORDER);
	default:
		break;
	}

	return 0UL;
}

/*
 * Clear @ncontig PTEs starting at @ptep and return the first entry with the
 * dirty and young bits of the whole range folded in.
 */
static pte_t get_clear_contig(struct mm_struct *mm,
			      unsigned long addr,
			      pte_t *ptep,
			      unsigned long ncontig)
{
	pte_t pte, tmp_pte;
	bool present;

	pte = ptep_get_and_clear(mm, addr, ptep);
	present = pte_present(pte);
	while (--ncontig) {
		ptep++;
		addr += PAGE_SIZE;
		tmp_pte = ptep_get_and_clear(mm, addr, ptep);
		if (present) {
			if (pte_dirty(tmp_pte))
				pte = pte_mkdirty(pte);
			if (pte_young(tmp_pte))
				pte = pte_mkyoung(pte);
		}
	}
	return pte;
}

static pte_t get_clear_contig_flush(struct mm_struct *mm,
				    unsigned long addr,
				    pte_t *ptep,
				    unsigned long pte_num)
{
	pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num);
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	bool valid = !pte_none(orig_pte);

	if (valid)
		flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num));

	return orig_pte;
}

pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
	unsigned long order;

	for_each_napot_order(order) {
		if (shift == napot_cont_shift(order)) {
			entry = pte_mknapot(entry, order);
			break;
		}
	}
	if (order == NAPOT_ORDER_MAX)
		entry = pte_mkhuge(entry);

	return entry;
}

/* Invalidate @ncontig PTEs, then flush the TLB for the whole range. */
static void clear_flush(struct mm_struct *mm,
			unsigned long addr,
			pte_t *ptep,
			unsigned long pgsize,
			unsigned long ncontig)
{
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		ptep_get_and_clear(mm, addr, ptep);

	flush_tlb_range(&vma, saddr, addr);
}

/*
 * Return the number of page table entries covering a huge page of size @sz,
 * and store the size mapped by each entry in @pgsize.
 */
static int num_contig_ptes_from_size(unsigned long sz, size_t *pgsize)
{
	unsigned long hugepage_shift;

	if (sz >= PGDIR_SIZE)
		hugepage_shift = PGDIR_SHIFT;
	else if (sz >= P4D_SIZE)
		hugepage_shift = P4D_SHIFT;
	else if (sz >= PUD_SIZE)
		hugepage_shift = PUD_SHIFT;
	else if (sz >= PMD_SIZE)
		hugepage_shift = PMD_SHIFT;
	else
		hugepage_shift = PAGE_SHIFT;

	*pgsize = 1 << hugepage_shift;

	return sz >> hugepage_shift;
}

/*
 * When dealing with NAPOT mappings, the privileged specification indicates that
 * "if an update needs to be made, the OS generally should first mark all of the
 * PTEs invalid, then issue SFENCE.VMA instruction(s) covering all 4 KiB regions
 * within the range, [...] then update the PTE(s), as described in Section
 * 4.2.1.". That's the equivalent of the Break-Before-Make approach used by
 * arm64.
 */
void set_huge_pte_at(struct mm_struct *mm,
		     unsigned long addr,
		     pte_t *ptep,
		     pte_t pte,
		     unsigned long sz)
{
	size_t pgsize;
	int i, pte_num;

	pte_num = num_contig_ptes_from_size(sz, &pgsize);

	if (!pte_present(pte)) {
		for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
			set_ptes(mm, addr, ptep, pte, 1);
		return;
	}

	if (!pte_napot(pte)) {
		set_ptes(mm, addr, ptep, pte, 1);
		return;
	}

	clear_flush(mm, addr, ptep, pgsize, pte_num);

	for (i = 0; i < pte_num; i++, ptep++, addr += pgsize)
		set_pte_at(mm, addr, ptep, pte);
}

int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr,
			       pte_t *ptep,
			       pte_t pte,
			       int dirty)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long order;
	pte_t orig_pte;
	int i, pte_num;

	if (!pte_napot(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	order = napot_cont_order(pte);
	pte_num = napot_pte_num(order);
	ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
	orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);

	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
		set_pte_at(mm, addr, ptep, pte);

	return true;
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr,
			      pte_t *ptep, unsigned long sz)
{
	size_t pgsize;
	pte_t orig_pte = ptep_get(ptep);
	int pte_num;

	if (!pte_napot(orig_pte))
		return ptep_get_and_clear(mm, addr, ptep);

	pte_num = num_contig_ptes_from_size(sz, &pgsize);

	return get_clear_contig(mm, addr, ptep, pte_num);
}

void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep)
{
	pte_t pte = ptep_get(ptep);
	unsigned long order;
	pte_t orig_pte;
	int i, pte_num;

	if (!pte_napot(pte)) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	order = napot_cont_order(pte);
	pte_num = napot_pte_num(order);
	ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
	orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);

	orig_pte = pte_wrprotect(orig_pte);

	for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
		set_pte_at(mm, addr, ptep, orig_pte);
}

pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
			    unsigned long addr,
			    pte_t *ptep)
{
	pte_t pte = ptep_get(ptep);
	int pte_num;

	if (!pte_napot(pte))
		return ptep_clear_flush(vma, addr, ptep);

	pte_num = napot_pte_num(napot_cont_order(pte));

	return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num);
}

void huge_pte_clear(struct mm_struct *mm,
		    unsigned long addr,
		    pte_t *ptep,
		    unsigned long sz)
{
	size_t pgsize;
	pte_t pte = ptep_get(ptep);
	int i, pte_num;

	if (!pte_napot(pte)) {
		pte_clear(mm, addr, ptep);
		return;
	}

	pte_num = num_contig_ptes_from_size(sz, &pgsize);

	for (i = 0; i < pte_num; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);
}

static bool is_napot_size(unsigned long size)
{
	unsigned long order;

	if (!has_svnapot())
		return false;

	for_each_napot_order(order) {
		if (size == napot_cont_size(order))
			return true;
	}
	return false;
}
static __init int napot_hugetlbpages_init(void)
{
	if (has_svnapot()) {
		unsigned long order;

		for_each_napot_order(order)
			hugetlb_add_hstate(order);
	}
	return 0;
}
arch_initcall(napot_hugetlbpages_init);

#else

static bool is_napot_size(unsigned long size)
{
	return false;
}

#endif /*CONFIG_RISCV_ISA_SVNAPOT*/

int pud_huge(pud_t pud)
{
	return pud_leaf(pud);
}

int pmd_huge(pmd_t pmd)
{
	return pmd_leaf(pmd);
}

static bool __hugetlb_valid_size(unsigned long size)
{
	if (size == HPAGE_SIZE)
		return true;
	else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
		return true;
	else if (is_napot_size(size))
		return true;
	else
		return false;
}

bool __init arch_hugetlb_valid_size(unsigned long size)
{
	return __hugetlb_valid_size(size);
}

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	return __hugetlb_valid_size(huge_page_size(h));
}
#endif

#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void)
{
	/* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */
	if (IS_ENABLED(CONFIG_64BIT))
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	return 0;
}
arch_initcall(gigantic_pages_init);
#endif