/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff
 */

static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
							unsigned long addr,
							unsigned long len,
							unsigned long pgoff,
							unsigned long flags)
{
	struct hstate *h = hstate_file(filp);
	unsigned long task_size = TASK_SIZE;
	struct vm_unmapped_area_info info;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	info.flags = 0;
	info.length = len;
	info.low_limit = TASK_UNMAPPED_BASE;
	info.high_limit = min(task_size, VA_EXCLUDE_START);
	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

	if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
		VM_BUG_ON(addr != -ENOMEM);
		info.low_limit = VA_EXCLUDE_END;
		info.high_limit = task_size;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}

static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
				  const unsigned long len,
				  const unsigned long pgoff,
				  const unsigned long flags)
{
	struct hstate *h = hstate_file(filp);
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;
	struct vm_unmapped_area_info info;

	/* This should only ever run for 32-bit processes. */
	BUG_ON(!test_thread_flag(TIF_32BIT));

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = mm->mmap_base;
	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (addr & ~PAGE_MASK) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = STACK_TOP32;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}

unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long task_size = TASK_SIZE;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	if (len & ~huge_page_mask(h))
		return -EINVAL;
	if (len > task_size)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, huge_page_size(h));
		vma = find_vma(mm, addr);
		if (task_size - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}

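/* Encode a hugepage shift into the TTE page-size field.  The sun4v
 * variant also sets _PAGE_PMD_HUGE for the sizes that are mapped at
 * the PMD level (everything except 64K); sun4u TTEs are returned
 * unchanged here.
 */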
static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	return entry;
}

static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	unsigned long hugepage_size = _PAGE_SZ4MB_4V;

	pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;

	switch (shift) {
	case HPAGE_2GB_SHIFT:
		hugepage_size = _PAGE_SZ2GB_4V;
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_256MB_SHIFT:
		hugepage_size = _PAGE_SZ256MB_4V;
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_SHIFT:
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_64K_SHIFT:
		hugepage_size = _PAGE_SZ64K_4V;
		break;
	default:
		WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
	}

	pte_val(entry) = pte_val(entry) | hugepage_size;
	return entry;
}

static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	if (tlb_type == hypervisor)
		return sun4v_hugepage_shift_to_tte(entry, shift);
	else
		return sun4u_hugepage_shift_to_tte(entry, shift);
}

pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
			 struct page *page, int writeable)
{
	unsigned int shift = huge_page_shift(hstate_vma(vma));

	return hugepage_shift_to_tte(entry, shift);
}

static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
{
	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
	unsigned int shift;

	switch (tte_szbits) {
	case _PAGE_SZ2GB_4V:
		shift = HPAGE_2GB_SHIFT;
		break;
	case _PAGE_SZ256MB_4V:
		shift = HPAGE_256MB_SHIFT;
		break;
	case _PAGE_SZ4MB_4V:
		shift = REAL_HPAGE_SHIFT;
		break;
	case _PAGE_SZ64K_4V:
		shift = HPAGE_64K_SHIFT;
		break;
	default:
		shift = PAGE_SHIFT;
		break;
	}
	return shift;
}

static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
{
	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
	unsigned int shift;

	switch (tte_szbits) {
	case _PAGE_SZ256MB_4U:
		shift = HPAGE_256MB_SHIFT;
		break;
	case _PAGE_SZ4MB_4U:
		shift = REAL_HPAGE_SHIFT;
		break;
	case _PAGE_SZ64K_4U:
		shift = HPAGE_64K_SHIFT;
		break;
	default:
		shift = PAGE_SHIFT;
		break;
	}
	return shift;
}

static unsigned int huge_tte_to_shift(pte_t entry)
{
	unsigned long shift;

	if (tlb_type == hypervisor)
		shift = sun4v_huge_tte_to_shift(entry);
	else
		shift = sun4u_huge_tte_to_shift(entry);

	if (shift == PAGE_SHIFT)
		WARN_ONCE(1, "tte_to_shift: invalid hugepage tte=0x%lx\n",
			  pte_val(entry));

	return shift;
}

static unsigned long huge_tte_to_size(pte_t pte)
{
	unsigned long size = 1UL << huge_tte_to_shift(pte);

	if (size == REAL_HPAGE_SIZE)
		size = HPAGE_SIZE;
	return size;
}

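/* Hugepages of PMD_SIZE and larger are mapped directly by PMD
 * entries, so no PTE page is allocated for them; only the 64K
 * hugepage size goes through a real PTE page below the PMD.
 */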
pte_t *huge_pte_alloc(struct mm_struct *mm,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);
	if (pud) {
		pmd = pmd_alloc(mm, pud, addr);
		if (!pmd)
			return NULL;

		if (sz >= PMD_SIZE)
			pte = (pte_t *)pmd;
		else
			pte = pte_alloc_map(mm, pmd, addr);
	}

	return pte;
}

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	if (!pgd_none(*pgd)) {
		pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd)) {
				if (is_hugetlb_pmd(*pmd))
					pte = (pte_t *)pmd;
				else
					pte = pte_offset_map(pmd, addr);
			}
		}
	}

	return pte;
}

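/* A huge mapping covers nptes consecutive page-table entries: one
 * PMD entry per PMD_SIZE span for PMD-level sizes, or one PTE per
 * PAGE_SIZE for the 64K size.  set_huge_pte_at() replicates the TTE
 * into each of them (stepping the physical address), and the clear
 * path below zaps the same range; hugetlb_pte_count tracks how many
 * such entries are in use.
 */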
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	unsigned int i, nptes, orig_shift, shift;
	unsigned long size;
	pte_t orig;

	size = huge_tte_to_size(entry);
	shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
	nptes = size >> shift;

	if (!pte_present(*ptep) && pte_present(entry))
		mm->context.hugetlb_pte_count += nptes;

	addr &= ~(size - 1);
	orig = *ptep;
	orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);

	for (i = 0; i < nptes; i++)
		ptep[i] = __pte(pte_val(entry) + (i << shift));

	maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
	if (size == HPAGE_SIZE)
		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
				    orig_shift);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned int i, nptes, hugepage_shift;
	unsigned long size;
	pte_t entry;

	entry = *ptep;
	size = huge_tte_to_size(entry);
	if (size >= HPAGE_SIZE)
		nptes = size >> PMD_SHIFT;
	else
		nptes = size >> PAGE_SHIFT;

	hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
		huge_tte_to_shift(entry);

	if (pte_present(entry))
		mm->context.hugetlb_pte_count -= nptes;

	addr &= ~(size - 1);
	for (i = 0; i < nptes; i++)
		ptep[i] = __pte(0UL);

	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
	if (size == HPAGE_SIZE)
		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
				    hugepage_shift);

	return entry;
}

int pmd_huge(pmd_t pmd)
{
	return !pmd_none(pmd) &&
		(pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
}

int pud_huge(pud_t pud)
{
	return 0;
}

static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
				   unsigned long addr)
{
	pgtable_t token = pmd_pgtable(*pmd);

	pmd_clear(pmd);
	pte_free_tlb(tlb, token, addr);
	atomic_long_dec(&tlb->mm->nr_ptes);
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
		if (is_hugetlb_pmd(*pmd))
			pmd_clear(pmd);
		else
			hugetlb_free_pte_range(tlb, pmd, addr);
	} while (pmd++, addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
	mm_dec_nr_pmds(tlb->mm);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
				       ceiling);
	} while (pud++, addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
}

void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;

	addr &= PMD_MASK;
	if (addr < floor) {
		addr += PMD_SIZE;
		if (!addr)
			return;
	}
	if (ceiling) {
		ceiling &= PMD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;
	if (addr > end - 1)
		return;

	pgd = pgd_offset(tlb->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
	} while (pgd++, addr = next, addr != end);
}