/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <linux/mm_types.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/tsb.h>
#include <asm/tlb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
	vaddr >>= hash_shift;
	return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	return (tag == (vaddr >> 22));
}

static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
{
	unsigned long idx;

	for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
		struct tsb *ent = &swapper_tsb[idx];
		unsigned long match = idx << 13;

		match |= (ent->tag << 22);
		if (match >= start && match < end)
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
	}
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long v;

	if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
		return flush_tsb_kernel_range_scan(start, end);

	for (v = start; v < end; v += PAGE_SIZE) {
		unsigned long hash = tsb_hash(v, PAGE_SHIFT,
					      KERNEL_TSB_NENTRIES);
		struct tsb *ent = &swapper_tsb[hash];

		if (tag_compare(ent->tag, v))
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
	}
}

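/* Concretely: with the 8K base pages used here, an entry for vaddr sits
 * at index (vaddr >> 13) & (nentries - 1) and carries tag (vaddr >> 22).
 * flush_tsb_kernel_range_scan() goes the other way, rebuilding a
 * candidate address from each entry's index and tag so that very large
 * ranges can be handled with one linear sweep of the table instead of
 * one probe per page.
 */
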
static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v,
				  unsigned long hash_shift,
				  unsigned long nentries)
{
	unsigned long tag, ent, hash;

	v &= ~0x1UL;
	hash = tsb_hash(v, hash_shift, nentries);
	ent = tsb + (hash * sizeof(struct tsb));
	tag = (v >> 22UL);

	tsb_flush(ent, tag);
}

static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
			    unsigned long tsb, unsigned long nentries)
{
	unsigned long i;

	for (i = 0; i < tb->tlb_nr; i++)
		__flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
}

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
static void __flush_huge_tsb_one_entry(unsigned long tsb, unsigned long v,
				       unsigned long hash_shift,
				       unsigned long nentries,
				       unsigned int hugepage_shift)
{
	unsigned int hpage_entries;
	unsigned int i;

	hpage_entries = 1 << (hugepage_shift - hash_shift);
	for (i = 0; i < hpage_entries; i++)
		__flush_tsb_one_entry(tsb, v + (i << hash_shift), hash_shift,
				      nentries);
}

static void __flush_huge_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
				 unsigned long tsb, unsigned long nentries,
				 unsigned int hugepage_shift)
{
	unsigned long i;

	for (i = 0; i < tb->tlb_nr; i++)
		__flush_huge_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift,
					   nentries, hugepage_shift);
}
#endif

void flush_tsb_user(struct tlb_batch *tb)
{
	struct mm_struct *mm = tb->mm;
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (tb->hugepage_shift < REAL_HPAGE_SHIFT) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		if (tb->hugepage_shift == PAGE_SHIFT)
			__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
#if defined(CONFIG_HUGETLB_PAGE)
		else
			__flush_huge_tsb_one(tb, PAGE_SHIFT, base, nentries,
					     tb->hugepage_shift);
#endif
	}
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_huge_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries,
				     tb->hugepage_shift);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}

void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
			 unsigned int hugepage_shift)
{
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (hugepage_shift < REAL_HPAGE_SHIFT) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		if (hugepage_shift == PAGE_SHIFT)
			__flush_tsb_one_entry(base, vaddr, PAGE_SHIFT,
					      nentries);
#if defined(CONFIG_HUGETLB_PAGE)
		else
			__flush_huge_tsb_one_entry(base, vaddr, PAGE_SHIFT,
						   nentries, hugepage_shift);
#endif
	}
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_huge_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT,
					   nentries, hugepage_shift);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}

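/* Both user flush paths above select a TSB the same way: mappings with
 * hugepage_shift below REAL_HPAGE_SHIFT live in the MM_TSB_BASE table
 * (a huge page smaller than REAL_HPAGE is flushed as a run of base-page
 * entries), while REAL_HPAGE-and-larger mappings live in MM_TSB_HUGE.
 * When tlb_type is cheetah_plus or hypervisor the TSB is referenced by
 * its physical address, hence the __pa() conversion of the base pointer.
 */
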
#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
#endif

static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
	unsigned long tsb_reg, base, tsb_paddr;
	unsigned long page_sz, tte;

	mm->context.tsb_block[tsb_idx].tsb_nentries =
		tsb_bytes / sizeof(struct tsb);

	switch (tsb_idx) {
	case MM_TSB_BASE:
		base = TSBMAP_8K_BASE;
		break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	case MM_TSB_HUGE:
		base = TSBMAP_4M_BASE;
		break;
#endif
	default:
		BUG();
	}

	tte = pgprot_val(PAGE_KERNEL_LOCKED);
	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

	/* Use the smallest page size that can map the whole TSB
	 * in one TLB entry.
	 */
	switch (tsb_bytes) {
	case 8192 << 0:
		tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
		base += (tsb_paddr & 8192);
#endif
		page_sz = 8192;
		break;

	case 8192 << 1:
		tsb_reg = 0x1UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 2:
		tsb_reg = 0x2UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 3:
		tsb_reg = 0x3UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 4:
		tsb_reg = 0x4UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 5:
		tsb_reg = 0x5UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 6:
		tsb_reg = 0x6UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 7:
		tsb_reg = 0x7UL;
		page_sz = 4 * 1024 * 1024;
		break;

	default:
		printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
		       current->comm, current->pid, tsb_bytes);
		do_exit(SIGSEGV);
	}
	tte |= pte_sz_bits(page_sz);

	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
		/* Physical mapping, no locked TLB entry for TSB.  */
		tsb_reg |= tsb_paddr;

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
	} else {
		tsb_reg |= base;
		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
		tte |= (tsb_paddr & ~(page_sz - 1UL));

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
	}

	/* Setup the Hypervisor TSB descriptor.  */
	if (tlb_type == hypervisor) {
		struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
			break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
		case MM_TSB_HUGE:
			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->assoc = 1;
		hp->num_ttes = tsb_bytes / 16;
		hp->ctx_idx = 0;
		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
			break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
		case MM_TSB_HUGE:
			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->tsb_base = tsb_paddr;
		hp->resv = 0;
	}
}

struct kmem_cache *pgtable_cache __read_mostly;

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
	"tsb_8KB",
	"tsb_16KB",
	"tsb_32KB",
	"tsb_64KB",
	"tsb_128KB",
	"tsb_256KB",
	"tsb_512KB",
	"tsb_1MB",
};

void __init pgtable_cache_init(void)
{
	unsigned long i;

	pgtable_cache = kmem_cache_create("pgtable_cache",
					  PAGE_SIZE, PAGE_SIZE,
					  0,
					  _clear_page);
	if (!pgtable_cache) {
		prom_printf("pgtable_cache_init(): Could not create!\n");
		prom_halt();
	}

	for (i = 0; i < ARRAY_SIZE(tsb_cache_names); i++) {
		unsigned long size = 8192 << i;
		const char *name = tsb_cache_names[i];

		tsb_caches[i] = kmem_cache_create(name,
						  size, size,
						  0, NULL);
		if (!tsb_caches[i]) {
			prom_printf("Could not create %s cache\n", name);
			prom_halt();
		}
	}
}

int sysctl_tsb_ratio = -2;

static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
	unsigned long num_ents = (new_size / sizeof(struct tsb));

	if (sysctl_tsb_ratio < 0)
		return num_ents - (num_ents >> -sysctl_tsb_ratio);
	else
		return num_ents + (num_ents >> sysctl_tsb_ratio);
}

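/* With the default sysctl_tsb_ratio of -2 this works out to 3/4 of the
 * table: num_ents - num_ents/4.  A ratio of -1 would trigger growth once
 * RSS passes half the entries, while a non-negative ratio lets the TSB
 * run overcommitted (more resident pages than entries) before the next
 * grow is attempted.
 */
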
/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try and grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so f.e. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
	unsigned long max_tsb_size = 1 * 1024 * 1024;
	unsigned long new_size, old_size, flags;
	struct tsb *old_tsb, *new_tsb;
	unsigned long new_cache_index, old_cache_index;
	unsigned long new_rss_limit;
	gfp_t gfp_flags;

	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		max_tsb_size = (PAGE_SIZE << MAX_ORDER);

	new_cache_index = 0;
	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
		new_rss_limit = tsb_size_to_rss_limit(new_size);
		if (new_rss_limit > rss)
			break;
		new_cache_index++;
	}

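	/* Worked example, assuming sizeof(struct tsb) == 16 (consistent
	 * with the tsb_bytes / 16 num_ttes calculation in
	 * setup_tsb_params()): an 8K TSB holds 512 entries, so with the
	 * default sysctl_tsb_ratio its limit is 384.  An rss of 2000
	 * pages walks 8K -> 16K -> 32K (limits 384, 768, 1536) and
	 * settles on a 64K TSB (limit 3072), i.e. new_cache_index == 3,
	 * the "tsb_64KB" cache.
	 */
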
	if (new_size == max_tsb_size)
		new_rss_limit = ~0UL;

retry_tsb_alloc:
	gfp_flags = GFP_KERNEL;
	if (new_size > (PAGE_SIZE * 2))
		gfp_flags |= __GFP_NOWARN | __GFP_NORETRY;

	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
					gfp_flags, numa_node_id());
	if (unlikely(!new_tsb)) {
		/* Not being able to fork due to a high-order TSB
		 * allocation failure is very bad behavior.  Just back
		 * down to a 0-order allocation and force no TSB
		 * growing for this address space.
		 */
		if (mm->context.tsb_block[tsb_index].tsb == NULL &&
		    new_cache_index > 0) {
			new_cache_index = 0;
			new_size = 8192;
			new_rss_limit = ~0UL;
			goto retry_tsb_alloc;
		}

		/* If we failed on a TSB grow, we are under serious
		 * memory pressure so don't try to grow any more.
		 */
		if (mm->context.tsb_block[tsb_index].tsb != NULL)
			mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
		return;
	}

	/* Mark all tags as invalid.  */
	tsb_init(new_tsb, new_size);

	/* Ok, we are about to commit the changes.  If we are
	 * growing an existing TSB the locking is very tricky,
	 * so WATCH OUT!
	 *
	 * We have to hold mm->context.lock while committing to the
	 * new TSB, this synchronizes us with processors in
	 * flush_tsb_user() and switch_mm() for this address space.
	 *
	 * But even with that lock held, processors run asynchronously
	 * accessing the old TSB via TLB miss handling.  This is OK
	 * because those actions are just propagating state from the
	 * Linux page tables into the TSB, page table mappings are not
	 * being changed.  If a real fault occurs, the processor will
	 * synchronize with us when it hits flush_tsb_user(), this is
	 * also true for the case where vmscan is modifying the page
	 * tables.  The only thing we need to be careful with is to
	 * skip any locked TSB entries during copy_tsb().
	 *
	 * When we finish committing to the new TSB, we have to drop
	 * the lock and ask all other cpus running this address space
	 * to run tsb_context_switch() to see the new TSB table.
	 */
	spin_lock_irqsave(&mm->context.lock, flags);

	old_tsb = mm->context.tsb_block[tsb_index].tsb;
	old_cache_index =
		(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
	old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
		    sizeof(struct tsb));


	/* Handle multiple threads trying to grow the TSB at the same time.
	 * One will get in here first, and bump the size and the RSS limit.
	 * The others will get in here next and hit this check.
	 */
	if (unlikely(old_tsb &&
		     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
		spin_unlock_irqrestore(&mm->context.lock, flags);

		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
		return;
	}

	mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

	if (old_tsb) {
		extern void copy_tsb(unsigned long old_tsb_base,
				     unsigned long old_tsb_size,
				     unsigned long new_tsb_base,
				     unsigned long new_tsb_size,
				     unsigned long page_size_shift);
		unsigned long old_tsb_base = (unsigned long) old_tsb;
		unsigned long new_tsb_base = (unsigned long) new_tsb;

		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
			old_tsb_base = __pa(old_tsb_base);
			new_tsb_base = __pa(new_tsb_base);
		}
		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size,
			 tsb_index == MM_TSB_BASE ?
				PAGE_SHIFT : REAL_HPAGE_SHIFT);
	}

	mm->context.tsb_block[tsb_index].tsb = new_tsb;
	setup_tsb_params(mm, tsb_index, new_size);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	/* If old_tsb is NULL, we're being invoked for the first time
	 * from init_new_context().
	 */
	if (old_tsb) {
		/* Reload it on the local cpu.  */
		tsb_context_switch(mm);

		/* Now force other processors to do the same.  */
		preempt_disable();
		smp_tsb_sync(mm);
		preempt_enable();

		/* Now it is safe to free the old tsb.  */
		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
	}
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	unsigned long mm_rss = get_mm_rss(mm);
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	unsigned long saved_hugetlb_pte_count;
	unsigned long saved_thp_pte_count;
#endif
	unsigned int i;

	spin_lock_init(&mm->context.lock);

	mm->context.sparc64_ctx_val = 0UL;

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	/* We reset them to zero because the fork() page copying
	 * will re-increment the counters as the parent PTEs are
	 * copied into the child address space.
	 */
	saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
	saved_thp_pte_count = mm->context.thp_pte_count;
	mm->context.hugetlb_pte_count = 0;
	mm->context.thp_pte_count = 0;

	mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
#endif

	/* copy_mm() copies over the parent's mm_struct before calling
	 * us, so we need to zero out the TSB pointer or else tsb_grow()
	 * will be confused and think there is an older TSB to free up.
	 */
	for (i = 0; i < MM_NUM_TSBS; i++)
		mm->context.tsb_block[i].tsb = NULL;

	/* If this is fork, inherit the parent's TSB size.  We would
	 * grow it to that size on the first page fault anyways.
	 */
	tsb_grow(mm, MM_TSB_BASE, mm_rss);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
		tsb_grow(mm, MM_TSB_HUGE,
			 (saved_hugetlb_pte_count + saved_thp_pte_count) *
			 REAL_HPAGE_PER_HPAGE);
#endif

	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
		return -ENOMEM;

	return 0;
}

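/* Teardown mirrors setup: each TSB is returned to the size-indexed
 * kmem cache recorded in the low three bits of tsb_reg_val, and
 * destroy_context() then gives the context number back to
 * mmu_context_bmap under ctx_alloc_lock.
 */
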
static void tsb_destroy_one(struct tsb_config *tp)
{
	unsigned long cache_index;

	if (!tp->tsb)
		return;
	cache_index = tp->tsb_reg_val & 0x7UL;
	kmem_cache_free(tsb_caches[cache_index], tp->tsb);
	tp->tsb = NULL;
	tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
	unsigned long flags, i;

	for (i = 0; i < MM_NUM_TSBS; i++)
		tsb_destroy_one(&mm->context.tsb_block[i]);

	spin_lock_irqsave(&ctx_alloc_lock, flags);

	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_NRBITS(mm->context);
		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
	}

	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}