/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/tsb.h>
#include <asm/tlb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
	vaddr >>= hash_shift;
	return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	return (tag == (vaddr >> 22));
}

static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
{
	unsigned long idx;

	for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
		struct tsb *ent = &swapper_tsb[idx];
		unsigned long match = idx << 13;

		match |= (ent->tag << 22);
		if (match >= start && match < end)
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
	}
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long v;

	if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
		return flush_tsb_kernel_range_scan(start, end);

	for (v = start; v < end; v += PAGE_SIZE) {
		unsigned long hash = tsb_hash(v, PAGE_SHIFT,
					      KERNEL_TSB_NENTRIES);
		struct tsb *ent = &swapper_tsb[hash];

		if (tag_compare(ent->tag, v))
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
	}
}

static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v,
				  unsigned long hash_shift,
				  unsigned long nentries)
{
	unsigned long tag, ent, hash;

	v &= ~0x1UL;
	hash = tsb_hash(v, hash_shift, nentries);
	ent = tsb + (hash * sizeof(struct tsb));
	tag = (v >> 22UL);

	tsb_flush(ent, tag);
}

static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
			    unsigned long tsb, unsigned long nentries)
{
	unsigned long i;

	for (i = 0; i < tb->tlb_nr; i++)
		__flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
}

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
static void __flush_huge_tsb_one_entry(unsigned long tsb, unsigned long v,
				       unsigned long hash_shift,
				       unsigned long nentries,
				       unsigned int hugepage_shift)
{
	unsigned int hpage_entries;
	unsigned int i;

	hpage_entries = 1 << (hugepage_shift - hash_shift);
	for (i = 0; i < hpage_entries; i++)
		__flush_tsb_one_entry(tsb, v + (i << hash_shift), hash_shift,
				      nentries);
}

static void __flush_huge_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
				 unsigned long tsb, unsigned long nentries,
				 unsigned int hugepage_shift)
{
	unsigned long i;

	for (i = 0; i < tb->tlb_nr; i++)
		__flush_huge_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift,
					   nentries, hugepage_shift);
}
#endif

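/* Flush the user TSB entries for every virtual address collected in
 * the TLB batch.  Mappings smaller than HPAGE_SIZE are flushed from
 * the MM_TSB_BASE TSB at base page granularity; HPAGE_SIZE and larger
 * mappings are flushed from the MM_TSB_HUGE TSB, one entry per
 * REAL_HPAGE_SIZE piece.  mm->context.lock keeps this coherent with
 * concurrent tsb_grow() and switch_mm() activity.
 */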
void flush_tsb_user(struct tlb_batch *tb)
{
	struct mm_struct *mm = tb->mm;
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (tb->hugepage_shift < HPAGE_SHIFT) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		if (tb->hugepage_shift == PAGE_SHIFT)
			__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
#if defined(CONFIG_HUGETLB_PAGE)
		else
			__flush_huge_tsb_one(tb, PAGE_SHIFT, base, nentries,
					     tb->hugepage_shift);
#endif
	}
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_huge_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries,
				     tb->hugepage_shift);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}

void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
			 unsigned int hugepage_shift)
{
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (hugepage_shift < HPAGE_SHIFT) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		if (hugepage_shift == PAGE_SHIFT)
			__flush_tsb_one_entry(base, vaddr, PAGE_SHIFT,
					      nentries);
#if defined(CONFIG_HUGETLB_PAGE)
		else
			__flush_huge_tsb_one_entry(base, vaddr, PAGE_SHIFT,
						   nentries, hugepage_shift);
#endif
	}
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_huge_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT,
					   nentries, hugepage_shift);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}

#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
#endif

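/* Record everything needed to point the MMU at the TSB installed in
 * mm->context.tsb_block[tsb_idx]: the entry count, the TSB register
 * value, and, on chips that cannot take a physical TSB base, the
 * virtual mapping address and the locked TTE that maps it.  On sun4v
 * the hypervisor TSB descriptor is filled in as well.
 */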
static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
	unsigned long tsb_reg, base, tsb_paddr;
	unsigned long page_sz, tte;

	mm->context.tsb_block[tsb_idx].tsb_nentries =
		tsb_bytes / sizeof(struct tsb);

	switch (tsb_idx) {
	case MM_TSB_BASE:
		base = TSBMAP_8K_BASE;
		break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	case MM_TSB_HUGE:
		base = TSBMAP_4M_BASE;
		break;
#endif
	default:
		BUG();
	}

	tte = pgprot_val(PAGE_KERNEL_LOCKED);
	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

	/* Use the smallest page size that can map the whole TSB
	 * in one TLB entry.
	 */
	switch (tsb_bytes) {
	case 8192 << 0:
		tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
		base += (tsb_paddr & 8192);
#endif
		page_sz = 8192;
		break;

	case 8192 << 1:
		tsb_reg = 0x1UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 2:
		tsb_reg = 0x2UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 3:
		tsb_reg = 0x3UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 4:
		tsb_reg = 0x4UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 5:
		tsb_reg = 0x5UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 6:
		tsb_reg = 0x6UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 7:
		tsb_reg = 0x7UL;
		page_sz = 4 * 1024 * 1024;
		break;

	default:
		printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
		       current->comm, current->pid, tsb_bytes);
		do_exit(SIGSEGV);
	}
	tte |= pte_sz_bits(page_sz);

	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
		/* Physical mapping, no locked TLB entry for TSB. */
		tsb_reg |= tsb_paddr;

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
	} else {
		tsb_reg |= base;
		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
		tte |= (tsb_paddr & ~(page_sz - 1UL));

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
	}

	/* Setup the Hypervisor TSB descriptor. */
	if (tlb_type == hypervisor) {
		struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
			break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
		case MM_TSB_HUGE:
			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->assoc = 1;
		hp->num_ttes = tsb_bytes / 16;
		hp->ctx_idx = 0;
		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
			break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
		case MM_TSB_HUGE:
			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->tsb_base = tsb_paddr;
		hp->resv = 0;
	}
}

struct kmem_cache *pgtable_cache __read_mostly;

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
	"tsb_8KB",
	"tsb_16KB",
	"tsb_32KB",
	"tsb_64KB",
	"tsb_128KB",
	"tsb_256KB",
	"tsb_512KB",
	"tsb_1MB",
};

void __init pgtable_cache_init(void)
{
	unsigned long i;

	pgtable_cache = kmem_cache_create("pgtable_cache",
					  PAGE_SIZE, PAGE_SIZE,
					  0,
					  _clear_page);
	if (!pgtable_cache) {
		prom_printf("pgtable_cache_init(): Could not create!\n");
		prom_halt();
	}

	for (i = 0; i < ARRAY_SIZE(tsb_cache_names); i++) {
		unsigned long size = 8192 << i;
		const char *name = tsb_cache_names[i];

		tsb_caches[i] = kmem_cache_create(name,
						  size, size,
						  0, NULL);
		if (!tsb_caches[i]) {
			prom_printf("Could not create %s cache\n", name);
			prom_halt();
		}
	}
}

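/* sysctl_tsb_ratio tunes how full a TSB may get before we grow it.  A
 * negative value sets the RSS limit below the TSB capacity by
 * (nentries >> -ratio); the default of -2 therefore grows the TSB once
 * RSS reaches 3/4 of its entries.  A non-negative value lets RSS exceed
 * the capacity by (nentries >> ratio) before growing.
 */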
int sysctl_tsb_ratio = -2;

static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
	unsigned long num_ents = (new_size / sizeof(struct tsb));

	if (sysctl_tsb_ratio < 0)
		return num_ents - (num_ents >> -sysctl_tsb_ratio);
	else
		return num_ents + (num_ents >> sysctl_tsb_ratio);
}

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try and grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so f.e. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
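/* For example, with the default sysctl_tsb_ratio of -2: an 8K TSB holds
 * 512 16-byte entries, so its rss limit is 512 - (512 >> 2) = 384 pages;
 * once RSS passes 384 we move up to a 16K TSB (limit 768), then 32K, and
 * so on up to the 1MB maximum, where the limit is set to ~0UL.
 */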
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
	unsigned long max_tsb_size = 1 * 1024 * 1024;
	unsigned long new_size, old_size, flags;
	struct tsb *old_tsb, *new_tsb;
	unsigned long new_cache_index, old_cache_index;
	unsigned long new_rss_limit;
	gfp_t gfp_flags;

	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		max_tsb_size = (PAGE_SIZE << MAX_ORDER);

	new_cache_index = 0;
	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
		new_rss_limit = tsb_size_to_rss_limit(new_size);
		if (new_rss_limit > rss)
			break;
		new_cache_index++;
	}

	if (new_size == max_tsb_size)
		new_rss_limit = ~0UL;

retry_tsb_alloc:
	gfp_flags = GFP_KERNEL;
	if (new_size > (PAGE_SIZE * 2))
		gfp_flags |= __GFP_NOWARN | __GFP_NORETRY;

	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
					gfp_flags, numa_node_id());
	if (unlikely(!new_tsb)) {
		/* Not being able to fork due to a high-order TSB
		 * allocation failure is very bad behavior.  Just back
		 * down to a 0-order allocation and force no TSB
		 * growing for this address space.
		 */
		if (mm->context.tsb_block[tsb_index].tsb == NULL &&
		    new_cache_index > 0) {
			new_cache_index = 0;
			new_size = 8192;
			new_rss_limit = ~0UL;
			goto retry_tsb_alloc;
		}

		/* If we failed on a TSB grow, we are under serious
		 * memory pressure so don't try to grow any more.
		 */
		if (mm->context.tsb_block[tsb_index].tsb != NULL)
			mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
		return;
	}

	/* Mark all tags as invalid. */
	tsb_init(new_tsb, new_size);

	/* Ok, we are about to commit the changes.  If we are
	 * growing an existing TSB the locking is very tricky,
	 * so WATCH OUT!
	 *
	 * We have to hold mm->context.lock while committing to the
	 * new TSB, this synchronizes us with processors in
	 * flush_tsb_user() and switch_mm() for this address space.
	 *
	 * But even with that lock held, processors run asynchronously
	 * accessing the old TSB via TLB miss handling.  This is OK
	 * because those actions are just propagating state from the
	 * Linux page tables into the TSB, page table mappings are not
	 * being changed.  If a real fault occurs, the processor will
	 * synchronize with us when it hits flush_tsb_user(), this is
	 * also true for the case where vmscan is modifying the page
	 * tables.  The only thing we need to be careful with is to
	 * skip any locked TSB entries during copy_tsb().
	 *
	 * When we finish committing to the new TSB, we have to drop
	 * the lock and ask all other cpus running this address space
	 * to run tsb_context_switch() to see the new TSB table.
	 */
	spin_lock_irqsave(&mm->context.lock, flags);

	old_tsb = mm->context.tsb_block[tsb_index].tsb;
	old_cache_index =
		(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
	old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
		    sizeof(struct tsb));

	/* Handle multiple threads trying to grow the TSB at the same time.
	 * One will get in here first, and bump the size and the RSS limit.
	 * The others will get in here next and hit this check.
	 */
	if (unlikely(old_tsb &&
		     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
		spin_unlock_irqrestore(&mm->context.lock, flags);

		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
		return;
	}

	mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

	if (old_tsb) {
		extern void copy_tsb(unsigned long old_tsb_base,
				     unsigned long old_tsb_size,
				     unsigned long new_tsb_base,
				     unsigned long new_tsb_size);
		unsigned long old_tsb_base = (unsigned long) old_tsb;
		unsigned long new_tsb_base = (unsigned long) new_tsb;

		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
			old_tsb_base = __pa(old_tsb_base);
			new_tsb_base = __pa(new_tsb_base);
		}
		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
	}

	mm->context.tsb_block[tsb_index].tsb = new_tsb;
	setup_tsb_params(mm, tsb_index, new_size);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	/* If old_tsb is NULL, we're being invoked for the first time
	 * from init_new_context().
	 */
	if (old_tsb) {
		/* Reload it on the local cpu. */
		tsb_context_switch(mm);

		/* Now force other processors to do the same. */
		preempt_disable();
		smp_tsb_sync(mm);
		preempt_enable();

		/* Now it is safe to free the old tsb. */
		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
	}
}

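/* init_new_context() is invoked while a new address space is being set
 * up, after copy_mm() has copied the parent's mm_struct.  It resets the
 * per-context state and sizes the initial TSBs from the inherited RSS
 * and huge-page counts, so a forked child starts out with TSBs roughly
 * the size its parent had already grown to.
 */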
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	unsigned long mm_rss = get_mm_rss(mm);
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	unsigned long saved_hugetlb_pte_count;
	unsigned long saved_thp_pte_count;
#endif
	unsigned int i;

	spin_lock_init(&mm->context.lock);

	mm->context.sparc64_ctx_val = 0UL;

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	/* We reset them to zero because the fork() page copying
	 * will re-increment the counters as the parent PTEs are
	 * copied into the child address space.
	 */
	saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
	saved_thp_pte_count = mm->context.thp_pte_count;
	mm->context.hugetlb_pte_count = 0;
	mm->context.thp_pte_count = 0;

	mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
#endif

	/* copy_mm() copies over the parent's mm_struct before calling
	 * us, so we need to zero out the TSB pointer or else tsb_grow()
	 * will be confused and think there is an older TSB to free up.
	 */
	for (i = 0; i < MM_NUM_TSBS; i++)
		mm->context.tsb_block[i].tsb = NULL;

	/* If this is fork, inherit the parent's TSB size.  We would
	 * grow it to that size on the first page fault anyways.
	 */
	tsb_grow(mm, MM_TSB_BASE, mm_rss);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
		tsb_grow(mm, MM_TSB_HUGE,
			 (saved_hugetlb_pte_count + saved_thp_pte_count) *
			 REAL_HPAGE_PER_HPAGE);
#endif

	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
		return -ENOMEM;

	return 0;
}

static void tsb_destroy_one(struct tsb_config *tp)
{
	unsigned long cache_index;

	if (!tp->tsb)
		return;
	cache_index = tp->tsb_reg_val & 0x7UL;
	kmem_cache_free(tsb_caches[cache_index], tp->tsb);
	tp->tsb = NULL;
	tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
	unsigned long flags, i;

	for (i = 0; i < MM_NUM_TSBS; i++)
		tsb_destroy_one(&mm->context.tsb_block[i]);

	spin_lock_irqsave(&ctx_alloc_lock, flags);

	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_NRBITS(mm->context);
		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
	}

	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}