/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/tsb.h>
#include <asm/tlb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
	vaddr >>= hash_shift;
	return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	return (tag == (vaddr >> 22));
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long v;

	for (v = start; v < end; v += PAGE_SIZE) {
		unsigned long hash = tsb_hash(v, PAGE_SHIFT,
					      KERNEL_TSB_NENTRIES);
		struct tsb *ent = &swapper_tsb[hash];

		if (tag_compare(ent->tag, v))
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
	}
}

static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v,
				  unsigned long hash_shift,
				  unsigned long nentries)
{
	unsigned long tag, ent, hash;

	v &= ~0x1UL;
	hash = tsb_hash(v, hash_shift, nentries);
	ent = tsb + (hash * sizeof(struct tsb));
	tag = (v >> 22UL);

	tsb_flush(ent, tag);
}

static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
			    unsigned long tsb, unsigned long nentries)
{
	unsigned long i;

	for (i = 0; i < tb->tlb_nr; i++)
		__flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
}

void flush_tsb_user(struct tlb_batch *tb)
{
	struct mm_struct *mm = tb->mm;
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (!tb->huge) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
	}
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}

void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge)
{
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (!huge) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
	}
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}
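/* Illustrative example, not used by the code above: assuming the default
 * 8K user TSB (one page, 8192 / sizeof(struct tsb) = 512 entries) and
 * sparc64's 8K base pages (PAGE_SHIFT == 13), a base-page access to
 * vaddr 0xfff0002000 lands in
 *
 *	slot = (0xfff0002000 >> 13) & (512 - 1)	= 0x1
 *	tag  =  0xfff0002000 >> 22		= 0x3ffc0
 *
 * which is exactly what tsb_hash() and tag_compare() compute.  Flushing
 * that address just rewrites the tag: the kernel TSB path stores
 * (1UL << TSB_TAG_INVALID_BIT) directly, while the user TSB paths hand
 * the entry and tag to tsb_flush().
 */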
#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
#endif

static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
	unsigned long tsb_reg, base, tsb_paddr;
	unsigned long page_sz, tte;

	mm->context.tsb_block[tsb_idx].tsb_nentries =
		tsb_bytes / sizeof(struct tsb);

	switch (tsb_idx) {
	case MM_TSB_BASE:
		base = TSBMAP_8K_BASE;
		break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	case MM_TSB_HUGE:
		base = TSBMAP_4M_BASE;
		break;
#endif
	default:
		BUG();
	}

	tte = pgprot_val(PAGE_KERNEL_LOCKED);
	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

	/* Use the smallest page size that can map the whole TSB
	 * in one TLB entry.
	 */
	switch (tsb_bytes) {
	case 8192 << 0:
		tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
		base += (tsb_paddr & 8192);
#endif
		page_sz = 8192;
		break;

	case 8192 << 1:
		tsb_reg = 0x1UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 2:
		tsb_reg = 0x2UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 3:
		tsb_reg = 0x3UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 4:
		tsb_reg = 0x4UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 5:
		tsb_reg = 0x5UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 6:
		tsb_reg = 0x6UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 7:
		tsb_reg = 0x7UL;
		page_sz = 4 * 1024 * 1024;
		break;

	default:
		printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
		       current->comm, current->pid, tsb_bytes);
		do_exit(SIGSEGV);
	}
	tte |= pte_sz_bits(page_sz);

	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
		/* Physical mapping, no locked TLB entry for TSB.  */
		tsb_reg |= tsb_paddr;

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
	} else {
		tsb_reg |= base;
		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
		tte |= (tsb_paddr & ~(page_sz - 1UL));

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
	}

	/* Setup the Hypervisor TSB descriptor.  */
	if (tlb_type == hypervisor) {
		struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
			break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
		case MM_TSB_HUGE:
			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->assoc = 1;
		hp->num_ttes = tsb_bytes / 16;
		hp->ctx_idx = 0;
		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
			break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
		case MM_TSB_HUGE:
			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->tsb_base = tsb_paddr;
		hp->resv = 0;
	}
}
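/* Worked example of the encoding above, with illustrative values only:
 * a 64K TSB (8192 << 3) has 4096 entries and gets size field 0x3.  On
 * cheetah_plus/hypervisor chips the TSB is referenced physically, so
 * with, say, tsb_paddr == 0x200010000 the register value is simply
 *
 *	tsb_reg_val = 0x200010000 | 0x3
 *
 * (the low bits are free for the size code because the TSB is aligned
 * to its own size).  On older spitfire-style chips the TSB is instead
 * accessed through TSBMAP_8K_BASE/TSBMAP_4M_BASE, with tsb_map_vaddr
 * and tsb_map_pte describing the locked 64K translation that covers it.
 * For the hypervisor descriptor, num_ttes works out to 65536 / 16 = 4096
 * for the same TSB.
 */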
struct kmem_cache *pgtable_cache __read_mostly;

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
	"tsb_8KB",
	"tsb_16KB",
	"tsb_32KB",
	"tsb_64KB",
	"tsb_128KB",
	"tsb_256KB",
	"tsb_512KB",
	"tsb_1MB",
};

void __init pgtable_cache_init(void)
{
	unsigned long i;

	pgtable_cache = kmem_cache_create("pgtable_cache",
					  PAGE_SIZE, PAGE_SIZE,
					  0,
					  _clear_page);
	if (!pgtable_cache) {
		prom_printf("pgtable_cache_init(): Could not create!\n");
		prom_halt();
	}

	for (i = 0; i < ARRAY_SIZE(tsb_cache_names); i++) {
		unsigned long size = 8192 << i;
		const char *name = tsb_cache_names[i];

		tsb_caches[i] = kmem_cache_create(name,
						  size, size,
						  0, NULL);
		if (!tsb_caches[i]) {
			prom_printf("Could not create %s cache\n", name);
			prom_halt();
		}
	}
}

int sysctl_tsb_ratio = -2;

static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
	unsigned long num_ents = (new_size / sizeof(struct tsb));

	if (sysctl_tsb_ratio < 0)
		return num_ents - (num_ents >> -sysctl_tsb_ratio);
	else
		return num_ents + (num_ents >> sysctl_tsb_ratio);
}
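/* Illustrative numbers for the default sysctl_tsb_ratio of -2: an 8K TSB
 * holds 512 entries, so its RSS limit is 512 - (512 >> 2) = 384, i.e. we
 * grow once RSS reaches 3/4 of capacity (matching the comment below).
 * A 16K TSB correspondingly gets a limit of 1024 - 256 = 768.  A positive
 * ratio would instead let RSS overshoot the TSB size, e.g. a ratio of 2
 * gives 512 + 128 = 640 for the 8K TSB, trading TSB misses for memory.
 */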
/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try and grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
	unsigned long max_tsb_size = 1 * 1024 * 1024;
	unsigned long new_size, old_size, flags;
	struct tsb *old_tsb, *new_tsb;
	unsigned long new_cache_index, old_cache_index;
	unsigned long new_rss_limit;
	gfp_t gfp_flags;

	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		max_tsb_size = (PAGE_SIZE << MAX_ORDER);

	new_cache_index = 0;
	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
		new_rss_limit = tsb_size_to_rss_limit(new_size);
		if (new_rss_limit > rss)
			break;
		new_cache_index++;
	}

	if (new_size == max_tsb_size)
		new_rss_limit = ~0UL;

retry_tsb_alloc:
	gfp_flags = GFP_KERNEL;
	if (new_size > (PAGE_SIZE * 2))
		gfp_flags |= __GFP_NOWARN | __GFP_NORETRY;

	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
					gfp_flags, numa_node_id());
	if (unlikely(!new_tsb)) {
		/* Not being able to fork due to a high-order TSB
		 * allocation failure is very bad behavior.  Just back
		 * down to a 0-order allocation and force no TSB
		 * growing for this address space.
		 */
		if (mm->context.tsb_block[tsb_index].tsb == NULL &&
		    new_cache_index > 0) {
			new_cache_index = 0;
			new_size = 8192;
			new_rss_limit = ~0UL;
			goto retry_tsb_alloc;
		}

		/* If we failed on a TSB grow, we are under serious
		 * memory pressure so don't try to grow any more.
		 */
		if (mm->context.tsb_block[tsb_index].tsb != NULL)
			mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
		return;
	}

	/* Mark all tags as invalid.  */
	tsb_init(new_tsb, new_size);

	/* Ok, we are about to commit the changes.  If we are
	 * growing an existing TSB the locking is very tricky,
	 * so WATCH OUT!
	 *
	 * We have to hold mm->context.lock while committing to the
	 * new TSB, this synchronizes us with processors in
	 * flush_tsb_user() and switch_mm() for this address space.
	 *
	 * But even with that lock held, processors run asynchronously
	 * accessing the old TSB via TLB miss handling.  This is OK
	 * because those actions are just propagating state from the
	 * Linux page tables into the TSB, page table mappings are not
	 * being changed.  If a real fault occurs, the processor will
	 * synchronize with us when it hits flush_tsb_user(), this is
	 * also true for the case where vmscan is modifying the page
	 * tables.  The only thing we need to be careful with is to
	 * skip any locked TSB entries during copy_tsb().
	 *
	 * When we finish committing to the new TSB, we have to drop
	 * the lock and ask all other cpus running this address space
	 * to run tsb_context_switch() to see the new TSB table.
	 */
	spin_lock_irqsave(&mm->context.lock, flags);

	old_tsb = mm->context.tsb_block[tsb_index].tsb;
	old_cache_index =
		(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
	old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
		    sizeof(struct tsb));

	/* Handle multiple threads trying to grow the TSB at the same time.
	 * One will get in here first, and bump the size and the RSS limit.
	 * The others will get in here next and hit this check.
	 */
	if (unlikely(old_tsb &&
		     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
		spin_unlock_irqrestore(&mm->context.lock, flags);

		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
		return;
	}

	mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

	if (old_tsb) {
		extern void copy_tsb(unsigned long old_tsb_base,
				     unsigned long old_tsb_size,
				     unsigned long new_tsb_base,
				     unsigned long new_tsb_size);
		unsigned long old_tsb_base = (unsigned long) old_tsb;
		unsigned long new_tsb_base = (unsigned long) new_tsb;

		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
			old_tsb_base = __pa(old_tsb_base);
			new_tsb_base = __pa(new_tsb_base);
		}
		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
	}

	mm->context.tsb_block[tsb_index].tsb = new_tsb;
	setup_tsb_params(mm, tsb_index, new_size);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	/* If old_tsb is NULL, we're being invoked for the first time
	 * from init_new_context().
	 */
	if (old_tsb) {
		/* Reload it on the local cpu.  */
		tsb_context_switch(mm);

		/* Now force other processors to do the same.  */
		preempt_disable();
		smp_tsb_sync(mm);
		preempt_enable();

		/* Now it is safe to free the old tsb.  */
		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
	}
}
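/* Rough sketch of the growth path above, assuming the default
 * sysctl_tsb_ratio: an mm with an 8K base TSB faults once its RSS passes
 * the 384-entry limit.  With rss == 400, the sizing loop settles on
 * new_size == 16384 (limit 768, cache index 1), copy_tsb() rehashes the
 * still-valid entries into the new table, setup_tsb_params() recomputes
 * tsb_reg_val and the hypervisor descriptor, and tsb_context_switch()
 * plus smp_tsb_sync() make every cpu running this address space load the
 * new TSB before the old one is returned to its kmem cache.
 */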
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	unsigned long huge_pte_count;
#endif
	unsigned int i;

	spin_lock_init(&mm->context.lock);

	mm->context.sparc64_ctx_val = 0UL;

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	/* We reset it to zero because the fork() page copying
	 * will re-increment the counters as the parent PTEs are
	 * copied into the child address space.
	 */
	huge_pte_count = mm->context.huge_pte_count;
	mm->context.huge_pte_count = 0;
#endif

	/* copy_mm() copies over the parent's mm_struct before calling
	 * us, so we need to zero out the TSB pointer or else tsb_grow()
	 * will be confused and think there is an older TSB to free up.
	 */
	for (i = 0; i < MM_NUM_TSBS; i++)
		mm->context.tsb_block[i].tsb = NULL;

	/* If this is fork, inherit the parent's TSB size.  We would
	 * grow it to that size on the first page fault anyway.
	 */
	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if (unlikely(huge_pte_count))
		tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif

	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
		return -ENOMEM;

	return 0;
}

static void tsb_destroy_one(struct tsb_config *tp)
{
	unsigned long cache_index;

	if (!tp->tsb)
		return;
	cache_index = tp->tsb_reg_val & 0x7UL;
	kmem_cache_free(tsb_caches[cache_index], tp->tsb);
	tp->tsb = NULL;
	tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
	unsigned long flags, i;

	for (i = 0; i < MM_NUM_TSBS; i++)
		tsb_destroy_one(&mm->context.tsb_block[i]);

	spin_lock_irqsave(&ctx_alloc_lock, flags);

	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_NRBITS(mm->context);
		mmu_context_bmap[nr >> 6] &= ~(1UL << (nr & 63));
	}

	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}
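/* Teardown note with an illustrative value: destroy_context() frees each
 * per-size TSB back to its kmem cache and then releases the MMU context
 * number in mmu_context_bmap.  For example, a context whose CTX_NRBITS()
 * is 613 clears bit 613 & 63 == 37 in bitmap word 613 >> 6 == 9, making
 * that hardware context available to the next context allocation.
 */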