/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/tsb.h>
#include <asm/tlb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
	vaddr >>= hash_shift;
	return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	return (tag == (vaddr >> 22));
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long v;

	for (v = start; v < end; v += PAGE_SIZE) {
		unsigned long hash = tsb_hash(v, PAGE_SHIFT,
					      KERNEL_TSB_NENTRIES);
		struct tsb *ent = &swapper_tsb[hash];

		if (tag_compare(ent->tag, v))
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
	}
}

static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v,
				  unsigned long hash_shift,
				  unsigned long nentries)
{
	unsigned long tag, ent, hash;

	v &= ~0x1UL;
	hash = tsb_hash(v, hash_shift, nentries);
	ent = tsb + (hash * sizeof(struct tsb));
	tag = (v >> 22UL);

	tsb_flush(ent, tag);
}

static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
			    unsigned long tsb, unsigned long nentries)
{
	unsigned long i;

	for (i = 0; i < tb->tlb_nr; i++)
		__flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
}

void flush_tsb_user(struct tlb_batch *tb)
{
	struct mm_struct *mm = tb->mm;
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
	nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		base = __pa(base);
	__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}

void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr)
{
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
	nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		base = __pa(base);
	__flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}
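
/* Note on the user flush paths above: entries are hashed on the virtual
 * address being flushed (PAGE_SHIFT for the base TSB, REAL_HPAGE_SHIFT
 * for the huge-page TSB) and tagged with vaddr >> 22, matching
 * tag_compare().  The TSB base handed to the __flush_tsb_one*() helpers
 * is a physical address on cheetah_plus and hypervisor (sun4v) chips,
 * hence the __pa() conversions.
 */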

#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
#endif

static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
	unsigned long tsb_reg, base, tsb_paddr;
	unsigned long page_sz, tte;

	mm->context.tsb_block[tsb_idx].tsb_nentries =
		tsb_bytes / sizeof(struct tsb);

	base = TSBMAP_BASE;
	tte = pgprot_val(PAGE_KERNEL_LOCKED);
	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

	/* Use the smallest page size that can map the whole TSB
	 * in one TLB entry.
	 */
	switch (tsb_bytes) {
	case 8192 << 0:
		tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
		base += (tsb_paddr & 8192);
#endif
		page_sz = 8192;
		break;

	case 8192 << 1:
		tsb_reg = 0x1UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 2:
		tsb_reg = 0x2UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 3:
		tsb_reg = 0x3UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 4:
		tsb_reg = 0x4UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 5:
		tsb_reg = 0x5UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 6:
		tsb_reg = 0x6UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 7:
		tsb_reg = 0x7UL;
		page_sz = 4 * 1024 * 1024;
		break;

	default:
		printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
		       current->comm, current->pid, tsb_bytes);
		do_exit(SIGSEGV);
	}
	tte |= pte_sz_bits(page_sz);

	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
		/* Physical mapping, no locked TLB entry for TSB. */
		tsb_reg |= tsb_paddr;

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
	} else {
		tsb_reg |= base;
		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
		tte |= (tsb_paddr & ~(page_sz - 1UL));

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
	}

	/* Set up the Hypervisor TSB descriptor. */
	if (tlb_type == hypervisor) {
		struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
			break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
		case MM_TSB_HUGE:
			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->assoc = 1;
		hp->num_ttes = tsb_bytes / 16;
		hp->ctx_idx = 0;
		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
			break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
		case MM_TSB_HUGE:
			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->tsb_base = tsb_paddr;
		hp->resv = 0;
	}
}
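
/* Worked example for setup_tsb_params(): a 64KB TSB (8192 << 3) gets a
 * size field of 0x3 and, with 16-byte TSB entries (cf. num_ttes =
 * tsb_bytes / 16 above), 4096 entries.  On cheetah_plus and hypervisor
 * chips tsb_reg_val is simply that size field OR'd with the TSB's
 * physical address; on older chips the TSB is instead mapped at
 * TSBMAP_BASE through a locked 64KB TLB entry described by
 * tsb_map_vaddr/tsb_map_pte.
 */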

struct kmem_cache *pgtable_cache __read_mostly;

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
	"tsb_8KB",
	"tsb_16KB",
	"tsb_32KB",
	"tsb_64KB",
	"tsb_128KB",
	"tsb_256KB",
	"tsb_512KB",
	"tsb_1MB",
};

void __init pgtable_cache_init(void)
{
	unsigned long i;

	pgtable_cache = kmem_cache_create("pgtable_cache",
					  PAGE_SIZE, PAGE_SIZE,
					  0,
					  _clear_page);
	if (!pgtable_cache) {
		prom_printf("pgtable_cache_init(): Could not create!\n");
		prom_halt();
	}

	for (i = 0; i < ARRAY_SIZE(tsb_cache_names); i++) {
		unsigned long size = 8192 << i;
		const char *name = tsb_cache_names[i];

		tsb_caches[i] = kmem_cache_create(name,
						  size, size,
						  0, NULL);
		if (!tsb_caches[i]) {
			prom_printf("Could not create %s cache\n", name);
			prom_halt();
		}
	}
}

int sysctl_tsb_ratio = -2;

static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
	unsigned long num_ents = (new_size / sizeof(struct tsb));

	if (sysctl_tsb_ratio < 0)
		return num_ents - (num_ents >> -sysctl_tsb_ratio);
	else
		return num_ents + (num_ents >> sysctl_tsb_ratio);
}
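
/* Worked example: with the default sysctl_tsb_ratio of -2, a TSB with
 * num_ents entries gets an RSS limit of num_ents - (num_ents >> 2),
 * i.e. 3/4 of its capacity, which is the trigger point described in
 * the comment below.  A non-negative ratio instead lets the RSS exceed
 * the entry count by num_ents >> sysctl_tsb_ratio before growing.
 */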

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try to grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
	unsigned long max_tsb_size = 1 * 1024 * 1024;
	unsigned long new_size, old_size, flags;
	struct tsb *old_tsb, *new_tsb;
	unsigned long new_cache_index, old_cache_index;
	unsigned long new_rss_limit;
	gfp_t gfp_flags;

	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		max_tsb_size = (PAGE_SIZE << MAX_ORDER);

	new_cache_index = 0;
	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
		new_rss_limit = tsb_size_to_rss_limit(new_size);
		if (new_rss_limit > rss)
			break;
		new_cache_index++;
	}

	if (new_size == max_tsb_size)
		new_rss_limit = ~0UL;

retry_tsb_alloc:
	gfp_flags = GFP_KERNEL;
	if (new_size > (PAGE_SIZE * 2))
		gfp_flags |= __GFP_NOWARN | __GFP_NORETRY;

	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
					gfp_flags, numa_node_id());
	if (unlikely(!new_tsb)) {
		/* Not being able to fork due to a high-order TSB
		 * allocation failure is very bad behavior.  Just back
		 * down to a 0-order allocation and force no TSB
		 * growing for this address space.
		 */
		if (mm->context.tsb_block[tsb_index].tsb == NULL &&
		    new_cache_index > 0) {
			new_cache_index = 0;
			new_size = 8192;
			new_rss_limit = ~0UL;
			goto retry_tsb_alloc;
		}

		/* If we failed on a TSB grow, we are under serious
		 * memory pressure so don't try to grow any more.
		 */
		if (mm->context.tsb_block[tsb_index].tsb != NULL)
			mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
		return;
	}

	/* Mark all tags as invalid.  */
	tsb_init(new_tsb, new_size);

	/* OK, we are about to commit the changes.  If we are
	 * growing an existing TSB the locking is very tricky,
	 * so WATCH OUT!
	 *
	 * We have to hold mm->context.lock while committing to the
	 * new TSB; this synchronizes us with processors in
	 * flush_tsb_user() and switch_mm() for this address space.
	 *
	 * But even with that lock held, processors run asynchronously
	 * accessing the old TSB via TLB miss handling.  This is OK
	 * because those actions are just propagating state from the
	 * Linux page tables into the TSB; page table mappings are not
	 * being changed.  If a real fault occurs, the processor will
	 * synchronize with us when it hits flush_tsb_user(); this is
	 * also true for the case where vmscan is modifying the page
	 * tables.  The only thing we need to be careful with is to
	 * skip any locked TSB entries during copy_tsb().
	 *
	 * When we finish committing to the new TSB, we have to drop
	 * the lock and ask all other cpus running this address space
	 * to run tsb_context_switch() to see the new TSB table.
	 */
	spin_lock_irqsave(&mm->context.lock, flags);

	old_tsb = mm->context.tsb_block[tsb_index].tsb;
	old_cache_index =
		(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
	old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
		    sizeof(struct tsb));

	/* Handle multiple threads trying to grow the TSB at the same time.
	 * One will get in here first, and bump the size and the RSS limit.
	 * The others will get in here next and hit this check.
	 */
	if (unlikely(old_tsb &&
		     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
		spin_unlock_irqrestore(&mm->context.lock, flags);

		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
		return;
	}

	mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

	if (old_tsb) {
		extern void copy_tsb(unsigned long old_tsb_base,
				     unsigned long old_tsb_size,
				     unsigned long new_tsb_base,
				     unsigned long new_tsb_size);
		unsigned long old_tsb_base = (unsigned long) old_tsb;
		unsigned long new_tsb_base = (unsigned long) new_tsb;

		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
			old_tsb_base = __pa(old_tsb_base);
			new_tsb_base = __pa(new_tsb_base);
		}
		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
	}

	mm->context.tsb_block[tsb_index].tsb = new_tsb;
	setup_tsb_params(mm, tsb_index, new_size);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	/* If old_tsb is NULL, we're being invoked for the first time
	 * from init_new_context().
	 */
	if (old_tsb) {
		/* Reload it on the local cpu.  */
		tsb_context_switch(mm);

		/* Now force other processors to do the same.  */
		preempt_disable();
		smp_tsb_sync(mm);
		preempt_enable();

		/* Now it is safe to free the old tsb.  */
		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
	}
}
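
/* Summary of the commit sequence in tsb_grow(): allocate and tsb_init()
 * the new TSB, take mm->context.lock, recheck tsb_rss_limit to resolve
 * races with other threads growing concurrently, copy_tsb() the live
 * entries, install the new block via setup_tsb_params(), drop the lock,
 * then reload locally with tsb_context_switch(), smp_tsb_sync() the
 * other cpus, and finally free the old TSB.
 */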

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	unsigned long huge_pte_count;
#endif
	unsigned int i;

	spin_lock_init(&mm->context.lock);

	mm->context.sparc64_ctx_val = 0UL;

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	/* We reset it to zero because the fork() page copying
	 * will re-increment the counters as the parent PTEs are
	 * copied into the child address space.
	 */
	huge_pte_count = mm->context.huge_pte_count;
	mm->context.huge_pte_count = 0;
#endif

	/* copy_mm() copies over the parent's mm_struct before calling
	 * us, so we need to zero out the TSB pointer or else tsb_grow()
	 * will be confused and think there is an older TSB to free up.
	 */
	for (i = 0; i < MM_NUM_TSBS; i++)
		mm->context.tsb_block[i].tsb = NULL;

	/* If this is fork, inherit the parent's TSB size.  We would
	 * grow it to that size on the first page fault anyway.
	 */
	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if (unlikely(huge_pte_count))
		tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif

	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
		return -ENOMEM;

	return 0;
}

static void tsb_destroy_one(struct tsb_config *tp)
{
	unsigned long cache_index;

	if (!tp->tsb)
		return;
	cache_index = tp->tsb_reg_val & 0x7UL;
	kmem_cache_free(tsb_caches[cache_index], tp->tsb);
	tp->tsb = NULL;
	tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
	unsigned long flags, i;

	for (i = 0; i < MM_NUM_TSBS; i++)
		tsb_destroy_one(&mm->context.tsb_block[i]);

	spin_lock_irqsave(&ctx_alloc_lock, flags);

	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_NRBITS(mm->context);
		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
	}

	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}