/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tsb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
	vaddr >>= hash_shift;
	return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	return (tag == (vaddr >> 22));
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long v;

	for (v = start; v < end; v += PAGE_SIZE) {
		unsigned long hash = tsb_hash(v, PAGE_SHIFT,
					      KERNEL_TSB_NENTRIES);
		struct tsb *ent = &swapper_tsb[hash];

		if (tag_compare(ent->tag, v))
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
	}
}

static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
			    unsigned long tsb, unsigned long nentries)
{
	unsigned long i;

	for (i = 0; i < tb->tlb_nr; i++) {
		unsigned long v = tb->vaddrs[i];
		unsigned long tag, ent, hash;

		v &= ~0x1UL;

		hash = tsb_hash(v, hash_shift, nentries);
		ent = tsb + (hash * sizeof(struct tsb));
		tag = (v >> 22UL);

		tsb_flush(ent, tag);
	}
}

void flush_tsb_user(struct tlb_batch *tb)
{
	struct mm_struct *mm = tb->mm;
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
	nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		base = __pa(base);
	__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);

#ifdef CONFIG_HUGETLB_PAGE
	if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}

#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K
#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_64K
#else
#error Broken base page size setting...
#endif

#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_64K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_512K
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_512K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
#else
#error Broken huge page size setting...
#endif
#endif
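
/* Editorial note on the index/tag scheme used throughout this file: with
 * 8K base pages (PAGE_SHIFT == 13) a virtual address V selects TSB entry
 * (V >> 13) & (nentries - 1) and carries the tag V >> 22, which is what
 * tsb_hash() and tag_compare() above compute.  The flush routines have to
 * recompute exactly the index/tag pair that the TLB miss handlers use,
 * otherwise they could not find and invalidate the matching entry.
 */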

static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
	unsigned long tsb_reg, base, tsb_paddr;
	unsigned long page_sz, tte;

	mm->context.tsb_block[tsb_idx].tsb_nentries =
		tsb_bytes / sizeof(struct tsb);

	base = TSBMAP_BASE;
	tte = pgprot_val(PAGE_KERNEL_LOCKED);
	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

	/* Use the smallest page size that can map the whole TSB
	 * in one TLB entry.
	 */
	switch (tsb_bytes) {
	case 8192 << 0:
		tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
		base += (tsb_paddr & 8192);
#endif
		page_sz = 8192;
		break;

	case 8192 << 1:
		tsb_reg = 0x1UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 2:
		tsb_reg = 0x2UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 3:
		tsb_reg = 0x3UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 4:
		tsb_reg = 0x4UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 5:
		tsb_reg = 0x5UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 6:
		tsb_reg = 0x6UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 7:
		tsb_reg = 0x7UL;
		page_sz = 4 * 1024 * 1024;
		break;

	default:
		printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
		       current->comm, current->pid, tsb_bytes);
		do_exit(SIGSEGV);
	}
	tte |= pte_sz_bits(page_sz);

	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
		/* Physical mapping, no locked TLB entry for TSB.  */
		tsb_reg |= tsb_paddr;

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
	} else {
		tsb_reg |= base;
		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
		tte |= (tsb_paddr & ~(page_sz - 1UL));

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
	}

	/* Setup the Hypervisor TSB descriptor.  */
	if (tlb_type == hypervisor) {
		struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
			break;
#ifdef CONFIG_HUGETLB_PAGE
		case MM_TSB_HUGE:
			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->assoc = 1;
		hp->num_ttes = tsb_bytes / 16;
		hp->ctx_idx = 0;
		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
			break;
#ifdef CONFIG_HUGETLB_PAGE
		case MM_TSB_HUGE:
			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->tsb_base = tsb_paddr;
		hp->resv = 0;
	}
}
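
/* Descriptive note: the low three bits of tsb_reg_val written above encode
 * the TSB size (0x0 for 8KB through 0x7 for 1MB).  That size code doubles
 * as the index into tsb_caches[] below, and is recovered with
 * (tsb_reg_val & 0x7UL) in tsb_grow() and tsb_destroy_one() so a TSB is
 * always freed back to the cache it was allocated from.
 */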

struct kmem_cache *pgtable_cache __read_mostly;

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
	"tsb_8KB",
	"tsb_16KB",
	"tsb_32KB",
	"tsb_64KB",
	"tsb_128KB",
	"tsb_256KB",
	"tsb_512KB",
	"tsb_1MB",
};

void __init pgtable_cache_init(void)
{
	unsigned long i;

	pgtable_cache = kmem_cache_create("pgtable_cache",
					  PAGE_SIZE, PAGE_SIZE,
					  0,
					  _clear_page);
	if (!pgtable_cache) {
		prom_printf("pgtable_cache_init(): Could not create!\n");
		prom_halt();
	}

	for (i = 0; i < 8; i++) {
		unsigned long size = 8192 << i;
		const char *name = tsb_cache_names[i];

		tsb_caches[i] = kmem_cache_create(name,
						  size, size,
						  0, NULL);
		if (!tsb_caches[i]) {
			prom_printf("Could not create %s cache\n", name);
			prom_halt();
		}
	}
}

int sysctl_tsb_ratio = -2;

static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
	unsigned long num_ents = (new_size / sizeof(struct tsb));

	if (sysctl_tsb_ratio < 0)
		return num_ents - (num_ents >> -sysctl_tsb_ratio);
	else
		return num_ents + (num_ents >> sysctl_tsb_ratio);
}
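
/* Worked example: a struct tsb entry is 16 bytes (tag plus TTE), so the
 * smallest 8KB TSB holds 512 entries.  With the default sysctl_tsb_ratio
 * of -2 the limit is 512 - (512 >> 2) == 384 entries, i.e. growth is
 * triggered once the RSS covers 3/4 of the TSB's capacity, matching the
 * description in the comment below.
 */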

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try and grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  With the
 * default sysctl_tsb_ratio of -2, we trigger when the RSS hits 3/4 of the
 * TSB capacity.
 */
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
	unsigned long max_tsb_size = 1 * 1024 * 1024;
	unsigned long new_size, old_size, flags;
	struct tsb *old_tsb, *new_tsb;
	unsigned long new_cache_index, old_cache_index;
	unsigned long new_rss_limit;
	gfp_t gfp_flags;

	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		max_tsb_size = (PAGE_SIZE << MAX_ORDER);

	new_cache_index = 0;
	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
		new_rss_limit = tsb_size_to_rss_limit(new_size);
		if (new_rss_limit > rss)
			break;
		new_cache_index++;
	}

	if (new_size == max_tsb_size)
		new_rss_limit = ~0UL;

retry_tsb_alloc:
	gfp_flags = GFP_KERNEL;
	if (new_size > (PAGE_SIZE * 2))
		gfp_flags = __GFP_NOWARN | __GFP_NORETRY;

	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
					gfp_flags, numa_node_id());
	if (unlikely(!new_tsb)) {
		/* Not being able to fork due to a high-order TSB
		 * allocation failure is very bad behavior.  Just back
		 * down to a 0-order allocation and force no TSB
		 * growing for this address space.
		 */
		if (mm->context.tsb_block[tsb_index].tsb == NULL &&
		    new_cache_index > 0) {
			new_cache_index = 0;
			new_size = 8192;
			new_rss_limit = ~0UL;
			goto retry_tsb_alloc;
		}

		/* If we failed on a TSB grow, we are under serious
		 * memory pressure so don't try to grow any more.
		 */
		if (mm->context.tsb_block[tsb_index].tsb != NULL)
			mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
		return;
	}

	/* Mark all tags as invalid.  */
	tsb_init(new_tsb, new_size);

	/* Ok, we are about to commit the changes.  If we are
	 * growing an existing TSB the locking is very tricky,
	 * so WATCH OUT!
	 *
	 * We have to hold mm->context.lock while committing to the
	 * new TSB, this synchronizes us with processors in
	 * flush_tsb_user() and switch_mm() for this address space.
	 *
	 * But even with that lock held, processors run asynchronously
	 * accessing the old TSB via TLB miss handling.  This is OK
	 * because those actions are just propagating state from the
	 * Linux page tables into the TSB, page table mappings are not
	 * being changed.  If a real fault occurs, the processor will
	 * synchronize with us when it hits flush_tsb_user(), this is
	 * also true for the case where vmscan is modifying the page
	 * tables.  The only thing we need to be careful with is to
	 * skip any locked TSB entries during copy_tsb().
	 *
	 * When we finish committing to the new TSB, we have to drop
	 * the lock and ask all other cpus running this address space
	 * to run tsb_context_switch() to see the new TSB table.
	 */
	spin_lock_irqsave(&mm->context.lock, flags);

	old_tsb = mm->context.tsb_block[tsb_index].tsb;
	old_cache_index =
		(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
	old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
		    sizeof(struct tsb));

	/* Handle multiple threads trying to grow the TSB at the same time.
	 * One will get in here first, and bump the size and the RSS limit.
	 * The others will get in here next and hit this check.
	 */
	if (unlikely(old_tsb &&
		     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
		spin_unlock_irqrestore(&mm->context.lock, flags);

		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
		return;
	}

	mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

	if (old_tsb) {
		extern void copy_tsb(unsigned long old_tsb_base,
				     unsigned long old_tsb_size,
				     unsigned long new_tsb_base,
				     unsigned long new_tsb_size);
		unsigned long old_tsb_base = (unsigned long) old_tsb;
		unsigned long new_tsb_base = (unsigned long) new_tsb;

		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
			old_tsb_base = __pa(old_tsb_base);
			new_tsb_base = __pa(new_tsb_base);
		}
		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
	}

	mm->context.tsb_block[tsb_index].tsb = new_tsb;
	setup_tsb_params(mm, tsb_index, new_size);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	/* If old_tsb is NULL, we're being invoked for the first time
	 * from init_new_context().
	 */
	if (old_tsb) {
		/* Reload it on the local cpu.  */
		tsb_context_switch(mm);

		/* Now force other processors to do the same.  */
		preempt_disable();
		smp_tsb_sync(mm);
		preempt_enable();

		/* Now it is safe to free the old tsb.  */
		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
	}
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#ifdef CONFIG_HUGETLB_PAGE
	unsigned long huge_pte_count;
#endif
	unsigned int i;

	spin_lock_init(&mm->context.lock);

	mm->context.sparc64_ctx_val = 0UL;

#ifdef CONFIG_HUGETLB_PAGE
	/* We reset it to zero because the fork() page copying
	 * will re-increment the counters as the parent PTEs are
	 * copied into the child address space.
	 */
	huge_pte_count = mm->context.huge_pte_count;
	mm->context.huge_pte_count = 0;
#endif

	/* copy_mm() copies over the parent's mm_struct before calling
	 * us, so we need to zero out the TSB pointer or else tsb_grow()
	 * will be confused and think there is an older TSB to free up.
	 */
	for (i = 0; i < MM_NUM_TSBS; i++)
		mm->context.tsb_block[i].tsb = NULL;

	/* If this is fork, inherit the parent's TSB size.  We would
	 * grow it to that size on the first page fault anyway.
	 */
	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));

#ifdef CONFIG_HUGETLB_PAGE
	if (unlikely(huge_pte_count))
		tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif

	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
		return -ENOMEM;

	return 0;
}

static void tsb_destroy_one(struct tsb_config *tp)
{
	unsigned long cache_index;

	if (!tp->tsb)
		return;
	cache_index = tp->tsb_reg_val & 0x7UL;
	kmem_cache_free(tsb_caches[cache_index], tp->tsb);
	tp->tsb = NULL;
	tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
	unsigned long flags, i;

	for (i = 0; i < MM_NUM_TSBS; i++)
		tsb_destroy_one(&mm->context.tsb_block[i]);

	spin_lock_irqsave(&ctx_alloc_lock, flags);

	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_NRBITS(mm->context);
		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
	}

	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}