/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/system.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tsb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
	vaddr >>= hash_shift;
	return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	return (tag == (vaddr >> 22));
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long v;

	for (v = start; v < end; v += PAGE_SIZE) {
		unsigned long hash = tsb_hash(v, PAGE_SHIFT,
					      KERNEL_TSB_NENTRIES);
		struct tsb *ent = &swapper_tsb[hash];

		if (tag_compare(ent->tag, v))
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
	}
}

static void __flush_tsb_one(struct mmu_gather *mp, unsigned long hash_shift, unsigned long tsb, unsigned long nentries)
{
	unsigned long i;

	for (i = 0; i < mp->tlb_nr; i++) {
		unsigned long v = mp->vaddrs[i];
		unsigned long tag, ent, hash;

		v &= ~0x1UL;

		hash = tsb_hash(v, hash_shift, nentries);
		ent = tsb + (hash * sizeof(struct tsb));
		tag = (v >> 22UL);

		tsb_flush(ent, tag);
	}
}

void flush_tsb_user(struct mmu_gather *mp)
{
	struct mm_struct *mm = mp->mm;
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
	nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		base = __pa(base);
	__flush_tsb_one(mp, PAGE_SHIFT, base, nentries);

#ifdef CONFIG_HUGETLB_PAGE
	if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_tsb_one(mp, HPAGE_SHIFT, base, nentries);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}
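
/* Select the hypervisor page size index and mask that match the kernel's
 * configured base page size (and, when hugetlb is enabled, its huge page
 * size); setup_tsb_params() below plugs them into the hypervisor TSB
 * descriptors.
 */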
#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K
#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_64K
#else
#error Broken base page size setting...
#endif

#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_64K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_512K
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_512K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
#else
#error Broken huge page size setting...
#endif
#endif

static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
	unsigned long tsb_reg, base, tsb_paddr;
	unsigned long page_sz, tte;

	mm->context.tsb_block[tsb_idx].tsb_nentries =
		tsb_bytes / sizeof(struct tsb);

	base = TSBMAP_BASE;
	tte = pgprot_val(PAGE_KERNEL_LOCKED);
	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

	/* Use the smallest page size that can map the whole TSB
	 * in one TLB entry.
	 */
	switch (tsb_bytes) {
	case 8192 << 0:
		tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
		base += (tsb_paddr & 8192);
#endif
		page_sz = 8192;
		break;

	case 8192 << 1:
		tsb_reg = 0x1UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 2:
		tsb_reg = 0x2UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 3:
		tsb_reg = 0x3UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 4:
		tsb_reg = 0x4UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 5:
		tsb_reg = 0x5UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 6:
		tsb_reg = 0x6UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 7:
		tsb_reg = 0x7UL;
		page_sz = 4 * 1024 * 1024;
		break;

	default:
		printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
		       current->comm, current->pid, tsb_bytes);
		do_exit(SIGSEGV);
	}
	tte |= pte_sz_bits(page_sz);

	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
		/* Physical mapping, no locked TLB entry for TSB.  */
		tsb_reg |= tsb_paddr;

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
	} else {
		tsb_reg |= base;
		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
		tte |= (tsb_paddr & ~(page_sz - 1UL));

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
	}

	/* Setup the Hypervisor TSB descriptor.  */
	if (tlb_type == hypervisor) {
		struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
			break;
#ifdef CONFIG_HUGETLB_PAGE
		case MM_TSB_HUGE:
			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->assoc = 1;
		hp->num_ttes = tsb_bytes / 16;
		hp->ctx_idx = 0;
		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
			break;
#ifdef CONFIG_HUGETLB_PAGE
		case MM_TSB_HUGE:
			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->tsb_base = tsb_paddr;
		hp->resv = 0;
	}
}

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
	"tsb_8KB",
	"tsb_16KB",
	"tsb_32KB",
	"tsb_64KB",
	"tsb_128KB",
	"tsb_256KB",
	"tsb_512KB",
	"tsb_1MB",
};

void __init pgtable_cache_init(void)
{
	unsigned long i;

	for (i = 0; i < 8; i++) {
		unsigned long size = 8192 << i;
		const char *name = tsb_cache_names[i];

		tsb_caches[i] = kmem_cache_create(name,
						  size, size,
						  0, NULL);
		if (!tsb_caches[i]) {
			prom_printf("Could not create %s cache\n", name);
			prom_halt();
		}
	}
}

int sysctl_tsb_ratio = -2;

static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
	unsigned long num_ents = (new_size / sizeof(struct tsb));

	if (sysctl_tsb_ratio < 0)
		return num_ents - (num_ents >> -sysctl_tsb_ratio);
	else
		return num_ents + (num_ents >> sysctl_tsb_ratio);
}
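
/* For example, with the default sysctl_tsb_ratio of -2, an 8KB TSB
 * holds 8192 / sizeof(struct tsb) = 512 entries, so its RSS limit is
 * 512 - (512 >> 2) = 384, i.e. three quarters of the TSB's capacity.
 */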

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try and grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
	unsigned long max_tsb_size = 1 * 1024 * 1024;
	unsigned long new_size, old_size, flags;
	struct tsb *old_tsb, *new_tsb;
	unsigned long new_cache_index, old_cache_index;
	unsigned long new_rss_limit;
	gfp_t gfp_flags;

	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		max_tsb_size = (PAGE_SIZE << MAX_ORDER);

	new_cache_index = 0;
	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
		new_rss_limit = tsb_size_to_rss_limit(new_size);
		if (new_rss_limit > rss)
			break;
		new_cache_index++;
	}

	if (new_size == max_tsb_size)
		new_rss_limit = ~0UL;

retry_tsb_alloc:
	gfp_flags = GFP_KERNEL;
	if (new_size > (PAGE_SIZE * 2))
		gfp_flags |= __GFP_NOWARN | __GFP_NORETRY;

	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
					gfp_flags, numa_node_id());
	if (unlikely(!new_tsb)) {
		/* Not being able to fork due to a high-order TSB
		 * allocation failure is very bad behavior.  Just back
		 * down to a 0-order allocation and force no TSB
		 * growing for this address space.
		 */
		if (mm->context.tsb_block[tsb_index].tsb == NULL &&
		    new_cache_index > 0) {
			new_cache_index = 0;
			new_size = 8192;
			new_rss_limit = ~0UL;
			goto retry_tsb_alloc;
		}

		/* If we failed on a TSB grow, we are under serious
		 * memory pressure so don't try to grow any more.
		 */
		if (mm->context.tsb_block[tsb_index].tsb != NULL)
			mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
		return;
	}

	/* Mark all tags as invalid.  */
	tsb_init(new_tsb, new_size);

	/* Ok, we are about to commit the changes.  If we are
	 * growing an existing TSB the locking is very tricky,
	 * so WATCH OUT!
	 *
	 * We have to hold mm->context.lock while committing to the
	 * new TSB, this synchronizes us with processors in
	 * flush_tsb_user() and switch_mm() for this address space.
	 *
	 * But even with that lock held, processors run asynchronously
	 * accessing the old TSB via TLB miss handling.  This is OK
	 * because those actions are just propagating state from the
	 * Linux page tables into the TSB, page table mappings are not
	 * being changed.  If a real fault occurs, the processor will
	 * synchronize with us when it hits flush_tsb_user(), this is
	 * also true for the case where vmscan is modifying the page
	 * tables.  The only thing we need to be careful with is to
	 * skip any locked TSB entries during copy_tsb().
	 *
	 * When we finish committing to the new TSB, we have to drop
	 * the lock and ask all other cpus running this address space
	 * to run tsb_context_switch() to see the new TSB table.
	 */
	spin_lock_irqsave(&mm->context.lock, flags);

	old_tsb = mm->context.tsb_block[tsb_index].tsb;
	old_cache_index =
		(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
	old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
		    sizeof(struct tsb));

	/* Handle multiple threads trying to grow the TSB at the same time.
	 * One will get in here first, and bump the size and the RSS limit.
	 * The others will get in here next and hit this check.
	 */
	if (unlikely(old_tsb &&
		     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
		spin_unlock_irqrestore(&mm->context.lock, flags);

		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
		return;
	}

	mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

	if (old_tsb) {
		extern void copy_tsb(unsigned long old_tsb_base,
				     unsigned long old_tsb_size,
				     unsigned long new_tsb_base,
				     unsigned long new_tsb_size);
		unsigned long old_tsb_base = (unsigned long) old_tsb;
		unsigned long new_tsb_base = (unsigned long) new_tsb;

		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
			old_tsb_base = __pa(old_tsb_base);
			new_tsb_base = __pa(new_tsb_base);
		}
		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
	}

	mm->context.tsb_block[tsb_index].tsb = new_tsb;
	setup_tsb_params(mm, tsb_index, new_size);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	/* If old_tsb is NULL, we're being invoked for the first time
	 * from init_new_context().
	 */
	if (old_tsb) {
		/* Reload it on the local cpu.  */
		tsb_context_switch(mm);

		/* Now force other processors to do the same.  */
		preempt_disable();
		smp_tsb_sync(mm);
		preempt_enable();

		/* Now it is safe to free the old tsb.  */
		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
	}
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#ifdef CONFIG_HUGETLB_PAGE
	unsigned long huge_pte_count;
#endif
	unsigned int i;

	spin_lock_init(&mm->context.lock);

	mm->context.sparc64_ctx_val = 0UL;

#ifdef CONFIG_HUGETLB_PAGE
	/* We reset it to zero because the fork() page copying
	 * will re-increment the counters as the parent PTEs are
	 * copied into the child address space.
	 */
	huge_pte_count = mm->context.huge_pte_count;
	mm->context.huge_pte_count = 0;
#endif

	/* copy_mm() copies over the parent's mm_struct before calling
	 * us, so we need to zero out the TSB pointer or else tsb_grow()
	 * will be confused and think there is an older TSB to free up.
	 */
	for (i = 0; i < MM_NUM_TSBS; i++)
		mm->context.tsb_block[i].tsb = NULL;

	/* If this is fork, inherit the parent's TSB size.  We would
	 * grow it to that size on the first page fault anyway.
	 */
	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));

#ifdef CONFIG_HUGETLB_PAGE
	if (unlikely(huge_pte_count))
		tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif

	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
		return -ENOMEM;

	return 0;
}

static void tsb_destroy_one(struct tsb_config *tp)
{
	unsigned long cache_index;

	if (!tp->tsb)
		return;
	cache_index = tp->tsb_reg_val & 0x7UL;
	kmem_cache_free(tsb_caches[cache_index], tp->tsb);
	tp->tsb = NULL;
	tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
	unsigned long flags, i;

	for (i = 0; i < MM_NUM_TSBS; i++)
		tsb_destroy_one(&mm->context.tsb_block[i]);

	spin_lock_irqsave(&ctx_alloc_lock, flags);

	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_NRBITS(mm->context);
		mmu_context_bmap[nr >> 6] &= ~(1UL << (nr & 63));
	}

	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}