1 /* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * Synthesize TLB refill handlers at runtime. 7 * 8 * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer 9 * Copyright (C) 2005, 2007, 2008, 2009 Maciej W. Rozycki 10 * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) 11 * Copyright (C) 2008, 2009 Cavium Networks, Inc. 12 * Copyright (C) 2011 MIPS Technologies, Inc. 13 * 14 * ... and the days got worse and worse and now you see 15 * I've gone completly out of my mind. 16 * 17 * They're coming to take me a away haha 18 * they're coming to take me a away hoho hihi haha 19 * to the funny farm where code is beautiful all the time ... 20 * 21 * (Condolences to Napoleon XIV) 22 */ 23 24 #include <linux/bug.h> 25 #include <linux/kernel.h> 26 #include <linux/types.h> 27 #include <linux/smp.h> 28 #include <linux/string.h> 29 #include <linux/init.h> 30 #include <linux/cache.h> 31 32 #include <asm/cacheflush.h> 33 #include <asm/pgtable.h> 34 #include <asm/war.h> 35 #include <asm/uasm.h> 36 #include <asm/setup.h> 37 38 /* 39 * TLB load/store/modify handlers. 40 * 41 * Only the fastpath gets synthesized at runtime, the slowpath for 42 * do_page_fault remains normal asm. 43 */ 44 extern void tlb_do_page_fault_0(void); 45 extern void tlb_do_page_fault_1(void); 46 47 struct work_registers { 48 int r1; 49 int r2; 50 int r3; 51 }; 52 53 struct tlb_reg_save { 54 unsigned long a; 55 unsigned long b; 56 } ____cacheline_aligned_in_smp; 57 58 static struct tlb_reg_save handler_reg_save[NR_CPUS]; 59 60 static inline int r45k_bvahwbug(void) 61 { 62 /* XXX: We should probe for the presence of this bug, but we don't. */ 63 return 0; 64 } 65 66 static inline int r4k_250MHZhwbug(void) 67 { 68 /* XXX: We should probe for the presence of this bug, but we don't. */ 69 return 0; 70 } 71 72 static inline int __maybe_unused bcm1250_m3_war(void) 73 { 74 return BCM1250_M3_WAR; 75 } 76 77 static inline int __maybe_unused r10000_llsc_war(void) 78 { 79 return R10000_LLSC_WAR; 80 } 81 82 static int use_bbit_insns(void) 83 { 84 switch (current_cpu_type()) { 85 case CPU_CAVIUM_OCTEON: 86 case CPU_CAVIUM_OCTEON_PLUS: 87 case CPU_CAVIUM_OCTEON2: 88 return 1; 89 default: 90 return 0; 91 } 92 } 93 94 static int use_lwx_insns(void) 95 { 96 switch (current_cpu_type()) { 97 case CPU_CAVIUM_OCTEON2: 98 return 1; 99 default: 100 return 0; 101 } 102 } 103 #if defined(CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE) && \ 104 CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0 105 static bool scratchpad_available(void) 106 { 107 return true; 108 } 109 static int scratchpad_offset(int i) 110 { 111 /* 112 * CVMSEG starts at address -32768 and extends for 113 * CAVIUM_OCTEON_CVMSEG_SIZE 128 byte cache lines. 114 */ 115 i += 1; /* Kernel use starts at the top and works down. */ 116 return CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128 - (8 * i) - 32768; 117 } 118 #else 119 static bool scratchpad_available(void) 120 { 121 return false; 122 } 123 static int scratchpad_offset(int i) 124 { 125 BUG(); 126 /* Really unreachable, but evidently some GCC want this. */ 127 return 0; 128 } 129 #endif 130 /* 131 * Found by experiment: At least some revisions of the 4kc throw under 132 * some circumstances a machine check exception, triggered by invalid 133 * values in the index register. Delaying the tlbp instruction until 134 * after the next branch, plus adding an additional nop in front of 135 * tlbwi/tlbwr avoids the invalid index register values. 
Nobody knows 136 * why; it's not an issue caused by the core RTL. 137 * 138 */ 139 static int __cpuinit m4kc_tlbp_war(void) 140 { 141 return (current_cpu_data.processor_id & 0xffff00) == 142 (PRID_COMP_MIPS | PRID_IMP_4KC); 143 } 144 145 /* Handle labels (which must be positive integers). */ 146 enum label_id { 147 label_second_part = 1, 148 label_leave, 149 label_vmalloc, 150 label_vmalloc_done, 151 label_tlbw_hazard_0, 152 label_split = label_tlbw_hazard_0 + 8, 153 label_tlbl_goaround1, 154 label_tlbl_goaround2, 155 label_nopage_tlbl, 156 label_nopage_tlbs, 157 label_nopage_tlbm, 158 label_smp_pgtable_change, 159 label_r3000_write_probe_fail, 160 label_large_segbits_fault, 161 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT 162 label_tlb_huge_update, 163 #endif 164 }; 165 166 UASM_L_LA(_second_part) 167 UASM_L_LA(_leave) 168 UASM_L_LA(_vmalloc) 169 UASM_L_LA(_vmalloc_done) 170 /* _tlbw_hazard_x is handled differently. */ 171 UASM_L_LA(_split) 172 UASM_L_LA(_tlbl_goaround1) 173 UASM_L_LA(_tlbl_goaround2) 174 UASM_L_LA(_nopage_tlbl) 175 UASM_L_LA(_nopage_tlbs) 176 UASM_L_LA(_nopage_tlbm) 177 UASM_L_LA(_smp_pgtable_change) 178 UASM_L_LA(_r3000_write_probe_fail) 179 UASM_L_LA(_large_segbits_fault) 180 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT 181 UASM_L_LA(_tlb_huge_update) 182 #endif 183 184 static int __cpuinitdata hazard_instance; 185 186 static void __cpuinit uasm_bgezl_hazard(u32 **p, 187 struct uasm_reloc **r, 188 int instance) 189 { 190 switch (instance) { 191 case 0 ... 7: 192 uasm_il_bgezl(p, r, 0, label_tlbw_hazard_0 + instance); 193 return; 194 default: 195 BUG(); 196 } 197 } 198 199 static void __cpuinit uasm_bgezl_label(struct uasm_label **l, 200 u32 **p, 201 int instance) 202 { 203 switch (instance) { 204 case 0 ... 7: 205 uasm_build_label(l, *p, label_tlbw_hazard_0 + instance); 206 break; 207 default: 208 BUG(); 209 } 210 } 211 212 /* 213 * pgtable bits are assigned dynamically depending on processor feature 214 * and statically based on kernel configuration. This spits out the actual 215 * values the kernel is using. Required to make sense from disassembled 216 * TLB exception handlers. 217 */ 218 static void output_pgtable_bits_defines(void) 219 { 220 #define pr_define(fmt, ...) 
\ 221 pr_debug("#define " fmt, ##__VA_ARGS__) 222 223 pr_debug("#include <asm/asm.h>\n"); 224 pr_debug("#include <asm/regdef.h>\n"); 225 pr_debug("\n"); 226 227 pr_define("_PAGE_PRESENT_SHIFT %d\n", _PAGE_PRESENT_SHIFT); 228 pr_define("_PAGE_READ_SHIFT %d\n", _PAGE_READ_SHIFT); 229 pr_define("_PAGE_WRITE_SHIFT %d\n", _PAGE_WRITE_SHIFT); 230 pr_define("_PAGE_ACCESSED_SHIFT %d\n", _PAGE_ACCESSED_SHIFT); 231 pr_define("_PAGE_MODIFIED_SHIFT %d\n", _PAGE_MODIFIED_SHIFT); 232 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT 233 pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); 234 pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT); 235 #endif 236 if (cpu_has_rixi) { 237 #ifdef _PAGE_NO_EXEC_SHIFT 238 pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); 239 #endif 240 #ifdef _PAGE_NO_READ_SHIFT 241 pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT); 242 #endif 243 } 244 pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT); 245 pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT); 246 pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT); 247 pr_define("_PFN_SHIFT %d\n", _PFN_SHIFT); 248 pr_debug("\n"); 249 } 250 251 static inline void dump_handler(const char *symbol, const u32 *handler, int count) 252 { 253 int i; 254 255 pr_debug("LEAF(%s)\n", symbol); 256 257 pr_debug("\t.set push\n"); 258 pr_debug("\t.set noreorder\n"); 259 260 for (i = 0; i < count; i++) 261 pr_debug("\t.word\t0x%08x\t\t# %p\n", handler[i], &handler[i]); 262 263 pr_debug("\t.set\tpop\n"); 264 265 pr_debug("\tEND(%s)\n", symbol); 266 } 267 268 /* The only general purpose registers allowed in TLB handlers. */ 269 #define K0 26 270 #define K1 27 271 272 /* Some CP0 registers */ 273 #define C0_INDEX 0, 0 274 #define C0_ENTRYLO0 2, 0 275 #define C0_TCBIND 2, 2 276 #define C0_ENTRYLO1 3, 0 277 #define C0_CONTEXT 4, 0 278 #define C0_PAGEMASK 5, 0 279 #define C0_BADVADDR 8, 0 280 #define C0_ENTRYHI 10, 0 281 #define C0_EPC 14, 0 282 #define C0_XCONTEXT 20, 0 283 284 #ifdef CONFIG_64BIT 285 # define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_XCONTEXT) 286 #else 287 # define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_CONTEXT) 288 #endif 289 290 /* The worst case length of the handler is around 18 instructions for 291 * R3000-style TLBs and up to 63 instructions for R4000-style TLBs. 292 * Maximum space available is 32 instructions for R3000 and 64 293 * instructions for R4000. 294 * 295 * We deliberately chose a buffer size of 128, so we won't scribble 296 * over anything important on overflow before we panic. 
297 */ 298 static u32 tlb_handler[128] __cpuinitdata; 299 300 /* simply assume worst case size for labels and relocs */ 301 static struct uasm_label labels[128] __cpuinitdata; 302 static struct uasm_reloc relocs[128] __cpuinitdata; 303 304 static int check_for_high_segbits __cpuinitdata; 305 306 static unsigned int kscratch_used_mask __cpuinitdata; 307 308 static int __cpuinit allocate_kscratch(void) 309 { 310 int r; 311 unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask; 312 313 r = ffs(a); 314 315 if (r == 0) 316 return -1; 317 318 r--; /* make it zero based */ 319 320 kscratch_used_mask |= (1 << r); 321 322 return r; 323 } 324 325 static int scratch_reg __cpuinitdata; 326 static int pgd_reg __cpuinitdata; 327 enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch}; 328 329 static struct work_registers __cpuinit build_get_work_registers(u32 **p) 330 { 331 struct work_registers r; 332 333 int smp_processor_id_reg; 334 int smp_processor_id_sel; 335 int smp_processor_id_shift; 336 337 if (scratch_reg > 0) { 338 /* Save in CPU local C0_KScratch? */ 339 UASM_i_MTC0(p, 1, 31, scratch_reg); 340 r.r1 = K0; 341 r.r2 = K1; 342 r.r3 = 1; 343 return r; 344 } 345 346 if (num_possible_cpus() > 1) { 347 #ifdef CONFIG_MIPS_PGD_C0_CONTEXT 348 smp_processor_id_shift = 51; 349 smp_processor_id_reg = 20; /* XContext */ 350 smp_processor_id_sel = 0; 351 #else 352 # ifdef CONFIG_32BIT 353 smp_processor_id_shift = 25; 354 smp_processor_id_reg = 4; /* Context */ 355 smp_processor_id_sel = 0; 356 # endif 357 # ifdef CONFIG_64BIT 358 smp_processor_id_shift = 26; 359 smp_processor_id_reg = 4; /* Context */ 360 smp_processor_id_sel = 0; 361 # endif 362 #endif 363 /* Get smp_processor_id */ 364 UASM_i_MFC0(p, K0, smp_processor_id_reg, smp_processor_id_sel); 365 UASM_i_SRL_SAFE(p, K0, K0, smp_processor_id_shift); 366 367 /* handler_reg_save index in K0 */ 368 UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save))); 369 370 UASM_i_LA(p, K1, (long)&handler_reg_save); 371 UASM_i_ADDU(p, K0, K0, K1); 372 } else { 373 UASM_i_LA(p, K0, (long)&handler_reg_save); 374 } 375 /* K0 now points to save area, save $1 and $2 */ 376 UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0); 377 UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0); 378 379 r.r1 = K1; 380 r.r2 = 1; 381 r.r3 = 2; 382 return r; 383 } 384 385 static void __cpuinit build_restore_work_registers(u32 **p) 386 { 387 if (scratch_reg > 0) { 388 UASM_i_MFC0(p, 1, 31, scratch_reg); 389 return; 390 } 391 /* K0 already points to save area, restore $1 and $2 */ 392 UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0); 393 UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0); 394 } 395 396 #ifndef CONFIG_MIPS_PGD_C0_CONTEXT 397 398 /* 399 * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current, 400 * we cannot do r3000 under these circumstances. 401 * 402 * Declare pgd_current here instead of including mmu_context.h to avoid type 403 * conflicts for tlbmiss_handler_setup_pgd 404 */ 405 extern unsigned long pgd_current[]; 406 407 /* 408 * The R3000 TLB handler is simple. 
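 *
 * As a readability aid only, this is roughly the instruction sequence
 * the uasm calls below emit (a sketch; it mirrors the code, with k0/k1
 * as the working registers):
 *
 *	mfc0	k0, c0_badvaddr
 *	lui	k1, %hi(pgd_current)		# cp0 delay
 *	lw	k1, %lo(pgd_current)(k1)
 *	srl	k0, k0, 22			# load delay
 *	sll	k0, k0, 2
 *	addu	k1, k1, k0
 *	mfc0	k0, c0_context
 *	lw	k1, 0(k1)			# cp0 delay
 *	andi	k0, k0, 0xffc			# load delay
 *	addu	k1, k1, k0
 *	lw	k0, 0(k1)
 *	nop					# load delay
 *	mtc0	k0, c0_entrylo0
 *	mfc0	k1, c0_epc			# cp0 delay
 *	tlbwr					# cp0 delay
 *	jr	k1
 *	rfe					# branch delay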
409 */ 410 static void __cpuinit build_r3000_tlb_refill_handler(void) 411 { 412 long pgdc = (long)pgd_current; 413 u32 *p; 414 415 memset(tlb_handler, 0, sizeof(tlb_handler)); 416 p = tlb_handler; 417 418 uasm_i_mfc0(&p, K0, C0_BADVADDR); 419 uasm_i_lui(&p, K1, uasm_rel_hi(pgdc)); /* cp0 delay */ 420 uasm_i_lw(&p, K1, uasm_rel_lo(pgdc), K1); 421 uasm_i_srl(&p, K0, K0, 22); /* load delay */ 422 uasm_i_sll(&p, K0, K0, 2); 423 uasm_i_addu(&p, K1, K1, K0); 424 uasm_i_mfc0(&p, K0, C0_CONTEXT); 425 uasm_i_lw(&p, K1, 0, K1); /* cp0 delay */ 426 uasm_i_andi(&p, K0, K0, 0xffc); /* load delay */ 427 uasm_i_addu(&p, K1, K1, K0); 428 uasm_i_lw(&p, K0, 0, K1); 429 uasm_i_nop(&p); /* load delay */ 430 uasm_i_mtc0(&p, K0, C0_ENTRYLO0); 431 uasm_i_mfc0(&p, K1, C0_EPC); /* cp0 delay */ 432 uasm_i_tlbwr(&p); /* cp0 delay */ 433 uasm_i_jr(&p, K1); 434 uasm_i_rfe(&p); /* branch delay */ 435 436 if (p > tlb_handler + 32) 437 panic("TLB refill handler space exceeded"); 438 439 pr_debug("Wrote TLB refill handler (%u instructions).\n", 440 (unsigned int)(p - tlb_handler)); 441 442 memcpy((void *)ebase, tlb_handler, 0x80); 443 444 dump_handler("r3000_tlb_refill", (u32 *)ebase, 32); 445 } 446 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ 447 448 /* 449 * The R4000 TLB handler is much more complicated. We have two 450 * consecutive handler areas with 32 instructions space each. 451 * Since they aren't used at the same time, we can overflow in the 452 * other one.To keep things simple, we first assume linear space, 453 * then we relocate it to the final handler layout as needed. 454 */ 455 static u32 final_handler[64] __cpuinitdata; 456 457 /* 458 * Hazards 459 * 460 * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0: 461 * 2. A timing hazard exists for the TLBP instruction. 462 * 463 * stalling_instruction 464 * TLBP 465 * 466 * The JTLB is being read for the TLBP throughout the stall generated by the 467 * previous instruction. This is not really correct as the stalling instruction 468 * can modify the address used to access the JTLB. The failure symptom is that 469 * the TLBP instruction will use an address created for the stalling instruction 470 * and not the address held in C0_ENHI and thus report the wrong results. 471 * 472 * The software work-around is to not allow the instruction preceding the TLBP 473 * to stall - make it an NOP or some other instruction guaranteed not to stall. 474 * 475 * Errata 2 will not be fixed. This errata is also on the R5000. 476 * 477 * As if we MIPS hackers wouldn't know how to nop pipelines happy ... 478 */ 479 static void __cpuinit __maybe_unused build_tlb_probe_entry(u32 **p) 480 { 481 switch (current_cpu_type()) { 482 /* Found by experiment: R4600 v2.0/R4700 needs this, too. */ 483 case CPU_R4600: 484 case CPU_R4700: 485 case CPU_R5000: 486 case CPU_NEVADA: 487 uasm_i_nop(p); 488 uasm_i_tlbp(p); 489 break; 490 491 default: 492 uasm_i_tlbp(p); 493 break; 494 } 495 } 496 497 /* 498 * Write random or indexed TLB entry, and care about the hazards from 499 * the preceding mtc0 and for the following eret. 
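 *
 * On the original R4000/R4400 the code below avoids explicit hazard
 * nops with a branch-likely trick (see the CPU_R4000* cases); roughly,
 * as a sketch, it emits:
 *
 *	bgezl	zero, 1f	# always taken; covers the mtc0 hazard
 *	tlbwi			# or tlbwr, executed in the delay slot
 * 1:	nop
 *
 * The taken branch-likely also saves the two nops otherwise needed
 * after the tlbw instruction.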
500 */ 501 enum tlb_write_entry { tlb_random, tlb_indexed }; 502 503 static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, 504 struct uasm_reloc **r, 505 enum tlb_write_entry wmode) 506 { 507 void(*tlbw)(u32 **) = NULL; 508 509 switch (wmode) { 510 case tlb_random: tlbw = uasm_i_tlbwr; break; 511 case tlb_indexed: tlbw = uasm_i_tlbwi; break; 512 } 513 514 if (cpu_has_mips_r2) { 515 /* 516 * The architecture spec says an ehb is required here, 517 * but a number of cores do not have the hazard and 518 * using an ehb causes an expensive pipeline stall. 519 */ 520 switch (current_cpu_type()) { 521 case CPU_M14KC: 522 case CPU_74K: 523 break; 524 525 default: 526 uasm_i_ehb(p); 527 break; 528 } 529 tlbw(p); 530 return; 531 } 532 533 switch (current_cpu_type()) { 534 case CPU_R4000PC: 535 case CPU_R4000SC: 536 case CPU_R4000MC: 537 case CPU_R4400PC: 538 case CPU_R4400SC: 539 case CPU_R4400MC: 540 /* 541 * This branch uses up a mtc0 hazard nop slot and saves 542 * two nops after the tlbw instruction. 543 */ 544 uasm_bgezl_hazard(p, r, hazard_instance); 545 tlbw(p); 546 uasm_bgezl_label(l, p, hazard_instance); 547 hazard_instance++; 548 uasm_i_nop(p); 549 break; 550 551 case CPU_R4600: 552 case CPU_R4700: 553 uasm_i_nop(p); 554 tlbw(p); 555 uasm_i_nop(p); 556 break; 557 558 case CPU_R5000: 559 case CPU_NEVADA: 560 uasm_i_nop(p); /* QED specifies 2 nops hazard */ 561 uasm_i_nop(p); /* QED specifies 2 nops hazard */ 562 tlbw(p); 563 break; 564 565 case CPU_R4300: 566 case CPU_5KC: 567 case CPU_TX49XX: 568 case CPU_PR4450: 569 case CPU_XLR: 570 uasm_i_nop(p); 571 tlbw(p); 572 break; 573 574 case CPU_R10000: 575 case CPU_R12000: 576 case CPU_R14000: 577 case CPU_4KC: 578 case CPU_4KEC: 579 case CPU_M14KC: 580 case CPU_M14KEC: 581 case CPU_SB1: 582 case CPU_SB1A: 583 case CPU_4KSC: 584 case CPU_20KC: 585 case CPU_25KF: 586 case CPU_BMIPS32: 587 case CPU_BMIPS3300: 588 case CPU_BMIPS4350: 589 case CPU_BMIPS4380: 590 case CPU_BMIPS5000: 591 case CPU_LOONGSON2: 592 case CPU_R5500: 593 if (m4kc_tlbp_war()) 594 uasm_i_nop(p); 595 case CPU_ALCHEMY: 596 tlbw(p); 597 break; 598 599 case CPU_RM7000: 600 uasm_i_nop(p); 601 uasm_i_nop(p); 602 uasm_i_nop(p); 603 uasm_i_nop(p); 604 tlbw(p); 605 break; 606 607 case CPU_VR4111: 608 case CPU_VR4121: 609 case CPU_VR4122: 610 case CPU_VR4181: 611 case CPU_VR4181A: 612 uasm_i_nop(p); 613 uasm_i_nop(p); 614 tlbw(p); 615 uasm_i_nop(p); 616 uasm_i_nop(p); 617 break; 618 619 case CPU_VR4131: 620 case CPU_VR4133: 621 case CPU_R5432: 622 uasm_i_nop(p); 623 uasm_i_nop(p); 624 tlbw(p); 625 break; 626 627 case CPU_JZRISC: 628 tlbw(p); 629 uasm_i_nop(p); 630 break; 631 632 default: 633 panic("No TLB refill handler yet (CPU type: %d)", 634 current_cpu_data.cputype); 635 break; 636 } 637 } 638 639 static __cpuinit __maybe_unused void build_convert_pte_to_entrylo(u32 **p, 640 unsigned int reg) 641 { 642 if (cpu_has_rixi) { 643 UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL)); 644 } else { 645 #ifdef CONFIG_64BIT_PHYS_ADDR 646 uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL)); 647 #else 648 UASM_i_SRL(p, reg, reg, ilog2(_PAGE_GLOBAL)); 649 #endif 650 } 651 } 652 653 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT 654 655 static __cpuinit void build_restore_pagemask(u32 **p, 656 struct uasm_reloc **r, 657 unsigned int tmp, 658 enum label_id lid, 659 int restore_scratch) 660 { 661 if (restore_scratch) { 662 /* Reset default page size */ 663 if (PM_DEFAULT_MASK >> 16) { 664 uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); 665 uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); 666 
uasm_i_mtc0(p, tmp, C0_PAGEMASK); 667 uasm_il_b(p, r, lid); 668 } else if (PM_DEFAULT_MASK) { 669 uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); 670 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 671 uasm_il_b(p, r, lid); 672 } else { 673 uasm_i_mtc0(p, 0, C0_PAGEMASK); 674 uasm_il_b(p, r, lid); 675 } 676 if (scratch_reg > 0) 677 UASM_i_MFC0(p, 1, 31, scratch_reg); 678 else 679 UASM_i_LW(p, 1, scratchpad_offset(0), 0); 680 } else { 681 /* Reset default page size */ 682 if (PM_DEFAULT_MASK >> 16) { 683 uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); 684 uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); 685 uasm_il_b(p, r, lid); 686 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 687 } else if (PM_DEFAULT_MASK) { 688 uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); 689 uasm_il_b(p, r, lid); 690 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 691 } else { 692 uasm_il_b(p, r, lid); 693 uasm_i_mtc0(p, 0, C0_PAGEMASK); 694 } 695 } 696 } 697 698 static __cpuinit void build_huge_tlb_write_entry(u32 **p, 699 struct uasm_label **l, 700 struct uasm_reloc **r, 701 unsigned int tmp, 702 enum tlb_write_entry wmode, 703 int restore_scratch) 704 { 705 /* Set huge page tlb entry size */ 706 uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16); 707 uasm_i_ori(p, tmp, tmp, PM_HUGE_MASK & 0xffff); 708 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 709 710 build_tlb_write_entry(p, l, r, wmode); 711 712 build_restore_pagemask(p, r, tmp, label_leave, restore_scratch); 713 } 714 715 /* 716 * Check if Huge PTE is present, if so then jump to LABEL. 717 */ 718 static void __cpuinit 719 build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp, 720 unsigned int pmd, int lid) 721 { 722 UASM_i_LW(p, tmp, 0, pmd); 723 if (use_bbit_insns()) { 724 uasm_il_bbit1(p, r, tmp, ilog2(_PAGE_HUGE), lid); 725 } else { 726 uasm_i_andi(p, tmp, tmp, _PAGE_HUGE); 727 uasm_il_bnez(p, r, tmp, lid); 728 } 729 } 730 731 static __cpuinit void build_huge_update_entries(u32 **p, 732 unsigned int pte, 733 unsigned int tmp) 734 { 735 int small_sequence; 736 737 /* 738 * A huge PTE describes an area the size of the 739 * configured huge page size. This is twice the 740 * of the large TLB entry size we intend to use. 741 * A TLB entry half the size of the configured 742 * huge page size is configured into entrylo0 743 * and entrylo1 to cover the contiguous huge PTE 744 * address space. 745 */ 746 small_sequence = (HPAGE_SIZE >> 7) < 0x10000; 747 748 /* We can clobber tmp. It isn't used after this.*/ 749 if (!small_sequence) 750 uasm_i_lui(p, tmp, HPAGE_SIZE >> (7 + 16)); 751 752 build_convert_pte_to_entrylo(p, pte); 753 UASM_i_MTC0(p, pte, C0_ENTRYLO0); /* load it */ 754 /* convert to entrylo1 */ 755 if (small_sequence) 756 UASM_i_ADDIU(p, pte, pte, HPAGE_SIZE >> 7); 757 else 758 UASM_i_ADDU(p, pte, pte, tmp); 759 760 UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */ 761 } 762 763 static __cpuinit void build_huge_handler_tail(u32 **p, 764 struct uasm_reloc **r, 765 struct uasm_label **l, 766 unsigned int pte, 767 unsigned int ptr) 768 { 769 #ifdef CONFIG_SMP 770 UASM_i_SC(p, pte, 0, ptr); 771 uasm_il_beqz(p, r, pte, label_tlb_huge_update); 772 UASM_i_LW(p, pte, 0, ptr); /* Needed because SC killed our PTE */ 773 #else 774 UASM_i_SW(p, pte, 0, ptr); 775 #endif 776 build_huge_update_entries(p, pte, ptr); 777 build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0); 778 } 779 #endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */ 780 781 #ifdef CONFIG_64BIT 782 /* 783 * TMP and PTR are scratch. 784 * TMP will be clobbered, PTR will hold the pmd entry. 
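 *
 * For reference, the walk below reduces to the following arithmetic
 * (a worked note, not extra emitted code):
 *
 *	pgd offset = ((badvaddr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) * 8
 *
 * which is why it emits a dsrl by (PGDIR_SHIFT - 3) followed by an andi
 * with (PTRS_PER_PGD - 1) << 3, and repeats the same pattern with
 * PMD_SHIFT / PTRS_PER_PMD for the pmd level.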
 */
static void __cpuinit
build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
		 unsigned int tmp, unsigned int ptr)
{
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
	long pgdc = (long)pgd_current;
#endif
	/*
	 * The vmalloc handling is not in the hotpath.
	 */
	uasm_i_dmfc0(p, tmp, C0_BADVADDR);

	if (check_for_high_segbits) {
		/*
		 * The kernel currently implicitly assumes that the
		 * MIPS SEGBITS parameter for the processor is
		 * (PGDIR_SHIFT+PGDIR_BITS) or less, and will never
		 * allocate virtual addresses outside the maximum
		 * range for SEGBITS = (PGDIR_SHIFT+PGDIR_BITS). But
		 * that doesn't prevent user code from accessing the
		 * higher xuseg addresses.  Here, we make sure that
		 * everything but the lower xuseg addresses goes down
		 * the module_alloc/vmalloc path.
		 */
		uasm_i_dsrl_safe(p, ptr, tmp, PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
		uasm_il_bnez(p, r, ptr, label_vmalloc);
	} else {
		uasm_il_bltz(p, r, tmp, label_vmalloc);
	}
	/* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */

#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
	if (pgd_reg != -1) {
		/* pgd is in pgd_reg */
		UASM_i_MFC0(p, ptr, 31, pgd_reg);
	} else {
		/*
		 * &pgd << 11 stored in CONTEXT [23..63].
		 */
		UASM_i_MFC0(p, ptr, C0_CONTEXT);

		/* Clear lower 23 bits of context. */
		uasm_i_dins(p, ptr, 0, 0, 23);

		/* 1 0 1 0 1  << 6  xkphys cached */
		uasm_i_ori(p, ptr, ptr, 0x540);
		uasm_i_drotr(p, ptr, ptr, 11);
	}
#elif defined(CONFIG_SMP)
# ifdef CONFIG_MIPS_MT_SMTC
	/*
	 * SMTC uses TCBind value as "CPU" index
	 */
	uasm_i_mfc0(p, ptr, C0_TCBIND);
	uasm_i_dsrl_safe(p, ptr, ptr, 19);
# else
	/*
	 * 64 bit SMP running in XKPHYS has smp_processor_id() << 3
	 * stored in CONTEXT.
	 */
	uasm_i_dmfc0(p, ptr, C0_CONTEXT);
	uasm_i_dsrl_safe(p, ptr, ptr, 23);
# endif
	UASM_i_LA_mostly(p, tmp, pgdc);
	uasm_i_daddu(p, ptr, ptr, tmp);
	uasm_i_dmfc0(p, tmp, C0_BADVADDR);
	uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr);
#else
	UASM_i_LA_mostly(p, ptr, pgdc);
	uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr);
#endif

	uasm_l_vmalloc_done(l, *p);

	/* get pgd offset in bytes */
	uasm_i_dsrl_safe(p, tmp, tmp, PGDIR_SHIFT - 3);

	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3);
	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */
#ifndef __PAGETABLE_PMD_FOLDED
	uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
	uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */
	uasm_i_dsrl_safe(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */
	uasm_i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3);
	uasm_i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */
#endif
}

/*
 * BVADDR is the faulting address, PTR is scratch.
 * PTR will hold the pgd for vmalloc.
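 *
 * Note: "single_insn_swpd" below simply means swapper_pg_dir sits in
 * 32-bit compatibility space with a zero low 16-bit offset, so a single
 * lui is enough to materialise its address.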
877 */ 878 static void __cpuinit 879 build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, 880 unsigned int bvaddr, unsigned int ptr, 881 enum vmalloc64_mode mode) 882 { 883 long swpd = (long)swapper_pg_dir; 884 int single_insn_swpd; 885 int did_vmalloc_branch = 0; 886 887 single_insn_swpd = uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd); 888 889 uasm_l_vmalloc(l, *p); 890 891 if (mode != not_refill && check_for_high_segbits) { 892 if (single_insn_swpd) { 893 uasm_il_bltz(p, r, bvaddr, label_vmalloc_done); 894 uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); 895 did_vmalloc_branch = 1; 896 /* fall through */ 897 } else { 898 uasm_il_bgez(p, r, bvaddr, label_large_segbits_fault); 899 } 900 } 901 if (!did_vmalloc_branch) { 902 if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) { 903 uasm_il_b(p, r, label_vmalloc_done); 904 uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); 905 } else { 906 UASM_i_LA_mostly(p, ptr, swpd); 907 uasm_il_b(p, r, label_vmalloc_done); 908 if (uasm_in_compat_space_p(swpd)) 909 uasm_i_addiu(p, ptr, ptr, uasm_rel_lo(swpd)); 910 else 911 uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(swpd)); 912 } 913 } 914 if (mode != not_refill && check_for_high_segbits) { 915 uasm_l_large_segbits_fault(l, *p); 916 /* 917 * We get here if we are an xsseg address, or if we are 918 * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary. 919 * 920 * Ignoring xsseg (assume disabled so would generate 921 * (address errors?), the only remaining possibility 922 * is the upper xuseg addresses. On processors with 923 * TLB_SEGBITS <= PGDIR_SHIFT+PGDIR_BITS, these 924 * addresses would have taken an address error. We try 925 * to mimic that here by taking a load/istream page 926 * fault. 927 */ 928 UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0); 929 uasm_i_jr(p, ptr); 930 931 if (mode == refill_scratch) { 932 if (scratch_reg > 0) 933 UASM_i_MFC0(p, 1, 31, scratch_reg); 934 else 935 UASM_i_LW(p, 1, scratchpad_offset(0), 0); 936 } else { 937 uasm_i_nop(p); 938 } 939 } 940 } 941 942 #else /* !CONFIG_64BIT */ 943 944 /* 945 * TMP and PTR are scratch. 946 * TMP will be clobbered, PTR will hold the pgd entry. 947 */ 948 static void __cpuinit __maybe_unused 949 build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) 950 { 951 long pgdc = (long)pgd_current; 952 953 /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ 954 #ifdef CONFIG_SMP 955 #ifdef CONFIG_MIPS_MT_SMTC 956 /* 957 * SMTC uses TCBind value as "CPU" index 958 */ 959 uasm_i_mfc0(p, ptr, C0_TCBIND); 960 UASM_i_LA_mostly(p, tmp, pgdc); 961 uasm_i_srl(p, ptr, ptr, 19); 962 #else 963 /* 964 * smp_processor_id() << 3 is stored in CONTEXT. 
965 */ 966 uasm_i_mfc0(p, ptr, C0_CONTEXT); 967 UASM_i_LA_mostly(p, tmp, pgdc); 968 uasm_i_srl(p, ptr, ptr, 23); 969 #endif 970 uasm_i_addu(p, ptr, tmp, ptr); 971 #else 972 UASM_i_LA_mostly(p, ptr, pgdc); 973 #endif 974 uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ 975 uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); 976 uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */ 977 uasm_i_sll(p, tmp, tmp, PGD_T_LOG2); 978 uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ 979 } 980 981 #endif /* !CONFIG_64BIT */ 982 983 static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx) 984 { 985 unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12; 986 unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); 987 988 switch (current_cpu_type()) { 989 case CPU_VR41XX: 990 case CPU_VR4111: 991 case CPU_VR4121: 992 case CPU_VR4122: 993 case CPU_VR4131: 994 case CPU_VR4181: 995 case CPU_VR4181A: 996 case CPU_VR4133: 997 shift += 2; 998 break; 999 1000 default: 1001 break; 1002 } 1003 1004 if (shift) 1005 UASM_i_SRL(p, ctx, ctx, shift); 1006 uasm_i_andi(p, ctx, ctx, mask); 1007 } 1008 1009 static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) 1010 { 1011 /* 1012 * Bug workaround for the Nevada. It seems as if under certain 1013 * circumstances the move from cp0_context might produce a 1014 * bogus result when the mfc0 instruction and its consumer are 1015 * in a different cacheline or a load instruction, probably any 1016 * memory reference, is between them. 1017 */ 1018 switch (current_cpu_type()) { 1019 case CPU_NEVADA: 1020 UASM_i_LW(p, ptr, 0, ptr); 1021 GET_CONTEXT(p, tmp); /* get context reg */ 1022 break; 1023 1024 default: 1025 GET_CONTEXT(p, tmp); /* get context reg */ 1026 UASM_i_LW(p, ptr, 0, ptr); 1027 break; 1028 } 1029 1030 build_adjust_context(p, tmp); 1031 UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */ 1032 } 1033 1034 static void __cpuinit build_update_entries(u32 **p, unsigned int tmp, 1035 unsigned int ptep) 1036 { 1037 /* 1038 * 64bit address support (36bit on a 32bit CPU) in a 32bit 1039 * Kernel is a special case. Only a few CPUs use it. 
1040 */ 1041 #ifdef CONFIG_64BIT_PHYS_ADDR 1042 if (cpu_has_64bits) { 1043 uasm_i_ld(p, tmp, 0, ptep); /* get even pte */ 1044 uasm_i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ 1045 if (cpu_has_rixi) { 1046 UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); 1047 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1048 UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); 1049 } else { 1050 uasm_i_dsrl_safe(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ 1051 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1052 uasm_i_dsrl_safe(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ 1053 } 1054 UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ 1055 } else { 1056 int pte_off_even = sizeof(pte_t) / 2; 1057 int pte_off_odd = pte_off_even + sizeof(pte_t); 1058 1059 /* The pte entries are pre-shifted */ 1060 uasm_i_lw(p, tmp, pte_off_even, ptep); /* get even pte */ 1061 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1062 uasm_i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */ 1063 UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ 1064 } 1065 #else 1066 UASM_i_LW(p, tmp, 0, ptep); /* get even pte */ 1067 UASM_i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ 1068 if (r45k_bvahwbug()) 1069 build_tlb_probe_entry(p); 1070 if (cpu_has_rixi) { 1071 UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); 1072 if (r4k_250MHZhwbug()) 1073 UASM_i_MTC0(p, 0, C0_ENTRYLO0); 1074 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1075 UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); 1076 } else { 1077 UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ 1078 if (r4k_250MHZhwbug()) 1079 UASM_i_MTC0(p, 0, C0_ENTRYLO0); 1080 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1081 UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ 1082 if (r45k_bvahwbug()) 1083 uasm_i_mfc0(p, tmp, C0_INDEX); 1084 } 1085 if (r4k_250MHZhwbug()) 1086 UASM_i_MTC0(p, 0, C0_ENTRYLO1); 1087 UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ 1088 #endif 1089 } 1090 1091 struct mips_huge_tlb_info { 1092 int huge_pte; 1093 int restore_scratch; 1094 }; 1095 1096 static struct mips_huge_tlb_info __cpuinit 1097 build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, 1098 struct uasm_reloc **r, unsigned int tmp, 1099 unsigned int ptr, int c0_scratch) 1100 { 1101 struct mips_huge_tlb_info rv; 1102 unsigned int even, odd; 1103 int vmalloc_branch_delay_filled = 0; 1104 const int scratch = 1; /* Our extra working register */ 1105 1106 rv.huge_pte = scratch; 1107 rv.restore_scratch = 0; 1108 1109 if (check_for_high_segbits) { 1110 UASM_i_MFC0(p, tmp, C0_BADVADDR); 1111 1112 if (pgd_reg != -1) 1113 UASM_i_MFC0(p, ptr, 31, pgd_reg); 1114 else 1115 UASM_i_MFC0(p, ptr, C0_CONTEXT); 1116 1117 if (c0_scratch >= 0) 1118 UASM_i_MTC0(p, scratch, 31, c0_scratch); 1119 else 1120 UASM_i_SW(p, scratch, scratchpad_offset(0), 0); 1121 1122 uasm_i_dsrl_safe(p, scratch, tmp, 1123 PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); 1124 uasm_il_bnez(p, r, scratch, label_vmalloc); 1125 1126 if (pgd_reg == -1) { 1127 vmalloc_branch_delay_filled = 1; 1128 /* Clear lower 23 bits of context. */ 1129 uasm_i_dins(p, ptr, 0, 0, 23); 1130 } 1131 } else { 1132 if (pgd_reg != -1) 1133 UASM_i_MFC0(p, ptr, 31, pgd_reg); 1134 else 1135 UASM_i_MFC0(p, ptr, C0_CONTEXT); 1136 1137 UASM_i_MFC0(p, tmp, C0_BADVADDR); 1138 1139 if (c0_scratch >= 0) 1140 UASM_i_MTC0(p, scratch, 31, c0_scratch); 1141 else 1142 UASM_i_SW(p, scratch, scratchpad_offset(0), 0); 1143 1144 if (pgd_reg == -1) 1145 /* Clear lower 23 bits of context. 
 */
			uasm_i_dins(p, ptr, 0, 0, 23);

		uasm_il_bltz(p, r, tmp, label_vmalloc);
	}

	if (pgd_reg == -1) {
		vmalloc_branch_delay_filled = 1;
		/* 1 0 1 0 1  << 6  xkphys cached */
		uasm_i_ori(p, ptr, ptr, 0x540);
		uasm_i_drotr(p, ptr, ptr, 11);
	}

#ifdef __PAGETABLE_PMD_FOLDED
#define LOC_PTEP scratch
#else
#define LOC_PTEP ptr
#endif

	if (!vmalloc_branch_delay_filled)
		/* get pgd offset in bytes */
		uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3);

	uasm_l_vmalloc_done(l, *p);

	/*
	 * tmp			ptr
	 * fall-through case =	badvaddr  *pgd_current
	 * vmalloc case      =	badvaddr  swapper_pg_dir
	 */

	if (vmalloc_branch_delay_filled)
		/* get pgd offset in bytes */
		uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3);

#ifdef __PAGETABLE_PMD_FOLDED
	GET_CONTEXT(p, tmp); /* get context reg */
#endif
	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PGD - 1) << 3);

	if (use_lwx_insns()) {
		UASM_i_LWX(p, LOC_PTEP, scratch, ptr);
	} else {
		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pgd offset */
		uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */
	}

#ifndef __PAGETABLE_PMD_FOLDED
	/* get pmd offset in bytes */
	uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3);
	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PMD - 1) << 3);
	GET_CONTEXT(p, tmp); /* get context reg */

	if (use_lwx_insns()) {
		UASM_i_LWX(p, scratch, scratch, ptr);
	} else {
		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */
		UASM_i_LW(p, scratch, 0, ptr);
	}
#endif
	/* Adjust the context during the load latency. */
	build_adjust_context(p, tmp);

#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
	uasm_il_bbit1(p, r, scratch, ilog2(_PAGE_HUGE), label_tlb_huge_update);
	/*
	 * In the LWX case we don't want to do the load in the
	 * delay slot.  It cannot issue in the same cycle and may be
	 * speculative and unneeded.
1214 */ 1215 if (use_lwx_insns()) 1216 uasm_i_nop(p); 1217 #endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */ 1218 1219 1220 /* build_update_entries */ 1221 if (use_lwx_insns()) { 1222 even = ptr; 1223 odd = tmp; 1224 UASM_i_LWX(p, even, scratch, tmp); 1225 UASM_i_ADDIU(p, tmp, tmp, sizeof(pte_t)); 1226 UASM_i_LWX(p, odd, scratch, tmp); 1227 } else { 1228 UASM_i_ADDU(p, ptr, scratch, tmp); /* add in offset */ 1229 even = tmp; 1230 odd = ptr; 1231 UASM_i_LW(p, even, 0, ptr); /* get even pte */ 1232 UASM_i_LW(p, odd, sizeof(pte_t), ptr); /* get odd pte */ 1233 } 1234 if (cpu_has_rixi) { 1235 uasm_i_drotr(p, even, even, ilog2(_PAGE_GLOBAL)); 1236 UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ 1237 uasm_i_drotr(p, odd, odd, ilog2(_PAGE_GLOBAL)); 1238 } else { 1239 uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_GLOBAL)); 1240 UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ 1241 uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_GLOBAL)); 1242 } 1243 UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ 1244 1245 if (c0_scratch >= 0) { 1246 UASM_i_MFC0(p, scratch, 31, c0_scratch); 1247 build_tlb_write_entry(p, l, r, tlb_random); 1248 uasm_l_leave(l, *p); 1249 rv.restore_scratch = 1; 1250 } else if (PAGE_SHIFT == 14 || PAGE_SHIFT == 13) { 1251 build_tlb_write_entry(p, l, r, tlb_random); 1252 uasm_l_leave(l, *p); 1253 UASM_i_LW(p, scratch, scratchpad_offset(0), 0); 1254 } else { 1255 UASM_i_LW(p, scratch, scratchpad_offset(0), 0); 1256 build_tlb_write_entry(p, l, r, tlb_random); 1257 uasm_l_leave(l, *p); 1258 rv.restore_scratch = 1; 1259 } 1260 1261 uasm_i_eret(p); /* return from trap */ 1262 1263 return rv; 1264 } 1265 1266 /* 1267 * For a 64-bit kernel, we are using the 64-bit XTLB refill exception 1268 * because EXL == 0. If we wrap, we can also use the 32 instruction 1269 * slots before the XTLB refill exception handler which belong to the 1270 * unused TLB refill exception. 
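 *
 * In other words the vector area is laid out roughly like this (a
 * sketch; the finished handler is copied to ebase as 0x100 bytes
 * further down):
 *
 *	ebase + 0x000:	32 instruction slots of the unused 32-bit TLB
 *			refill vector, available as overflow space
 *	ebase + 0x080:	32 instruction slots of the XTLB refill vector,
 *			the one actually taken since EXL == 0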
1271 */ 1272 #define MIPS64_REFILL_INSNS 32 1273 1274 static void __cpuinit build_r4000_tlb_refill_handler(void) 1275 { 1276 u32 *p = tlb_handler; 1277 struct uasm_label *l = labels; 1278 struct uasm_reloc *r = relocs; 1279 u32 *f; 1280 unsigned int final_len; 1281 struct mips_huge_tlb_info htlb_info __maybe_unused; 1282 enum vmalloc64_mode vmalloc_mode __maybe_unused; 1283 1284 memset(tlb_handler, 0, sizeof(tlb_handler)); 1285 memset(labels, 0, sizeof(labels)); 1286 memset(relocs, 0, sizeof(relocs)); 1287 memset(final_handler, 0, sizeof(final_handler)); 1288 1289 if ((scratch_reg > 0 || scratchpad_available()) && use_bbit_insns()) { 1290 htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1, 1291 scratch_reg); 1292 vmalloc_mode = refill_scratch; 1293 } else { 1294 htlb_info.huge_pte = K0; 1295 htlb_info.restore_scratch = 0; 1296 vmalloc_mode = refill_noscratch; 1297 /* 1298 * create the plain linear handler 1299 */ 1300 if (bcm1250_m3_war()) { 1301 unsigned int segbits = 44; 1302 1303 uasm_i_dmfc0(&p, K0, C0_BADVADDR); 1304 uasm_i_dmfc0(&p, K1, C0_ENTRYHI); 1305 uasm_i_xor(&p, K0, K0, K1); 1306 uasm_i_dsrl_safe(&p, K1, K0, 62); 1307 uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); 1308 uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); 1309 uasm_i_or(&p, K0, K0, K1); 1310 uasm_il_bnez(&p, &r, K0, label_leave); 1311 /* No need for uasm_i_nop */ 1312 } 1313 1314 #ifdef CONFIG_64BIT 1315 build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ 1316 #else 1317 build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ 1318 #endif 1319 1320 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT 1321 build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update); 1322 #endif 1323 1324 build_get_ptep(&p, K0, K1); 1325 build_update_entries(&p, K0, K1); 1326 build_tlb_write_entry(&p, &l, &r, tlb_random); 1327 uasm_l_leave(&l, p); 1328 uasm_i_eret(&p); /* return from trap */ 1329 } 1330 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT 1331 uasm_l_tlb_huge_update(&l, p); 1332 build_huge_update_entries(&p, htlb_info.huge_pte, K1); 1333 build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random, 1334 htlb_info.restore_scratch); 1335 #endif 1336 1337 #ifdef CONFIG_64BIT 1338 build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode); 1339 #endif 1340 1341 /* 1342 * Overflow check: For the 64bit handler, we need at least one 1343 * free instruction slot for the wrap-around branch. In worst 1344 * case, if the intended insertion point is a delay slot, we 1345 * need three, with the second nop'ed and the third being 1346 * unused. 1347 */ 1348 /* Loongson2 ebase is different than r4k, we have more space */ 1349 #if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) 1350 if ((p - tlb_handler) > 64) 1351 panic("TLB refill handler space exceeded"); 1352 #else 1353 if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) 1354 || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) 1355 && uasm_insn_has_bdelay(relocs, 1356 tlb_handler + MIPS64_REFILL_INSNS - 3))) 1357 panic("TLB refill handler space exceeded"); 1358 #endif 1359 1360 /* 1361 * Now fold the handler in the TLB refill handler space. 1362 */ 1363 #if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) 1364 f = final_handler; 1365 /* Simplest case, just copy the handler. */ 1366 uasm_copy_handler(relocs, labels, tlb_handler, p, f); 1367 final_len = p - tlb_handler; 1368 #else /* CONFIG_64BIT */ 1369 f = final_handler + MIPS64_REFILL_INSNS; 1370 if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { 1371 /* Just copy the handler. 
*/ 1372 uasm_copy_handler(relocs, labels, tlb_handler, p, f); 1373 final_len = p - tlb_handler; 1374 } else { 1375 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT 1376 const enum label_id ls = label_tlb_huge_update; 1377 #else 1378 const enum label_id ls = label_vmalloc; 1379 #endif 1380 u32 *split; 1381 int ov = 0; 1382 int i; 1383 1384 for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) 1385 ; 1386 BUG_ON(i == ARRAY_SIZE(labels)); 1387 split = labels[i].addr; 1388 1389 /* 1390 * See if we have overflown one way or the other. 1391 */ 1392 if (split > tlb_handler + MIPS64_REFILL_INSNS || 1393 split < p - MIPS64_REFILL_INSNS) 1394 ov = 1; 1395 1396 if (ov) { 1397 /* 1398 * Split two instructions before the end. One 1399 * for the branch and one for the instruction 1400 * in the delay slot. 1401 */ 1402 split = tlb_handler + MIPS64_REFILL_INSNS - 2; 1403 1404 /* 1405 * If the branch would fall in a delay slot, 1406 * we must back up an additional instruction 1407 * so that it is no longer in a delay slot. 1408 */ 1409 if (uasm_insn_has_bdelay(relocs, split - 1)) 1410 split--; 1411 } 1412 /* Copy first part of the handler. */ 1413 uasm_copy_handler(relocs, labels, tlb_handler, split, f); 1414 f += split - tlb_handler; 1415 1416 if (ov) { 1417 /* Insert branch. */ 1418 uasm_l_split(&l, final_handler); 1419 uasm_il_b(&f, &r, label_split); 1420 if (uasm_insn_has_bdelay(relocs, split)) 1421 uasm_i_nop(&f); 1422 else { 1423 uasm_copy_handler(relocs, labels, 1424 split, split + 1, f); 1425 uasm_move_labels(labels, f, f + 1, -1); 1426 f++; 1427 split++; 1428 } 1429 } 1430 1431 /* Copy the rest of the handler. */ 1432 uasm_copy_handler(relocs, labels, split, p, final_handler); 1433 final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + 1434 (p - split); 1435 } 1436 #endif /* CONFIG_64BIT */ 1437 1438 uasm_resolve_relocs(relocs, labels); 1439 pr_debug("Wrote TLB refill handler (%u instructions).\n", 1440 final_len); 1441 1442 memcpy((void *)ebase, final_handler, 0x100); 1443 1444 dump_handler("r4000_tlb_refill", (u32 *)ebase, 64); 1445 } 1446 1447 /* 1448 * 128 instructions for the fastpath handler is generous and should 1449 * never be exceeded. 1450 */ 1451 #define FASTPATH_SIZE 128 1452 1453 u32 handle_tlbl[FASTPATH_SIZE] __cacheline_aligned; 1454 u32 handle_tlbs[FASTPATH_SIZE] __cacheline_aligned; 1455 u32 handle_tlbm[FASTPATH_SIZE] __cacheline_aligned; 1456 #ifdef CONFIG_MIPS_PGD_C0_CONTEXT 1457 u32 tlbmiss_handler_setup_pgd_array[16] __cacheline_aligned; 1458 1459 static void __cpuinit build_r4000_setup_pgd(void) 1460 { 1461 const int a0 = 4; 1462 const int a1 = 5; 1463 u32 *p = tlbmiss_handler_setup_pgd_array; 1464 struct uasm_label *l = labels; 1465 struct uasm_reloc *r = relocs; 1466 1467 memset(tlbmiss_handler_setup_pgd_array, 0, sizeof(tlbmiss_handler_setup_pgd_array)); 1468 memset(labels, 0, sizeof(labels)); 1469 memset(relocs, 0, sizeof(relocs)); 1470 1471 pgd_reg = allocate_kscratch(); 1472 1473 if (pgd_reg == -1) { 1474 /* PGD << 11 in c0_Context */ 1475 /* 1476 * If it is a ckseg0 address, convert to a physical 1477 * address. Shifting right by 29 and adding 4 will 1478 * result in zero for these addresses. 
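 *
 * Worked example (a sketch): a ckseg0 pointer such as
 * 0xffffffff80001000 arithmetic-shifted right by 29 gives -4, and
 * adding 4 gives 0, so the bnez below falls through and the dinsm
 * clears bits 63..29, leaving the physical address. Any other mapping
 * (e.g. xkphys) gives a non-zero result and skips the conversion.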
1479 * 1480 */ 1481 UASM_i_SRA(&p, a1, a0, 29); 1482 UASM_i_ADDIU(&p, a1, a1, 4); 1483 uasm_il_bnez(&p, &r, a1, label_tlbl_goaround1); 1484 uasm_i_nop(&p); 1485 uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); 1486 uasm_l_tlbl_goaround1(&l, p); 1487 UASM_i_SLL(&p, a0, a0, 11); 1488 uasm_i_jr(&p, 31); 1489 UASM_i_MTC0(&p, a0, C0_CONTEXT); 1490 } else { 1491 /* PGD in c0_KScratch */ 1492 uasm_i_jr(&p, 31); 1493 UASM_i_MTC0(&p, a0, 31, pgd_reg); 1494 } 1495 if (p - tlbmiss_handler_setup_pgd_array > ARRAY_SIZE(tlbmiss_handler_setup_pgd_array)) 1496 panic("tlbmiss_handler_setup_pgd_array space exceeded"); 1497 uasm_resolve_relocs(relocs, labels); 1498 pr_debug("Wrote tlbmiss_handler_setup_pgd_array (%u instructions).\n", 1499 (unsigned int)(p - tlbmiss_handler_setup_pgd_array)); 1500 1501 dump_handler("tlbmiss_handler", 1502 tlbmiss_handler_setup_pgd_array, 1503 ARRAY_SIZE(tlbmiss_handler_setup_pgd_array)); 1504 } 1505 #endif 1506 1507 static void __cpuinit 1508 iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) 1509 { 1510 #ifdef CONFIG_SMP 1511 # ifdef CONFIG_64BIT_PHYS_ADDR 1512 if (cpu_has_64bits) 1513 uasm_i_lld(p, pte, 0, ptr); 1514 else 1515 # endif 1516 UASM_i_LL(p, pte, 0, ptr); 1517 #else 1518 # ifdef CONFIG_64BIT_PHYS_ADDR 1519 if (cpu_has_64bits) 1520 uasm_i_ld(p, pte, 0, ptr); 1521 else 1522 # endif 1523 UASM_i_LW(p, pte, 0, ptr); 1524 #endif 1525 } 1526 1527 static void __cpuinit 1528 iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, 1529 unsigned int mode) 1530 { 1531 #ifdef CONFIG_64BIT_PHYS_ADDR 1532 unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY); 1533 #endif 1534 1535 uasm_i_ori(p, pte, pte, mode); 1536 #ifdef CONFIG_SMP 1537 # ifdef CONFIG_64BIT_PHYS_ADDR 1538 if (cpu_has_64bits) 1539 uasm_i_scd(p, pte, 0, ptr); 1540 else 1541 # endif 1542 UASM_i_SC(p, pte, 0, ptr); 1543 1544 if (r10000_llsc_war()) 1545 uasm_il_beqzl(p, r, pte, label_smp_pgtable_change); 1546 else 1547 uasm_il_beqz(p, r, pte, label_smp_pgtable_change); 1548 1549 # ifdef CONFIG_64BIT_PHYS_ADDR 1550 if (!cpu_has_64bits) { 1551 /* no uasm_i_nop needed */ 1552 uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr); 1553 uasm_i_ori(p, pte, pte, hwmode); 1554 uasm_i_sc(p, pte, sizeof(pte_t) / 2, ptr); 1555 uasm_il_beqz(p, r, pte, label_smp_pgtable_change); 1556 /* no uasm_i_nop needed */ 1557 uasm_i_lw(p, pte, 0, ptr); 1558 } else 1559 uasm_i_nop(p); 1560 # else 1561 uasm_i_nop(p); 1562 # endif 1563 #else 1564 # ifdef CONFIG_64BIT_PHYS_ADDR 1565 if (cpu_has_64bits) 1566 uasm_i_sd(p, pte, 0, ptr); 1567 else 1568 # endif 1569 UASM_i_SW(p, pte, 0, ptr); 1570 1571 # ifdef CONFIG_64BIT_PHYS_ADDR 1572 if (!cpu_has_64bits) { 1573 uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr); 1574 uasm_i_ori(p, pte, pte, hwmode); 1575 uasm_i_sw(p, pte, sizeof(pte_t) / 2, ptr); 1576 uasm_i_lw(p, pte, 0, ptr); 1577 } 1578 # endif 1579 #endif 1580 } 1581 1582 /* 1583 * Check if PTE is present, if not then jump to LABEL. PTR points to 1584 * the page table where this PTE is located, PTE will be re-loaded 1585 * with it's original value. 1586 */ 1587 static void __cpuinit 1588 build_pte_present(u32 **p, struct uasm_reloc **r, 1589 int pte, int ptr, int scratch, enum label_id lid) 1590 { 1591 int t = scratch >= 0 ? 
scratch : pte; 1592 1593 if (cpu_has_rixi) { 1594 if (use_bbit_insns()) { 1595 uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); 1596 uasm_i_nop(p); 1597 } else { 1598 uasm_i_andi(p, t, pte, _PAGE_PRESENT); 1599 uasm_il_beqz(p, r, t, lid); 1600 if (pte == t) 1601 /* You lose the SMP race :-(*/ 1602 iPTE_LW(p, pte, ptr); 1603 } 1604 } else { 1605 uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ); 1606 uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ); 1607 uasm_il_bnez(p, r, t, lid); 1608 if (pte == t) 1609 /* You lose the SMP race :-(*/ 1610 iPTE_LW(p, pte, ptr); 1611 } 1612 } 1613 1614 /* Make PTE valid, store result in PTR. */ 1615 static void __cpuinit 1616 build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte, 1617 unsigned int ptr) 1618 { 1619 unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED; 1620 1621 iPTE_SW(p, r, pte, ptr, mode); 1622 } 1623 1624 /* 1625 * Check if PTE can be written to, if not branch to LABEL. Regardless 1626 * restore PTE with value from PTR when done. 1627 */ 1628 static void __cpuinit 1629 build_pte_writable(u32 **p, struct uasm_reloc **r, 1630 unsigned int pte, unsigned int ptr, int scratch, 1631 enum label_id lid) 1632 { 1633 int t = scratch >= 0 ? scratch : pte; 1634 1635 uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE); 1636 uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE); 1637 uasm_il_bnez(p, r, t, lid); 1638 if (pte == t) 1639 /* You lose the SMP race :-(*/ 1640 iPTE_LW(p, pte, ptr); 1641 else 1642 uasm_i_nop(p); 1643 } 1644 1645 /* Make PTE writable, update software status bits as well, then store 1646 * at PTR. 1647 */ 1648 static void __cpuinit 1649 build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte, 1650 unsigned int ptr) 1651 { 1652 unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID 1653 | _PAGE_DIRTY); 1654 1655 iPTE_SW(p, r, pte, ptr, mode); 1656 } 1657 1658 /* 1659 * Check if PTE can be modified, if not branch to LABEL. Regardless 1660 * restore PTE with value from PTR when done. 1661 */ 1662 static void __cpuinit 1663 build_pte_modifiable(u32 **p, struct uasm_reloc **r, 1664 unsigned int pte, unsigned int ptr, int scratch, 1665 enum label_id lid) 1666 { 1667 if (use_bbit_insns()) { 1668 uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid); 1669 uasm_i_nop(p); 1670 } else { 1671 int t = scratch >= 0 ? scratch : pte; 1672 uasm_i_andi(p, t, pte, _PAGE_WRITE); 1673 uasm_il_beqz(p, r, t, lid); 1674 if (pte == t) 1675 /* You lose the SMP race :-(*/ 1676 iPTE_LW(p, pte, ptr); 1677 } 1678 } 1679 1680 #ifndef CONFIG_MIPS_PGD_C0_CONTEXT 1681 1682 1683 /* 1684 * R3000 style TLB load/store/modify handlers. 1685 */ 1686 1687 /* 1688 * This places the pte into ENTRYLO0 and writes it with tlbwi. 1689 * Then it returns. 1690 */ 1691 static void __cpuinit 1692 build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp) 1693 { 1694 uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ 1695 uasm_i_mfc0(p, tmp, C0_EPC); /* cp0 delay */ 1696 uasm_i_tlbwi(p); 1697 uasm_i_jr(p, tmp); 1698 uasm_i_rfe(p); /* branch delay */ 1699 } 1700 1701 /* 1702 * This places the pte into ENTRYLO0 and writes it with tlbwi 1703 * or tlbwr as appropriate. This is because the index register 1704 * may have the probe fail bit set as a result of a trap on a 1705 * kseg2 access, i.e. without refill. Then it returns. 
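 *
 * (The probe-fail "P" bit is the sign bit of c0_index, which is why the
 * code below can simply test the mfc0 result with a bltz.)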
1706 */ 1707 static void __cpuinit 1708 build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l, 1709 struct uasm_reloc **r, unsigned int pte, 1710 unsigned int tmp) 1711 { 1712 uasm_i_mfc0(p, tmp, C0_INDEX); 1713 uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ 1714 uasm_il_bltz(p, r, tmp, label_r3000_write_probe_fail); /* cp0 delay */ 1715 uasm_i_mfc0(p, tmp, C0_EPC); /* branch delay */ 1716 uasm_i_tlbwi(p); /* cp0 delay */ 1717 uasm_i_jr(p, tmp); 1718 uasm_i_rfe(p); /* branch delay */ 1719 uasm_l_r3000_write_probe_fail(l, *p); 1720 uasm_i_tlbwr(p); /* cp0 delay */ 1721 uasm_i_jr(p, tmp); 1722 uasm_i_rfe(p); /* branch delay */ 1723 } 1724 1725 static void __cpuinit 1726 build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, 1727 unsigned int ptr) 1728 { 1729 long pgdc = (long)pgd_current; 1730 1731 uasm_i_mfc0(p, pte, C0_BADVADDR); 1732 uasm_i_lui(p, ptr, uasm_rel_hi(pgdc)); /* cp0 delay */ 1733 uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); 1734 uasm_i_srl(p, pte, pte, 22); /* load delay */ 1735 uasm_i_sll(p, pte, pte, 2); 1736 uasm_i_addu(p, ptr, ptr, pte); 1737 uasm_i_mfc0(p, pte, C0_CONTEXT); 1738 uasm_i_lw(p, ptr, 0, ptr); /* cp0 delay */ 1739 uasm_i_andi(p, pte, pte, 0xffc); /* load delay */ 1740 uasm_i_addu(p, ptr, ptr, pte); 1741 uasm_i_lw(p, pte, 0, ptr); 1742 uasm_i_tlbp(p); /* load delay */ 1743 } 1744 1745 static void __cpuinit build_r3000_tlb_load_handler(void) 1746 { 1747 u32 *p = handle_tlbl; 1748 struct uasm_label *l = labels; 1749 struct uasm_reloc *r = relocs; 1750 1751 memset(handle_tlbl, 0, sizeof(handle_tlbl)); 1752 memset(labels, 0, sizeof(labels)); 1753 memset(relocs, 0, sizeof(relocs)); 1754 1755 build_r3000_tlbchange_handler_head(&p, K0, K1); 1756 build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl); 1757 uasm_i_nop(&p); /* load delay */ 1758 build_make_valid(&p, &r, K0, K1); 1759 build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); 1760 1761 uasm_l_nopage_tlbl(&l, p); 1762 uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); 1763 uasm_i_nop(&p); 1764 1765 if ((p - handle_tlbl) > FASTPATH_SIZE) 1766 panic("TLB load handler fastpath space exceeded"); 1767 1768 uasm_resolve_relocs(relocs, labels); 1769 pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", 1770 (unsigned int)(p - handle_tlbl)); 1771 1772 dump_handler("r3000_tlb_load", handle_tlbl, ARRAY_SIZE(handle_tlbl)); 1773 } 1774 1775 static void __cpuinit build_r3000_tlb_store_handler(void) 1776 { 1777 u32 *p = handle_tlbs; 1778 struct uasm_label *l = labels; 1779 struct uasm_reloc *r = relocs; 1780 1781 memset(handle_tlbs, 0, sizeof(handle_tlbs)); 1782 memset(labels, 0, sizeof(labels)); 1783 memset(relocs, 0, sizeof(relocs)); 1784 1785 build_r3000_tlbchange_handler_head(&p, K0, K1); 1786 build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs); 1787 uasm_i_nop(&p); /* load delay */ 1788 build_make_write(&p, &r, K0, K1); 1789 build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); 1790 1791 uasm_l_nopage_tlbs(&l, p); 1792 uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); 1793 uasm_i_nop(&p); 1794 1795 if ((p - handle_tlbs) > FASTPATH_SIZE) 1796 panic("TLB store handler fastpath space exceeded"); 1797 1798 uasm_resolve_relocs(relocs, labels); 1799 pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", 1800 (unsigned int)(p - handle_tlbs)); 1801 1802 dump_handler("r3000_tlb_store", handle_tlbs, ARRAY_SIZE(handle_tlbs)); 1803 } 1804 1805 static void __cpuinit build_r3000_tlb_modify_handler(void) 1806 { 1807 u32 *p = handle_tlbm; 1808 struct uasm_label *l = labels; 
1809 struct uasm_reloc *r = relocs; 1810 1811 memset(handle_tlbm, 0, sizeof(handle_tlbm)); 1812 memset(labels, 0, sizeof(labels)); 1813 memset(relocs, 0, sizeof(relocs)); 1814 1815 build_r3000_tlbchange_handler_head(&p, K0, K1); 1816 build_pte_modifiable(&p, &r, K0, K1, -1, label_nopage_tlbm); 1817 uasm_i_nop(&p); /* load delay */ 1818 build_make_write(&p, &r, K0, K1); 1819 build_r3000_pte_reload_tlbwi(&p, K0, K1); 1820 1821 uasm_l_nopage_tlbm(&l, p); 1822 uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); 1823 uasm_i_nop(&p); 1824 1825 if ((p - handle_tlbm) > FASTPATH_SIZE) 1826 panic("TLB modify handler fastpath space exceeded"); 1827 1828 uasm_resolve_relocs(relocs, labels); 1829 pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", 1830 (unsigned int)(p - handle_tlbm)); 1831 1832 dump_handler("r3000_tlb_modify", handle_tlbm, ARRAY_SIZE(handle_tlbm)); 1833 } 1834 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ 1835 1836 /* 1837 * R4000 style TLB load/store/modify handlers. 1838 */ 1839 static struct work_registers __cpuinit 1840 build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, 1841 struct uasm_reloc **r) 1842 { 1843 struct work_registers wr = build_get_work_registers(p); 1844 1845 #ifdef CONFIG_64BIT 1846 build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */ 1847 #else 1848 build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */ 1849 #endif 1850 1851 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT 1852 /* 1853 * For huge tlb entries, pmd doesn't contain an address but 1854 * instead contains the tlb pte. Check the PAGE_HUGE bit and 1855 * see if we need to jump to huge tlb processing. 1856 */ 1857 build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update); 1858 #endif 1859 1860 UASM_i_MFC0(p, wr.r1, C0_BADVADDR); 1861 UASM_i_LW(p, wr.r2, 0, wr.r2); 1862 UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); 1863 uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2); 1864 UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1); 1865 1866 #ifdef CONFIG_SMP 1867 uasm_l_smp_pgtable_change(l, *p); 1868 #endif 1869 iPTE_LW(p, wr.r1, wr.r2); /* get even pte */ 1870 if (!m4kc_tlbp_war()) 1871 build_tlb_probe_entry(p); 1872 return wr; 1873 } 1874 1875 static void __cpuinit 1876 build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l, 1877 struct uasm_reloc **r, unsigned int tmp, 1878 unsigned int ptr) 1879 { 1880 uasm_i_ori(p, ptr, ptr, sizeof(pte_t)); 1881 uasm_i_xori(p, ptr, ptr, sizeof(pte_t)); 1882 build_update_entries(p, tmp, ptr); 1883 build_tlb_write_entry(p, l, r, tlb_indexed); 1884 uasm_l_leave(l, *p); 1885 build_restore_work_registers(p); 1886 uasm_i_eret(p); /* return from trap */ 1887 1888 #ifdef CONFIG_64BIT 1889 build_get_pgd_vmalloc64(p, l, r, tmp, ptr, not_refill); 1890 #endif 1891 } 1892 1893 static void __cpuinit build_r4000_tlb_load_handler(void) 1894 { 1895 u32 *p = handle_tlbl; 1896 struct uasm_label *l = labels; 1897 struct uasm_reloc *r = relocs; 1898 struct work_registers wr; 1899 1900 memset(handle_tlbl, 0, sizeof(handle_tlbl)); 1901 memset(labels, 0, sizeof(labels)); 1902 memset(relocs, 0, sizeof(relocs)); 1903 1904 if (bcm1250_m3_war()) { 1905 unsigned int segbits = 44; 1906 1907 uasm_i_dmfc0(&p, K0, C0_BADVADDR); 1908 uasm_i_dmfc0(&p, K1, C0_ENTRYHI); 1909 uasm_i_xor(&p, K0, K0, K1); 1910 uasm_i_dsrl_safe(&p, K1, K0, 62); 1911 uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); 1912 uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); 1913 uasm_i_or(&p, K0, K0, K1); 1914 uasm_il_bnez(&p, &r, K0, label_leave); 1915 /* No need for uasm_i_nop 
	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
	build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
	if (m4kc_tlbp_war())
		build_tlb_probe_entry(&p);

	if (cpu_has_rixi) {
		/*
		 * If the page is not _PAGE_VALID, RI or XI could not
		 * have triggered it.  Skip the expensive test.
		 */
		if (use_bbit_insns()) {
			uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
				      label_tlbl_goaround1);
		} else {
			uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1);
		}
		uasm_i_nop(&p);

		uasm_i_tlbr(&p);
		/* Examine entrylo 0 or 1 based on ptr. */
		if (use_bbit_insns()) {
			uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
		} else {
			uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
			uasm_i_beqz(&p, wr.r3, 8);
		}
		/* load it in the delay slot */
		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
		/* load it if ptr is odd */
		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
		/*
		 * If the EntryLo (now in wr.r3) is valid (bit 1), RI or
		 * XI must have triggered it.
		 */
		if (use_bbit_insns()) {
			uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl);
			uasm_i_nop(&p);
			uasm_l_tlbl_goaround1(&l, p);
		} else {
			uasm_i_andi(&p, wr.r3, wr.r3, 2);
			uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl);
			uasm_i_nop(&p);
		}
		uasm_l_tlbl_goaround1(&l, p);
	}
	build_make_valid(&p, &r, wr.r1, wr.r2);
	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);

#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
	/*
	 * This is the entry point when build_r4000_tlbchange_handler_head
	 * spots a huge page.
	 */
	uasm_l_tlb_huge_update(&l, p);
	iPTE_LW(&p, wr.r1, wr.r2);
	build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
	build_tlb_probe_entry(&p);

	if (cpu_has_rixi) {
		/*
		 * If the page is not _PAGE_VALID, RI or XI could not
		 * have triggered it.  Skip the expensive test.
		 */
		if (use_bbit_insns()) {
			uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
				      label_tlbl_goaround2);
		} else {
			uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
		}
		uasm_i_nop(&p);

		uasm_i_tlbr(&p);
		/* Examine entrylo 0 or 1 based on ptr. */
		if (use_bbit_insns()) {
			uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
		} else {
			uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
			uasm_i_beqz(&p, wr.r3, 8);
		}
		/* load it in the delay slot */
		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
		/* load it if ptr is odd */
		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
		/*
		 * If the EntryLo (now in wr.r3) is valid (bit 1), RI or
		 * XI must have triggered it.
		 */
		if (use_bbit_insns()) {
			uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2);
		} else {
			uasm_i_andi(&p, wr.r3, wr.r3, 2);
			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
		}
		if (PM_DEFAULT_MASK == 0)
			uasm_i_nop(&p);
		/*
		 * We clobbered C0_PAGEMASK; restore it.  On the other branch
		 * it is restored in build_huge_tlb_write_entry.
		 */
		build_restore_pagemask(&p, &r, wr.r3, label_nopage_tlbl, 0);

		uasm_l_tlbl_goaround2(&l, p);
	}
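	/*
	 * Mark the software PTE accessed and valid, then let
	 * build_huge_handler_tail() store it back and write the huge
	 * TLB entry; the huge PAGEMASK handling lives there as well.
	 */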
	uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID));
	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
#endif

	uasm_l_nopage_tlbl(&l, p);
	build_restore_work_registers(&p);
#ifdef CONFIG_CPU_MICROMIPS
	if ((unsigned long)tlb_do_page_fault_0 & 1) {
		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_0));
		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_0));
		uasm_i_jr(&p, K0);
	} else
#endif
	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
	uasm_i_nop(&p);

	if ((p - handle_tlbl) > FASTPATH_SIZE)
		panic("TLB load handler fastpath space exceeded");

	uasm_resolve_relocs(relocs, labels);
	pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",
		 (unsigned int)(p - handle_tlbl));

	dump_handler("r4000_tlb_load", handle_tlbl, ARRAY_SIZE(handle_tlbl));
}

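/*
 * The store and modify handlers below reuse the load handler's skeleton,
 * minus the RIXI and BCM1250 special cases.  They differ mainly in the
 * permission check guarding the fastpath (build_pte_writable() resp.
 * build_pte_modifiable()) and in jumping to tlb_do_page_fault_1 on
 * failure.
 */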
static void __cpuinit build_r4000_tlb_store_handler(void)
{
	u32 *p = handle_tlbs;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	struct work_registers wr;

	memset(handle_tlbs, 0, sizeof(handle_tlbs));
	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
	if (m4kc_tlbp_war())
		build_tlb_probe_entry(&p);
	build_make_write(&p, &r, wr.r1, wr.r2);
	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);

#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
	/*
	 * This is the entry point when
	 * build_r4000_tlbchange_handler_head spots a huge page.
	 */
	uasm_l_tlb_huge_update(&l, p);
	iPTE_LW(&p, wr.r1, wr.r2);
	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
	build_tlb_probe_entry(&p);
	uasm_i_ori(&p, wr.r1, wr.r1,
		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
#endif

	uasm_l_nopage_tlbs(&l, p);
	build_restore_work_registers(&p);
#ifdef CONFIG_CPU_MICROMIPS
	if ((unsigned long)tlb_do_page_fault_1 & 1) {
		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1));
		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1));
		uasm_i_jr(&p, K0);
	} else
#endif
	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
	uasm_i_nop(&p);

	if ((p - handle_tlbs) > FASTPATH_SIZE)
		panic("TLB store handler fastpath space exceeded");

	uasm_resolve_relocs(relocs, labels);
	pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",
		 (unsigned int)(p - handle_tlbs));

	dump_handler("r4000_tlb_store", handle_tlbs, ARRAY_SIZE(handle_tlbs));
}

static void __cpuinit build_r4000_tlb_modify_handler(void)
{
	u32 *p = handle_tlbm;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	struct work_registers wr;

	memset(handle_tlbm, 0, sizeof(handle_tlbm));
	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
	build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
	if (m4kc_tlbp_war())
		build_tlb_probe_entry(&p);
	/* Present and writable bits set, set accessed and dirty bits. */
	build_make_write(&p, &r, wr.r1, wr.r2);
	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);

#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
	/*
	 * This is the entry point when
	 * build_r4000_tlbchange_handler_head spots a huge page.
	 */
	uasm_l_tlb_huge_update(&l, p);
	iPTE_LW(&p, wr.r1, wr.r2);
	build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
	build_tlb_probe_entry(&p);
	uasm_i_ori(&p, wr.r1, wr.r1,
		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
#endif

	uasm_l_nopage_tlbm(&l, p);
	build_restore_work_registers(&p);
#ifdef CONFIG_CPU_MICROMIPS
	if ((unsigned long)tlb_do_page_fault_1 & 1) {
		uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1));
		uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1));
		uasm_i_jr(&p, K0);
	} else
#endif
	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
	uasm_i_nop(&p);

	if ((p - handle_tlbm) > FASTPATH_SIZE)
		panic("TLB modify handler fastpath space exceeded");

	uasm_resolve_relocs(relocs, labels);
	pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",
		 (unsigned int)(p - handle_tlbm));

	dump_handler("r4000_tlb_modify", handle_tlbm, ARRAY_SIZE(handle_tlbm));
}

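/*
 * Top level dispatch: pick and assemble the handler flavour for the
 * current CPU.  With cpu_has_local_ebase the refill handler is rebuilt
 * on every invocation so that each CPU gets its own copy; the shared
 * load/store/modify handlers (and the pgd setup stub, where used) are
 * guarded by run_once and assembled only once.
 */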
void __cpuinit build_tlb_refill_handler(void)
{
	/*
	 * The refill handler is generated per-CPU; multi-node systems
	 * may have local storage for it.  The other handlers are only
	 * needed once.
	 */
	static int run_once = 0;

	output_pgtable_bits_defines();

#ifdef CONFIG_64BIT
	check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
#endif

	switch (current_cpu_type()) {
	case CPU_R2000:
	case CPU_R3000:
	case CPU_R3000A:
	case CPU_R3081E:
	case CPU_TX3912:
	case CPU_TX3922:
	case CPU_TX3927:
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
		if (cpu_has_local_ebase)
			build_r3000_tlb_refill_handler();
		if (!run_once) {
			if (!cpu_has_local_ebase)
				build_r3000_tlb_refill_handler();
			build_r3000_tlb_load_handler();
			build_r3000_tlb_store_handler();
			build_r3000_tlb_modify_handler();
			run_once++;
		}
#else
		panic("No R3000 TLB refill handler");
#endif
		break;

	case CPU_R6000:
	case CPU_R6000A:
		panic("No R6000 TLB refill handler yet");
		break;

	case CPU_R8000:
		panic("No R8000 TLB refill handler yet");
		break;

	default:
		if (!run_once) {
			scratch_reg = allocate_kscratch();
#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
			build_r4000_setup_pgd();
#endif
			build_r4000_tlb_load_handler();
			build_r4000_tlb_store_handler();
			build_r4000_tlb_modify_handler();
			if (!cpu_has_local_ebase)
				build_r4000_tlb_refill_handler();
			run_once++;
		}
		if (cpu_has_local_ebase)
			build_r4000_tlb_refill_handler();
	}
}

void __cpuinit flush_tlb_handlers(void)
{
	local_flush_icache_range((unsigned long)handle_tlbl,
			   (unsigned long)handle_tlbl + sizeof(handle_tlbl));
	local_flush_icache_range((unsigned long)handle_tlbs,
			   (unsigned long)handle_tlbs + sizeof(handle_tlbs));
	local_flush_icache_range((unsigned long)handle_tlbm,
			   (unsigned long)handle_tlbm + sizeof(handle_tlbm));
#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
	/* Flush the pgd setup stub over its own size, not handle_tlbm's. */
	local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd_array,
			   (unsigned long)tlbmiss_handler_setup_pgd_array +
			   sizeof(tlbmiss_handler_setup_pgd_array));
#endif
}