1 /* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * Synthesize TLB refill handlers at runtime. 7 * 8 * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer 9 * Copyright (C) 2005, 2007, 2008, 2009 Maciej W. Rozycki 10 * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) 11 * Copyright (C) 2008, 2009 Cavium Networks, Inc. 12 * 13 * ... and the days got worse and worse and now you see 14 * I've gone completly out of my mind. 15 * 16 * They're coming to take me a away haha 17 * they're coming to take me a away hoho hihi haha 18 * to the funny farm where code is beautiful all the time ... 19 * 20 * (Condolences to Napoleon XIV) 21 */ 22 23 #include <linux/bug.h> 24 #include <linux/kernel.h> 25 #include <linux/types.h> 26 #include <linux/smp.h> 27 #include <linux/string.h> 28 #include <linux/init.h> 29 #include <linux/cache.h> 30 31 #include <asm/cacheflush.h> 32 #include <asm/pgtable.h> 33 #include <asm/war.h> 34 #include <asm/uasm.h> 35 #include <asm/setup.h> 36 37 /* 38 * TLB load/store/modify handlers. 39 * 40 * Only the fastpath gets synthesized at runtime, the slowpath for 41 * do_page_fault remains normal asm. 42 */ 43 extern void tlb_do_page_fault_0(void); 44 extern void tlb_do_page_fault_1(void); 45 46 struct work_registers { 47 int r1; 48 int r2; 49 int r3; 50 }; 51 52 struct tlb_reg_save { 53 unsigned long a; 54 unsigned long b; 55 } ____cacheline_aligned_in_smp; 56 57 static struct tlb_reg_save handler_reg_save[NR_CPUS]; 58 59 static inline int r45k_bvahwbug(void) 60 { 61 /* XXX: We should probe for the presence of this bug, but we don't. */ 62 return 0; 63 } 64 65 static inline int r4k_250MHZhwbug(void) 66 { 67 /* XXX: We should probe for the presence of this bug, but we don't. */ 68 return 0; 69 } 70 71 static inline int __maybe_unused bcm1250_m3_war(void) 72 { 73 return BCM1250_M3_WAR; 74 } 75 76 static inline int __maybe_unused r10000_llsc_war(void) 77 { 78 return R10000_LLSC_WAR; 79 } 80 81 static int use_bbit_insns(void) 82 { 83 switch (current_cpu_type()) { 84 case CPU_CAVIUM_OCTEON: 85 case CPU_CAVIUM_OCTEON_PLUS: 86 case CPU_CAVIUM_OCTEON2: 87 return 1; 88 default: 89 return 0; 90 } 91 } 92 93 static int use_lwx_insns(void) 94 { 95 switch (current_cpu_type()) { 96 case CPU_CAVIUM_OCTEON2: 97 return 1; 98 default: 99 return 0; 100 } 101 } 102 #if defined(CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE) && \ 103 CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0 104 static bool scratchpad_available(void) 105 { 106 return true; 107 } 108 static int scratchpad_offset(int i) 109 { 110 /* 111 * CVMSEG starts at address -32768 and extends for 112 * CAVIUM_OCTEON_CVMSEG_SIZE 128 byte cache lines. 113 */ 114 i += 1; /* Kernel use starts at the top and works down. */ 115 return CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128 - (8 * i) - 32768; 116 } 117 #else 118 static bool scratchpad_available(void) 119 { 120 return false; 121 } 122 static int scratchpad_offset(int i) 123 { 124 BUG(); 125 /* Really unreachable, but evidently some GCC want this. */ 126 return 0; 127 } 128 #endif 129 /* 130 * Found by experiment: At least some revisions of the 4kc throw under 131 * some circumstances a machine check exception, triggered by invalid 132 * values in the index register. Delaying the tlbp instruction until 133 * after the next branch, plus adding an additional nop in front of 134 * tlbwi/tlbwr avoids the invalid index register values. 
Nobody knows 135 * why; it's not an issue caused by the core RTL. 136 * 137 */ 138 static int __cpuinit m4kc_tlbp_war(void) 139 { 140 return (current_cpu_data.processor_id & 0xffff00) == 141 (PRID_COMP_MIPS | PRID_IMP_4KC); 142 } 143 144 /* Handle labels (which must be positive integers). */ 145 enum label_id { 146 label_second_part = 1, 147 label_leave, 148 label_vmalloc, 149 label_vmalloc_done, 150 label_tlbw_hazard, 151 label_split, 152 label_tlbl_goaround1, 153 label_tlbl_goaround2, 154 label_nopage_tlbl, 155 label_nopage_tlbs, 156 label_nopage_tlbm, 157 label_smp_pgtable_change, 158 label_r3000_write_probe_fail, 159 label_large_segbits_fault, 160 #ifdef CONFIG_HUGETLB_PAGE 161 label_tlb_huge_update, 162 #endif 163 }; 164 165 UASM_L_LA(_second_part) 166 UASM_L_LA(_leave) 167 UASM_L_LA(_vmalloc) 168 UASM_L_LA(_vmalloc_done) 169 UASM_L_LA(_tlbw_hazard) 170 UASM_L_LA(_split) 171 UASM_L_LA(_tlbl_goaround1) 172 UASM_L_LA(_tlbl_goaround2) 173 UASM_L_LA(_nopage_tlbl) 174 UASM_L_LA(_nopage_tlbs) 175 UASM_L_LA(_nopage_tlbm) 176 UASM_L_LA(_smp_pgtable_change) 177 UASM_L_LA(_r3000_write_probe_fail) 178 UASM_L_LA(_large_segbits_fault) 179 #ifdef CONFIG_HUGETLB_PAGE 180 UASM_L_LA(_tlb_huge_update) 181 #endif 182 183 /* 184 * For debug purposes. 185 */ 186 static inline void dump_handler(const u32 *handler, int count) 187 { 188 int i; 189 190 pr_debug("\t.set push\n"); 191 pr_debug("\t.set noreorder\n"); 192 193 for (i = 0; i < count; i++) 194 pr_debug("\t%p\t.word 0x%08x\n", &handler[i], handler[i]); 195 196 pr_debug("\t.set pop\n"); 197 } 198 199 /* The only general purpose registers allowed in TLB handlers. */ 200 #define K0 26 201 #define K1 27 202 203 /* Some CP0 registers */ 204 #define C0_INDEX 0, 0 205 #define C0_ENTRYLO0 2, 0 206 #define C0_TCBIND 2, 2 207 #define C0_ENTRYLO1 3, 0 208 #define C0_CONTEXT 4, 0 209 #define C0_PAGEMASK 5, 0 210 #define C0_BADVADDR 8, 0 211 #define C0_ENTRYHI 10, 0 212 #define C0_EPC 14, 0 213 #define C0_XCONTEXT 20, 0 214 215 #ifdef CONFIG_64BIT 216 # define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_XCONTEXT) 217 #else 218 # define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_CONTEXT) 219 #endif 220 221 /* The worst case length of the handler is around 18 instructions for 222 * R3000-style TLBs and up to 63 instructions for R4000-style TLBs. 223 * Maximum space available is 32 instructions for R3000 and 64 224 * instructions for R4000. 225 * 226 * We deliberately chose a buffer size of 128, so we won't scribble 227 * over anything important on overflow before we panic. 
 */
static u32 tlb_handler[128] __cpuinitdata;

/* simply assume worst case size for labels and relocs */
static struct uasm_label labels[128] __cpuinitdata;
static struct uasm_reloc relocs[128] __cpuinitdata;

static int check_for_high_segbits __cpuinitdata;

static unsigned int kscratch_used_mask __cpuinitdata;

static int __cpuinit allocate_kscratch(void)
{
        int r;
        unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask;

        r = ffs(a);

        if (r == 0)
                return -1;

        r--; /* make it zero based */

        kscratch_used_mask |= (1 << r);

        return r;
}

static int scratch_reg __cpuinitdata;
static int pgd_reg __cpuinitdata;
enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch};

static struct work_registers __cpuinit build_get_work_registers(u32 **p)
{
        struct work_registers r;

        int smp_processor_id_reg;
        int smp_processor_id_sel;
        int smp_processor_id_shift;

        if (scratch_reg > 0) {
                /* Save in CPU local C0_KScratch? */
                UASM_i_MTC0(p, 1, 31, scratch_reg);
                r.r1 = K0;
                r.r2 = K1;
                r.r3 = 1;
                return r;
        }

        if (num_possible_cpus() > 1) {
#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
                smp_processor_id_shift = 51;
                smp_processor_id_reg = 20; /* XContext */
                smp_processor_id_sel = 0;
#else
# ifdef CONFIG_32BIT
                smp_processor_id_shift = 25;
                smp_processor_id_reg = 4; /* Context */
                smp_processor_id_sel = 0;
# endif
# ifdef CONFIG_64BIT
                smp_processor_id_shift = 26;
                smp_processor_id_reg = 4; /* Context */
                smp_processor_id_sel = 0;
# endif
#endif
                /* Get smp_processor_id */
                UASM_i_MFC0(p, K0, smp_processor_id_reg, smp_processor_id_sel);
                UASM_i_SRL_SAFE(p, K0, K0, smp_processor_id_shift);

                /* handler_reg_save index in K0 */
                UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save)));

                UASM_i_LA(p, K1, (long)&handler_reg_save);
                UASM_i_ADDU(p, K0, K0, K1);
        } else {
                UASM_i_LA(p, K0, (long)&handler_reg_save);
        }
        /* K0 now points to save area, save $1 and $2 */
        UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0);
        UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0);

        r.r1 = K1;
        r.r2 = 1;
        r.r3 = 2;
        return r;
}

static void __cpuinit build_restore_work_registers(u32 **p)
{
        if (scratch_reg > 0) {
                UASM_i_MFC0(p, 1, 31, scratch_reg);
                return;
        }
        /* K0 already points to save area, restore $1 and $2 */
        UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0);
        UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0);
}

#ifndef CONFIG_MIPS_PGD_C0_CONTEXT

/*
 * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current,
 * so we cannot do R3000 under these circumstances.
 *
 * Declare pgd_current here instead of including mmu_context.h to avoid type
 * conflicts for tlbmiss_handler_setup_pgd.
 */
extern unsigned long pgd_current[];

/*
 * The R3000 TLB handler is simple.
344 */ 345 static void __cpuinit build_r3000_tlb_refill_handler(void) 346 { 347 long pgdc = (long)pgd_current; 348 u32 *p; 349 350 memset(tlb_handler, 0, sizeof(tlb_handler)); 351 p = tlb_handler; 352 353 uasm_i_mfc0(&p, K0, C0_BADVADDR); 354 uasm_i_lui(&p, K1, uasm_rel_hi(pgdc)); /* cp0 delay */ 355 uasm_i_lw(&p, K1, uasm_rel_lo(pgdc), K1); 356 uasm_i_srl(&p, K0, K0, 22); /* load delay */ 357 uasm_i_sll(&p, K0, K0, 2); 358 uasm_i_addu(&p, K1, K1, K0); 359 uasm_i_mfc0(&p, K0, C0_CONTEXT); 360 uasm_i_lw(&p, K1, 0, K1); /* cp0 delay */ 361 uasm_i_andi(&p, K0, K0, 0xffc); /* load delay */ 362 uasm_i_addu(&p, K1, K1, K0); 363 uasm_i_lw(&p, K0, 0, K1); 364 uasm_i_nop(&p); /* load delay */ 365 uasm_i_mtc0(&p, K0, C0_ENTRYLO0); 366 uasm_i_mfc0(&p, K1, C0_EPC); /* cp0 delay */ 367 uasm_i_tlbwr(&p); /* cp0 delay */ 368 uasm_i_jr(&p, K1); 369 uasm_i_rfe(&p); /* branch delay */ 370 371 if (p > tlb_handler + 32) 372 panic("TLB refill handler space exceeded"); 373 374 pr_debug("Wrote TLB refill handler (%u instructions).\n", 375 (unsigned int)(p - tlb_handler)); 376 377 memcpy((void *)ebase, tlb_handler, 0x80); 378 379 dump_handler((u32 *)ebase, 32); 380 } 381 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ 382 383 /* 384 * The R4000 TLB handler is much more complicated. We have two 385 * consecutive handler areas with 32 instructions space each. 386 * Since they aren't used at the same time, we can overflow in the 387 * other one.To keep things simple, we first assume linear space, 388 * then we relocate it to the final handler layout as needed. 389 */ 390 static u32 final_handler[64] __cpuinitdata; 391 392 /* 393 * Hazards 394 * 395 * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0: 396 * 2. A timing hazard exists for the TLBP instruction. 397 * 398 * stalling_instruction 399 * TLBP 400 * 401 * The JTLB is being read for the TLBP throughout the stall generated by the 402 * previous instruction. This is not really correct as the stalling instruction 403 * can modify the address used to access the JTLB. The failure symptom is that 404 * the TLBP instruction will use an address created for the stalling instruction 405 * and not the address held in C0_ENHI and thus report the wrong results. 406 * 407 * The software work-around is to not allow the instruction preceding the TLBP 408 * to stall - make it an NOP or some other instruction guaranteed not to stall. 409 * 410 * Errata 2 will not be fixed. This errata is also on the R5000. 411 * 412 * As if we MIPS hackers wouldn't know how to nop pipelines happy ... 413 */ 414 static void __cpuinit __maybe_unused build_tlb_probe_entry(u32 **p) 415 { 416 switch (current_cpu_type()) { 417 /* Found by experiment: R4600 v2.0/R4700 needs this, too. */ 418 case CPU_R4600: 419 case CPU_R4700: 420 case CPU_R5000: 421 case CPU_R5000A: 422 case CPU_NEVADA: 423 uasm_i_nop(p); 424 uasm_i_tlbp(p); 425 break; 426 427 default: 428 uasm_i_tlbp(p); 429 break; 430 } 431 } 432 433 /* 434 * Write random or indexed TLB entry, and care about the hazards from 435 * the preceding mtc0 and for the following eret. 
436 */ 437 enum tlb_write_entry { tlb_random, tlb_indexed }; 438 439 static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, 440 struct uasm_reloc **r, 441 enum tlb_write_entry wmode) 442 { 443 void(*tlbw)(u32 **) = NULL; 444 445 switch (wmode) { 446 case tlb_random: tlbw = uasm_i_tlbwr; break; 447 case tlb_indexed: tlbw = uasm_i_tlbwi; break; 448 } 449 450 if (cpu_has_mips_r2) { 451 if (cpu_has_mips_r2_exec_hazard) 452 uasm_i_ehb(p); 453 tlbw(p); 454 return; 455 } 456 457 switch (current_cpu_type()) { 458 case CPU_R4000PC: 459 case CPU_R4000SC: 460 case CPU_R4000MC: 461 case CPU_R4400PC: 462 case CPU_R4400SC: 463 case CPU_R4400MC: 464 /* 465 * This branch uses up a mtc0 hazard nop slot and saves 466 * two nops after the tlbw instruction. 467 */ 468 uasm_il_bgezl(p, r, 0, label_tlbw_hazard); 469 tlbw(p); 470 uasm_l_tlbw_hazard(l, *p); 471 uasm_i_nop(p); 472 break; 473 474 case CPU_R4600: 475 case CPU_R4700: 476 case CPU_R5000: 477 case CPU_R5000A: 478 uasm_i_nop(p); 479 tlbw(p); 480 uasm_i_nop(p); 481 break; 482 483 case CPU_R4300: 484 case CPU_5KC: 485 case CPU_TX49XX: 486 case CPU_PR4450: 487 case CPU_XLR: 488 uasm_i_nop(p); 489 tlbw(p); 490 break; 491 492 case CPU_R10000: 493 case CPU_R12000: 494 case CPU_R14000: 495 case CPU_4KC: 496 case CPU_4KEC: 497 case CPU_SB1: 498 case CPU_SB1A: 499 case CPU_4KSC: 500 case CPU_20KC: 501 case CPU_25KF: 502 case CPU_BMIPS32: 503 case CPU_BMIPS3300: 504 case CPU_BMIPS4350: 505 case CPU_BMIPS4380: 506 case CPU_BMIPS5000: 507 case CPU_LOONGSON2: 508 case CPU_R5500: 509 if (m4kc_tlbp_war()) 510 uasm_i_nop(p); 511 case CPU_ALCHEMY: 512 tlbw(p); 513 break; 514 515 case CPU_NEVADA: 516 uasm_i_nop(p); /* QED specifies 2 nops hazard */ 517 /* 518 * This branch uses up a mtc0 hazard nop slot and saves 519 * a nop after the tlbw instruction. 520 */ 521 uasm_il_bgezl(p, r, 0, label_tlbw_hazard); 522 tlbw(p); 523 uasm_l_tlbw_hazard(l, *p); 524 break; 525 526 case CPU_RM7000: 527 uasm_i_nop(p); 528 uasm_i_nop(p); 529 uasm_i_nop(p); 530 uasm_i_nop(p); 531 tlbw(p); 532 break; 533 534 case CPU_RM9000: 535 /* 536 * When the JTLB is updated by tlbwi or tlbwr, a subsequent 537 * use of the JTLB for instructions should not occur for 4 538 * cpu cycles and use for data translations should not occur 539 * for 3 cpu cycles. 
540 */ 541 uasm_i_ssnop(p); 542 uasm_i_ssnop(p); 543 uasm_i_ssnop(p); 544 uasm_i_ssnop(p); 545 tlbw(p); 546 uasm_i_ssnop(p); 547 uasm_i_ssnop(p); 548 uasm_i_ssnop(p); 549 uasm_i_ssnop(p); 550 break; 551 552 case CPU_VR4111: 553 case CPU_VR4121: 554 case CPU_VR4122: 555 case CPU_VR4181: 556 case CPU_VR4181A: 557 uasm_i_nop(p); 558 uasm_i_nop(p); 559 tlbw(p); 560 uasm_i_nop(p); 561 uasm_i_nop(p); 562 break; 563 564 case CPU_VR4131: 565 case CPU_VR4133: 566 case CPU_R5432: 567 uasm_i_nop(p); 568 uasm_i_nop(p); 569 tlbw(p); 570 break; 571 572 case CPU_JZRISC: 573 tlbw(p); 574 uasm_i_nop(p); 575 break; 576 577 default: 578 panic("No TLB refill handler yet (CPU type: %d)", 579 current_cpu_data.cputype); 580 break; 581 } 582 } 583 584 static __cpuinit __maybe_unused void build_convert_pte_to_entrylo(u32 **p, 585 unsigned int reg) 586 { 587 if (kernel_uses_smartmips_rixi) { 588 UASM_i_SRL(p, reg, reg, ilog2(_PAGE_NO_EXEC)); 589 UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 590 } else { 591 #ifdef CONFIG_64BIT_PHYS_ADDR 592 uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL)); 593 #else 594 UASM_i_SRL(p, reg, reg, ilog2(_PAGE_GLOBAL)); 595 #endif 596 } 597 } 598 599 #ifdef CONFIG_HUGETLB_PAGE 600 601 static __cpuinit void build_restore_pagemask(u32 **p, 602 struct uasm_reloc **r, 603 unsigned int tmp, 604 enum label_id lid, 605 int restore_scratch) 606 { 607 if (restore_scratch) { 608 /* Reset default page size */ 609 if (PM_DEFAULT_MASK >> 16) { 610 uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); 611 uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); 612 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 613 uasm_il_b(p, r, lid); 614 } else if (PM_DEFAULT_MASK) { 615 uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); 616 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 617 uasm_il_b(p, r, lid); 618 } else { 619 uasm_i_mtc0(p, 0, C0_PAGEMASK); 620 uasm_il_b(p, r, lid); 621 } 622 if (scratch_reg > 0) 623 UASM_i_MFC0(p, 1, 31, scratch_reg); 624 else 625 UASM_i_LW(p, 1, scratchpad_offset(0), 0); 626 } else { 627 /* Reset default page size */ 628 if (PM_DEFAULT_MASK >> 16) { 629 uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); 630 uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); 631 uasm_il_b(p, r, lid); 632 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 633 } else if (PM_DEFAULT_MASK) { 634 uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); 635 uasm_il_b(p, r, lid); 636 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 637 } else { 638 uasm_il_b(p, r, lid); 639 uasm_i_mtc0(p, 0, C0_PAGEMASK); 640 } 641 } 642 } 643 644 static __cpuinit void build_huge_tlb_write_entry(u32 **p, 645 struct uasm_label **l, 646 struct uasm_reloc **r, 647 unsigned int tmp, 648 enum tlb_write_entry wmode, 649 int restore_scratch) 650 { 651 /* Set huge page tlb entry size */ 652 uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16); 653 uasm_i_ori(p, tmp, tmp, PM_HUGE_MASK & 0xffff); 654 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 655 656 build_tlb_write_entry(p, l, r, wmode); 657 658 build_restore_pagemask(p, r, tmp, label_leave, restore_scratch); 659 } 660 661 /* 662 * Check if Huge PTE is present, if so then jump to LABEL. 
 */
static void __cpuinit
build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp,
                  unsigned int pmd, int lid)
{
        UASM_i_LW(p, tmp, 0, pmd);
        if (use_bbit_insns()) {
                uasm_il_bbit1(p, r, tmp, ilog2(_PAGE_HUGE), lid);
        } else {
                uasm_i_andi(p, tmp, tmp, _PAGE_HUGE);
                uasm_il_bnez(p, r, tmp, lid);
        }
}

static __cpuinit void build_huge_update_entries(u32 **p,
                                                unsigned int pte,
                                                unsigned int tmp)
{
        int small_sequence;

        /*
         * A huge PTE describes an area the size of the
         * configured huge page size. This is twice the
         * size of the large TLB entry we intend to use.
         * A TLB entry half the size of the configured
         * huge page size is configured into entrylo0
         * and entrylo1 to cover the contiguous huge PTE
         * address space.
         */
        small_sequence = (HPAGE_SIZE >> 7) < 0x10000;

        /* We can clobber tmp. It isn't used after this. */
        if (!small_sequence)
                uasm_i_lui(p, tmp, HPAGE_SIZE >> (7 + 16));

        build_convert_pte_to_entrylo(p, pte);
        UASM_i_MTC0(p, pte, C0_ENTRYLO0); /* load it */
        /* convert to entrylo1 */
        if (small_sequence)
                UASM_i_ADDIU(p, pte, pte, HPAGE_SIZE >> 7);
        else
                UASM_i_ADDU(p, pte, pte, tmp);

        UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */
}

static __cpuinit void build_huge_handler_tail(u32 **p,
                                              struct uasm_reloc **r,
                                              struct uasm_label **l,
                                              unsigned int pte,
                                              unsigned int ptr)
{
#ifdef CONFIG_SMP
        UASM_i_SC(p, pte, 0, ptr);
        uasm_il_beqz(p, r, pte, label_tlb_huge_update);
        UASM_i_LW(p, pte, 0, ptr); /* Needed because SC killed our PTE */
#else
        UASM_i_SW(p, pte, 0, ptr);
#endif
        build_huge_update_entries(p, pte, ptr);
        build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0);
}
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef CONFIG_64BIT
/*
 * TMP and PTR are scratch.
 * TMP will be clobbered, PTR will hold the pmd entry.
 */
static void __cpuinit
build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
                 unsigned int tmp, unsigned int ptr)
{
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
        long pgdc = (long)pgd_current;
#endif
        /*
         * The vmalloc handling is not in the hotpath.
         */
        uasm_i_dmfc0(p, tmp, C0_BADVADDR);

        if (check_for_high_segbits) {
                /*
                 * The kernel currently implicitly assumes that the
                 * MIPS SEGBITS parameter for the processor is
                 * (PGDIR_SHIFT+PGDIR_BITS) or less, and will never
                 * allocate virtual addresses outside the maximum
                 * range for SEGBITS = (PGDIR_SHIFT+PGDIR_BITS). But
                 * that doesn't prevent user code from accessing the
                 * higher xuseg addresses. Here, we make sure that
                 * everything but the lower xuseg addresses goes down
                 * the module_alloc/vmalloc path.
                 */
                uasm_i_dsrl_safe(p, ptr, tmp, PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
                uasm_il_bnez(p, r, ptr, label_vmalloc);
        } else {
                uasm_il_bltz(p, r, tmp, label_vmalloc);
        }
        /* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */

#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
        if (pgd_reg != -1) {
                /* pgd is in pgd_reg */
                UASM_i_MFC0(p, ptr, 31, pgd_reg);
        } else {
                /*
                 * &pgd << 11 stored in CONTEXT [23..63].
                 */
                UASM_i_MFC0(p, ptr, C0_CONTEXT);

                /* Clear lower 23 bits of context.
*/ 774 uasm_i_dins(p, ptr, 0, 0, 23); 775 776 /* 1 0 1 0 1 << 6 xkphys cached */ 777 uasm_i_ori(p, ptr, ptr, 0x540); 778 uasm_i_drotr(p, ptr, ptr, 11); 779 } 780 #elif defined(CONFIG_SMP) 781 # ifdef CONFIG_MIPS_MT_SMTC 782 /* 783 * SMTC uses TCBind value as "CPU" index 784 */ 785 uasm_i_mfc0(p, ptr, C0_TCBIND); 786 uasm_i_dsrl_safe(p, ptr, ptr, 19); 787 # else 788 /* 789 * 64 bit SMP running in XKPHYS has smp_processor_id() << 3 790 * stored in CONTEXT. 791 */ 792 uasm_i_dmfc0(p, ptr, C0_CONTEXT); 793 uasm_i_dsrl_safe(p, ptr, ptr, 23); 794 # endif 795 UASM_i_LA_mostly(p, tmp, pgdc); 796 uasm_i_daddu(p, ptr, ptr, tmp); 797 uasm_i_dmfc0(p, tmp, C0_BADVADDR); 798 uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); 799 #else 800 UASM_i_LA_mostly(p, ptr, pgdc); 801 uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); 802 #endif 803 804 uasm_l_vmalloc_done(l, *p); 805 806 /* get pgd offset in bytes */ 807 uasm_i_dsrl_safe(p, tmp, tmp, PGDIR_SHIFT - 3); 808 809 uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3); 810 uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */ 811 #ifndef __PAGETABLE_PMD_FOLDED 812 uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ 813 uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */ 814 uasm_i_dsrl_safe(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */ 815 uasm_i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3); 816 uasm_i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */ 817 #endif 818 } 819 820 /* 821 * BVADDR is the faulting address, PTR is scratch. 822 * PTR will hold the pgd for vmalloc. 823 */ 824 static void __cpuinit 825 build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, 826 unsigned int bvaddr, unsigned int ptr, 827 enum vmalloc64_mode mode) 828 { 829 long swpd = (long)swapper_pg_dir; 830 int single_insn_swpd; 831 int did_vmalloc_branch = 0; 832 833 single_insn_swpd = uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd); 834 835 uasm_l_vmalloc(l, *p); 836 837 if (mode != not_refill && check_for_high_segbits) { 838 if (single_insn_swpd) { 839 uasm_il_bltz(p, r, bvaddr, label_vmalloc_done); 840 uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); 841 did_vmalloc_branch = 1; 842 /* fall through */ 843 } else { 844 uasm_il_bgez(p, r, bvaddr, label_large_segbits_fault); 845 } 846 } 847 if (!did_vmalloc_branch) { 848 if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) { 849 uasm_il_b(p, r, label_vmalloc_done); 850 uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); 851 } else { 852 UASM_i_LA_mostly(p, ptr, swpd); 853 uasm_il_b(p, r, label_vmalloc_done); 854 if (uasm_in_compat_space_p(swpd)) 855 uasm_i_addiu(p, ptr, ptr, uasm_rel_lo(swpd)); 856 else 857 uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(swpd)); 858 } 859 } 860 if (mode != not_refill && check_for_high_segbits) { 861 uasm_l_large_segbits_fault(l, *p); 862 /* 863 * We get here if we are an xsseg address, or if we are 864 * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary. 865 * 866 * Ignoring xsseg (assume disabled so would generate 867 * (address errors?), the only remaining possibility 868 * is the upper xuseg addresses. On processors with 869 * TLB_SEGBITS <= PGDIR_SHIFT+PGDIR_BITS, these 870 * addresses would have taken an address error. We try 871 * to mimic that here by taking a load/istream page 872 * fault. 
873 */ 874 UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0); 875 uasm_i_jr(p, ptr); 876 877 if (mode == refill_scratch) { 878 if (scratch_reg > 0) 879 UASM_i_MFC0(p, 1, 31, scratch_reg); 880 else 881 UASM_i_LW(p, 1, scratchpad_offset(0), 0); 882 } else { 883 uasm_i_nop(p); 884 } 885 } 886 } 887 888 #else /* !CONFIG_64BIT */ 889 890 /* 891 * TMP and PTR are scratch. 892 * TMP will be clobbered, PTR will hold the pgd entry. 893 */ 894 static void __cpuinit __maybe_unused 895 build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) 896 { 897 long pgdc = (long)pgd_current; 898 899 /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ 900 #ifdef CONFIG_SMP 901 #ifdef CONFIG_MIPS_MT_SMTC 902 /* 903 * SMTC uses TCBind value as "CPU" index 904 */ 905 uasm_i_mfc0(p, ptr, C0_TCBIND); 906 UASM_i_LA_mostly(p, tmp, pgdc); 907 uasm_i_srl(p, ptr, ptr, 19); 908 #else 909 /* 910 * smp_processor_id() << 3 is stored in CONTEXT. 911 */ 912 uasm_i_mfc0(p, ptr, C0_CONTEXT); 913 UASM_i_LA_mostly(p, tmp, pgdc); 914 uasm_i_srl(p, ptr, ptr, 23); 915 #endif 916 uasm_i_addu(p, ptr, tmp, ptr); 917 #else 918 UASM_i_LA_mostly(p, ptr, pgdc); 919 #endif 920 uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ 921 uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); 922 uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */ 923 uasm_i_sll(p, tmp, tmp, PGD_T_LOG2); 924 uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ 925 } 926 927 #endif /* !CONFIG_64BIT */ 928 929 static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx) 930 { 931 unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12; 932 unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); 933 934 switch (current_cpu_type()) { 935 case CPU_VR41XX: 936 case CPU_VR4111: 937 case CPU_VR4121: 938 case CPU_VR4122: 939 case CPU_VR4131: 940 case CPU_VR4181: 941 case CPU_VR4181A: 942 case CPU_VR4133: 943 shift += 2; 944 break; 945 946 default: 947 break; 948 } 949 950 if (shift) 951 UASM_i_SRL(p, ctx, ctx, shift); 952 uasm_i_andi(p, ctx, ctx, mask); 953 } 954 955 static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) 956 { 957 /* 958 * Bug workaround for the Nevada. It seems as if under certain 959 * circumstances the move from cp0_context might produce a 960 * bogus result when the mfc0 instruction and its consumer are 961 * in a different cacheline or a load instruction, probably any 962 * memory reference, is between them. 963 */ 964 switch (current_cpu_type()) { 965 case CPU_NEVADA: 966 UASM_i_LW(p, ptr, 0, ptr); 967 GET_CONTEXT(p, tmp); /* get context reg */ 968 break; 969 970 default: 971 GET_CONTEXT(p, tmp); /* get context reg */ 972 UASM_i_LW(p, ptr, 0, ptr); 973 break; 974 } 975 976 build_adjust_context(p, tmp); 977 UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */ 978 } 979 980 static void __cpuinit build_update_entries(u32 **p, unsigned int tmp, 981 unsigned int ptep) 982 { 983 /* 984 * 64bit address support (36bit on a 32bit CPU) in a 32bit 985 * Kernel is a special case. Only a few CPUs use it. 
986 */ 987 #ifdef CONFIG_64BIT_PHYS_ADDR 988 if (cpu_has_64bits) { 989 uasm_i_ld(p, tmp, 0, ptep); /* get even pte */ 990 uasm_i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ 991 if (kernel_uses_smartmips_rixi) { 992 UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_NO_EXEC)); 993 UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_NO_EXEC)); 994 UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 995 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 996 UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 997 } else { 998 uasm_i_dsrl_safe(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ 999 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1000 uasm_i_dsrl_safe(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ 1001 } 1002 UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ 1003 } else { 1004 int pte_off_even = sizeof(pte_t) / 2; 1005 int pte_off_odd = pte_off_even + sizeof(pte_t); 1006 1007 /* The pte entries are pre-shifted */ 1008 uasm_i_lw(p, tmp, pte_off_even, ptep); /* get even pte */ 1009 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1010 uasm_i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */ 1011 UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ 1012 } 1013 #else 1014 UASM_i_LW(p, tmp, 0, ptep); /* get even pte */ 1015 UASM_i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ 1016 if (r45k_bvahwbug()) 1017 build_tlb_probe_entry(p); 1018 if (kernel_uses_smartmips_rixi) { 1019 UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_NO_EXEC)); 1020 UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_NO_EXEC)); 1021 UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 1022 if (r4k_250MHZhwbug()) 1023 UASM_i_MTC0(p, 0, C0_ENTRYLO0); 1024 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1025 UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 1026 } else { 1027 UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ 1028 if (r4k_250MHZhwbug()) 1029 UASM_i_MTC0(p, 0, C0_ENTRYLO0); 1030 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1031 UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ 1032 if (r45k_bvahwbug()) 1033 uasm_i_mfc0(p, tmp, C0_INDEX); 1034 } 1035 if (r4k_250MHZhwbug()) 1036 UASM_i_MTC0(p, 0, C0_ENTRYLO1); 1037 UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ 1038 #endif 1039 } 1040 1041 struct mips_huge_tlb_info { 1042 int huge_pte; 1043 int restore_scratch; 1044 }; 1045 1046 static struct mips_huge_tlb_info __cpuinit 1047 build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, 1048 struct uasm_reloc **r, unsigned int tmp, 1049 unsigned int ptr, int c0_scratch) 1050 { 1051 struct mips_huge_tlb_info rv; 1052 unsigned int even, odd; 1053 int vmalloc_branch_delay_filled = 0; 1054 const int scratch = 1; /* Our extra working register */ 1055 1056 rv.huge_pte = scratch; 1057 rv.restore_scratch = 0; 1058 1059 if (check_for_high_segbits) { 1060 UASM_i_MFC0(p, tmp, C0_BADVADDR); 1061 1062 if (pgd_reg != -1) 1063 UASM_i_MFC0(p, ptr, 31, pgd_reg); 1064 else 1065 UASM_i_MFC0(p, ptr, C0_CONTEXT); 1066 1067 if (c0_scratch >= 0) 1068 UASM_i_MTC0(p, scratch, 31, c0_scratch); 1069 else 1070 UASM_i_SW(p, scratch, scratchpad_offset(0), 0); 1071 1072 uasm_i_dsrl_safe(p, scratch, tmp, 1073 PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); 1074 uasm_il_bnez(p, r, scratch, label_vmalloc); 1075 1076 if (pgd_reg == -1) { 1077 vmalloc_branch_delay_filled = 1; 1078 /* Clear lower 23 bits of context. 
*/ 1079 uasm_i_dins(p, ptr, 0, 0, 23); 1080 } 1081 } else { 1082 if (pgd_reg != -1) 1083 UASM_i_MFC0(p, ptr, 31, pgd_reg); 1084 else 1085 UASM_i_MFC0(p, ptr, C0_CONTEXT); 1086 1087 UASM_i_MFC0(p, tmp, C0_BADVADDR); 1088 1089 if (c0_scratch >= 0) 1090 UASM_i_MTC0(p, scratch, 31, c0_scratch); 1091 else 1092 UASM_i_SW(p, scratch, scratchpad_offset(0), 0); 1093 1094 if (pgd_reg == -1) 1095 /* Clear lower 23 bits of context. */ 1096 uasm_i_dins(p, ptr, 0, 0, 23); 1097 1098 uasm_il_bltz(p, r, tmp, label_vmalloc); 1099 } 1100 1101 if (pgd_reg == -1) { 1102 vmalloc_branch_delay_filled = 1; 1103 /* 1 0 1 0 1 << 6 xkphys cached */ 1104 uasm_i_ori(p, ptr, ptr, 0x540); 1105 uasm_i_drotr(p, ptr, ptr, 11); 1106 } 1107 1108 #ifdef __PAGETABLE_PMD_FOLDED 1109 #define LOC_PTEP scratch 1110 #else 1111 #define LOC_PTEP ptr 1112 #endif 1113 1114 if (!vmalloc_branch_delay_filled) 1115 /* get pgd offset in bytes */ 1116 uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3); 1117 1118 uasm_l_vmalloc_done(l, *p); 1119 1120 /* 1121 * tmp ptr 1122 * fall-through case = badvaddr *pgd_current 1123 * vmalloc case = badvaddr swapper_pg_dir 1124 */ 1125 1126 if (vmalloc_branch_delay_filled) 1127 /* get pgd offset in bytes */ 1128 uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3); 1129 1130 #ifdef __PAGETABLE_PMD_FOLDED 1131 GET_CONTEXT(p, tmp); /* get context reg */ 1132 #endif 1133 uasm_i_andi(p, scratch, scratch, (PTRS_PER_PGD - 1) << 3); 1134 1135 if (use_lwx_insns()) { 1136 UASM_i_LWX(p, LOC_PTEP, scratch, ptr); 1137 } else { 1138 uasm_i_daddu(p, ptr, ptr, scratch); /* add in pgd offset */ 1139 uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */ 1140 } 1141 1142 #ifndef __PAGETABLE_PMD_FOLDED 1143 /* get pmd offset in bytes */ 1144 uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3); 1145 uasm_i_andi(p, scratch, scratch, (PTRS_PER_PMD - 1) << 3); 1146 GET_CONTEXT(p, tmp); /* get context reg */ 1147 1148 if (use_lwx_insns()) { 1149 UASM_i_LWX(p, scratch, scratch, ptr); 1150 } else { 1151 uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */ 1152 UASM_i_LW(p, scratch, 0, ptr); 1153 } 1154 #endif 1155 /* Adjust the context during the load latency. */ 1156 build_adjust_context(p, tmp); 1157 1158 #ifdef CONFIG_HUGETLB_PAGE 1159 uasm_il_bbit1(p, r, scratch, ilog2(_PAGE_HUGE), label_tlb_huge_update); 1160 /* 1161 * The in the LWX case we don't want to do the load in the 1162 * delay slot. It cannot issue in the same cycle and may be 1163 * speculative and unneeded. 
1164 */ 1165 if (use_lwx_insns()) 1166 uasm_i_nop(p); 1167 #endif /* CONFIG_HUGETLB_PAGE */ 1168 1169 1170 /* build_update_entries */ 1171 if (use_lwx_insns()) { 1172 even = ptr; 1173 odd = tmp; 1174 UASM_i_LWX(p, even, scratch, tmp); 1175 UASM_i_ADDIU(p, tmp, tmp, sizeof(pte_t)); 1176 UASM_i_LWX(p, odd, scratch, tmp); 1177 } else { 1178 UASM_i_ADDU(p, ptr, scratch, tmp); /* add in offset */ 1179 even = tmp; 1180 odd = ptr; 1181 UASM_i_LW(p, even, 0, ptr); /* get even pte */ 1182 UASM_i_LW(p, odd, sizeof(pte_t), ptr); /* get odd pte */ 1183 } 1184 if (kernel_uses_smartmips_rixi) { 1185 uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_NO_EXEC)); 1186 uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_NO_EXEC)); 1187 uasm_i_drotr(p, even, even, 1188 ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 1189 UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ 1190 uasm_i_drotr(p, odd, odd, 1191 ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 1192 } else { 1193 uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_GLOBAL)); 1194 UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ 1195 uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_GLOBAL)); 1196 } 1197 UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ 1198 1199 if (c0_scratch >= 0) { 1200 UASM_i_MFC0(p, scratch, 31, c0_scratch); 1201 build_tlb_write_entry(p, l, r, tlb_random); 1202 uasm_l_leave(l, *p); 1203 rv.restore_scratch = 1; 1204 } else if (PAGE_SHIFT == 14 || PAGE_SHIFT == 13) { 1205 build_tlb_write_entry(p, l, r, tlb_random); 1206 uasm_l_leave(l, *p); 1207 UASM_i_LW(p, scratch, scratchpad_offset(0), 0); 1208 } else { 1209 UASM_i_LW(p, scratch, scratchpad_offset(0), 0); 1210 build_tlb_write_entry(p, l, r, tlb_random); 1211 uasm_l_leave(l, *p); 1212 rv.restore_scratch = 1; 1213 } 1214 1215 uasm_i_eret(p); /* return from trap */ 1216 1217 return rv; 1218 } 1219 1220 /* 1221 * For a 64-bit kernel, we are using the 64-bit XTLB refill exception 1222 * because EXL == 0. If we wrap, we can also use the 32 instruction 1223 * slots before the XTLB refill exception handler which belong to the 1224 * unused TLB refill exception. 
1225 */ 1226 #define MIPS64_REFILL_INSNS 32 1227 1228 static void __cpuinit build_r4000_tlb_refill_handler(void) 1229 { 1230 u32 *p = tlb_handler; 1231 struct uasm_label *l = labels; 1232 struct uasm_reloc *r = relocs; 1233 u32 *f; 1234 unsigned int final_len; 1235 struct mips_huge_tlb_info htlb_info __maybe_unused; 1236 enum vmalloc64_mode vmalloc_mode __maybe_unused; 1237 1238 memset(tlb_handler, 0, sizeof(tlb_handler)); 1239 memset(labels, 0, sizeof(labels)); 1240 memset(relocs, 0, sizeof(relocs)); 1241 memset(final_handler, 0, sizeof(final_handler)); 1242 1243 if ((scratch_reg > 0 || scratchpad_available()) && use_bbit_insns()) { 1244 htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1, 1245 scratch_reg); 1246 vmalloc_mode = refill_scratch; 1247 } else { 1248 htlb_info.huge_pte = K0; 1249 htlb_info.restore_scratch = 0; 1250 vmalloc_mode = refill_noscratch; 1251 /* 1252 * create the plain linear handler 1253 */ 1254 if (bcm1250_m3_war()) { 1255 unsigned int segbits = 44; 1256 1257 uasm_i_dmfc0(&p, K0, C0_BADVADDR); 1258 uasm_i_dmfc0(&p, K1, C0_ENTRYHI); 1259 uasm_i_xor(&p, K0, K0, K1); 1260 uasm_i_dsrl_safe(&p, K1, K0, 62); 1261 uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); 1262 uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); 1263 uasm_i_or(&p, K0, K0, K1); 1264 uasm_il_bnez(&p, &r, K0, label_leave); 1265 /* No need for uasm_i_nop */ 1266 } 1267 1268 #ifdef CONFIG_64BIT 1269 build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ 1270 #else 1271 build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ 1272 #endif 1273 1274 #ifdef CONFIG_HUGETLB_PAGE 1275 build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update); 1276 #endif 1277 1278 build_get_ptep(&p, K0, K1); 1279 build_update_entries(&p, K0, K1); 1280 build_tlb_write_entry(&p, &l, &r, tlb_random); 1281 uasm_l_leave(&l, p); 1282 uasm_i_eret(&p); /* return from trap */ 1283 } 1284 #ifdef CONFIG_HUGETLB_PAGE 1285 uasm_l_tlb_huge_update(&l, p); 1286 build_huge_update_entries(&p, htlb_info.huge_pte, K1); 1287 build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random, 1288 htlb_info.restore_scratch); 1289 #endif 1290 1291 #ifdef CONFIG_64BIT 1292 build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode); 1293 #endif 1294 1295 /* 1296 * Overflow check: For the 64bit handler, we need at least one 1297 * free instruction slot for the wrap-around branch. In worst 1298 * case, if the intended insertion point is a delay slot, we 1299 * need three, with the second nop'ed and the third being 1300 * unused. 1301 */ 1302 /* Loongson2 ebase is different than r4k, we have more space */ 1303 #if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) 1304 if ((p - tlb_handler) > 64) 1305 panic("TLB refill handler space exceeded"); 1306 #else 1307 if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) 1308 || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) 1309 && uasm_insn_has_bdelay(relocs, 1310 tlb_handler + MIPS64_REFILL_INSNS - 3))) 1311 panic("TLB refill handler space exceeded"); 1312 #endif 1313 1314 /* 1315 * Now fold the handler in the TLB refill handler space. 1316 */ 1317 #if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) 1318 f = final_handler; 1319 /* Simplest case, just copy the handler. */ 1320 uasm_copy_handler(relocs, labels, tlb_handler, p, f); 1321 final_len = p - tlb_handler; 1322 #else /* CONFIG_64BIT */ 1323 f = final_handler + MIPS64_REFILL_INSNS; 1324 if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { 1325 /* Just copy the handler. 
*/ 1326 uasm_copy_handler(relocs, labels, tlb_handler, p, f); 1327 final_len = p - tlb_handler; 1328 } else { 1329 #if defined(CONFIG_HUGETLB_PAGE) 1330 const enum label_id ls = label_tlb_huge_update; 1331 #else 1332 const enum label_id ls = label_vmalloc; 1333 #endif 1334 u32 *split; 1335 int ov = 0; 1336 int i; 1337 1338 for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) 1339 ; 1340 BUG_ON(i == ARRAY_SIZE(labels)); 1341 split = labels[i].addr; 1342 1343 /* 1344 * See if we have overflown one way or the other. 1345 */ 1346 if (split > tlb_handler + MIPS64_REFILL_INSNS || 1347 split < p - MIPS64_REFILL_INSNS) 1348 ov = 1; 1349 1350 if (ov) { 1351 /* 1352 * Split two instructions before the end. One 1353 * for the branch and one for the instruction 1354 * in the delay slot. 1355 */ 1356 split = tlb_handler + MIPS64_REFILL_INSNS - 2; 1357 1358 /* 1359 * If the branch would fall in a delay slot, 1360 * we must back up an additional instruction 1361 * so that it is no longer in a delay slot. 1362 */ 1363 if (uasm_insn_has_bdelay(relocs, split - 1)) 1364 split--; 1365 } 1366 /* Copy first part of the handler. */ 1367 uasm_copy_handler(relocs, labels, tlb_handler, split, f); 1368 f += split - tlb_handler; 1369 1370 if (ov) { 1371 /* Insert branch. */ 1372 uasm_l_split(&l, final_handler); 1373 uasm_il_b(&f, &r, label_split); 1374 if (uasm_insn_has_bdelay(relocs, split)) 1375 uasm_i_nop(&f); 1376 else { 1377 uasm_copy_handler(relocs, labels, 1378 split, split + 1, f); 1379 uasm_move_labels(labels, f, f + 1, -1); 1380 f++; 1381 split++; 1382 } 1383 } 1384 1385 /* Copy the rest of the handler. */ 1386 uasm_copy_handler(relocs, labels, split, p, final_handler); 1387 final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + 1388 (p - split); 1389 } 1390 #endif /* CONFIG_64BIT */ 1391 1392 uasm_resolve_relocs(relocs, labels); 1393 pr_debug("Wrote TLB refill handler (%u instructions).\n", 1394 final_len); 1395 1396 memcpy((void *)ebase, final_handler, 0x100); 1397 1398 dump_handler((u32 *)ebase, 64); 1399 } 1400 1401 /* 1402 * 128 instructions for the fastpath handler is generous and should 1403 * never be exceeded. 1404 */ 1405 #define FASTPATH_SIZE 128 1406 1407 u32 handle_tlbl[FASTPATH_SIZE] __cacheline_aligned; 1408 u32 handle_tlbs[FASTPATH_SIZE] __cacheline_aligned; 1409 u32 handle_tlbm[FASTPATH_SIZE] __cacheline_aligned; 1410 #ifdef CONFIG_MIPS_PGD_C0_CONTEXT 1411 u32 tlbmiss_handler_setup_pgd[16] __cacheline_aligned; 1412 1413 static void __cpuinit build_r4000_setup_pgd(void) 1414 { 1415 const int a0 = 4; 1416 const int a1 = 5; 1417 u32 *p = tlbmiss_handler_setup_pgd; 1418 struct uasm_label *l = labels; 1419 struct uasm_reloc *r = relocs; 1420 1421 memset(tlbmiss_handler_setup_pgd, 0, sizeof(tlbmiss_handler_setup_pgd)); 1422 memset(labels, 0, sizeof(labels)); 1423 memset(relocs, 0, sizeof(relocs)); 1424 1425 pgd_reg = allocate_kscratch(); 1426 1427 if (pgd_reg == -1) { 1428 /* PGD << 11 in c0_Context */ 1429 /* 1430 * If it is a ckseg0 address, convert to a physical 1431 * address. Shifting right by 29 and adding 4 will 1432 * result in zero for these addresses. 
1433 * 1434 */ 1435 UASM_i_SRA(&p, a1, a0, 29); 1436 UASM_i_ADDIU(&p, a1, a1, 4); 1437 uasm_il_bnez(&p, &r, a1, label_tlbl_goaround1); 1438 uasm_i_nop(&p); 1439 uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); 1440 uasm_l_tlbl_goaround1(&l, p); 1441 UASM_i_SLL(&p, a0, a0, 11); 1442 uasm_i_jr(&p, 31); 1443 UASM_i_MTC0(&p, a0, C0_CONTEXT); 1444 } else { 1445 /* PGD in c0_KScratch */ 1446 uasm_i_jr(&p, 31); 1447 UASM_i_MTC0(&p, a0, 31, pgd_reg); 1448 } 1449 if (p - tlbmiss_handler_setup_pgd > ARRAY_SIZE(tlbmiss_handler_setup_pgd)) 1450 panic("tlbmiss_handler_setup_pgd space exceeded"); 1451 uasm_resolve_relocs(relocs, labels); 1452 pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n", 1453 (unsigned int)(p - tlbmiss_handler_setup_pgd)); 1454 1455 dump_handler(tlbmiss_handler_setup_pgd, 1456 ARRAY_SIZE(tlbmiss_handler_setup_pgd)); 1457 } 1458 #endif 1459 1460 static void __cpuinit 1461 iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) 1462 { 1463 #ifdef CONFIG_SMP 1464 # ifdef CONFIG_64BIT_PHYS_ADDR 1465 if (cpu_has_64bits) 1466 uasm_i_lld(p, pte, 0, ptr); 1467 else 1468 # endif 1469 UASM_i_LL(p, pte, 0, ptr); 1470 #else 1471 # ifdef CONFIG_64BIT_PHYS_ADDR 1472 if (cpu_has_64bits) 1473 uasm_i_ld(p, pte, 0, ptr); 1474 else 1475 # endif 1476 UASM_i_LW(p, pte, 0, ptr); 1477 #endif 1478 } 1479 1480 static void __cpuinit 1481 iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, 1482 unsigned int mode) 1483 { 1484 #ifdef CONFIG_64BIT_PHYS_ADDR 1485 unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY); 1486 #endif 1487 1488 uasm_i_ori(p, pte, pte, mode); 1489 #ifdef CONFIG_SMP 1490 # ifdef CONFIG_64BIT_PHYS_ADDR 1491 if (cpu_has_64bits) 1492 uasm_i_scd(p, pte, 0, ptr); 1493 else 1494 # endif 1495 UASM_i_SC(p, pte, 0, ptr); 1496 1497 if (r10000_llsc_war()) 1498 uasm_il_beqzl(p, r, pte, label_smp_pgtable_change); 1499 else 1500 uasm_il_beqz(p, r, pte, label_smp_pgtable_change); 1501 1502 # ifdef CONFIG_64BIT_PHYS_ADDR 1503 if (!cpu_has_64bits) { 1504 /* no uasm_i_nop needed */ 1505 uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr); 1506 uasm_i_ori(p, pte, pte, hwmode); 1507 uasm_i_sc(p, pte, sizeof(pte_t) / 2, ptr); 1508 uasm_il_beqz(p, r, pte, label_smp_pgtable_change); 1509 /* no uasm_i_nop needed */ 1510 uasm_i_lw(p, pte, 0, ptr); 1511 } else 1512 uasm_i_nop(p); 1513 # else 1514 uasm_i_nop(p); 1515 # endif 1516 #else 1517 # ifdef CONFIG_64BIT_PHYS_ADDR 1518 if (cpu_has_64bits) 1519 uasm_i_sd(p, pte, 0, ptr); 1520 else 1521 # endif 1522 UASM_i_SW(p, pte, 0, ptr); 1523 1524 # ifdef CONFIG_64BIT_PHYS_ADDR 1525 if (!cpu_has_64bits) { 1526 uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr); 1527 uasm_i_ori(p, pte, pte, hwmode); 1528 uasm_i_sw(p, pte, sizeof(pte_t) / 2, ptr); 1529 uasm_i_lw(p, pte, 0, ptr); 1530 } 1531 # endif 1532 #endif 1533 } 1534 1535 /* 1536 * Check if PTE is present, if not then jump to LABEL. PTR points to 1537 * the page table where this PTE is located, PTE will be re-loaded 1538 * with it's original value. 1539 */ 1540 static void __cpuinit 1541 build_pte_present(u32 **p, struct uasm_reloc **r, 1542 int pte, int ptr, int scratch, enum label_id lid) 1543 { 1544 int t = scratch >= 0 ? 
scratch : pte; 1545 1546 if (kernel_uses_smartmips_rixi) { 1547 if (use_bbit_insns()) { 1548 uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); 1549 uasm_i_nop(p); 1550 } else { 1551 uasm_i_andi(p, t, pte, _PAGE_PRESENT); 1552 uasm_il_beqz(p, r, t, lid); 1553 if (pte == t) 1554 /* You lose the SMP race :-(*/ 1555 iPTE_LW(p, pte, ptr); 1556 } 1557 } else { 1558 uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ); 1559 uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ); 1560 uasm_il_bnez(p, r, t, lid); 1561 if (pte == t) 1562 /* You lose the SMP race :-(*/ 1563 iPTE_LW(p, pte, ptr); 1564 } 1565 } 1566 1567 /* Make PTE valid, store result in PTR. */ 1568 static void __cpuinit 1569 build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte, 1570 unsigned int ptr) 1571 { 1572 unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED; 1573 1574 iPTE_SW(p, r, pte, ptr, mode); 1575 } 1576 1577 /* 1578 * Check if PTE can be written to, if not branch to LABEL. Regardless 1579 * restore PTE with value from PTR when done. 1580 */ 1581 static void __cpuinit 1582 build_pte_writable(u32 **p, struct uasm_reloc **r, 1583 unsigned int pte, unsigned int ptr, int scratch, 1584 enum label_id lid) 1585 { 1586 int t = scratch >= 0 ? scratch : pte; 1587 1588 uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE); 1589 uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE); 1590 uasm_il_bnez(p, r, t, lid); 1591 if (pte == t) 1592 /* You lose the SMP race :-(*/ 1593 iPTE_LW(p, pte, ptr); 1594 else 1595 uasm_i_nop(p); 1596 } 1597 1598 /* Make PTE writable, update software status bits as well, then store 1599 * at PTR. 1600 */ 1601 static void __cpuinit 1602 build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte, 1603 unsigned int ptr) 1604 { 1605 unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID 1606 | _PAGE_DIRTY); 1607 1608 iPTE_SW(p, r, pte, ptr, mode); 1609 } 1610 1611 /* 1612 * Check if PTE can be modified, if not branch to LABEL. Regardless 1613 * restore PTE with value from PTR when done. 1614 */ 1615 static void __cpuinit 1616 build_pte_modifiable(u32 **p, struct uasm_reloc **r, 1617 unsigned int pte, unsigned int ptr, int scratch, 1618 enum label_id lid) 1619 { 1620 if (use_bbit_insns()) { 1621 uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid); 1622 uasm_i_nop(p); 1623 } else { 1624 int t = scratch >= 0 ? scratch : pte; 1625 uasm_i_andi(p, t, pte, _PAGE_WRITE); 1626 uasm_il_beqz(p, r, t, lid); 1627 if (pte == t) 1628 /* You lose the SMP race :-(*/ 1629 iPTE_LW(p, pte, ptr); 1630 } 1631 } 1632 1633 #ifndef CONFIG_MIPS_PGD_C0_CONTEXT 1634 1635 1636 /* 1637 * R3000 style TLB load/store/modify handlers. 1638 */ 1639 1640 /* 1641 * This places the pte into ENTRYLO0 and writes it with tlbwi. 1642 * Then it returns. 1643 */ 1644 static void __cpuinit 1645 build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp) 1646 { 1647 uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ 1648 uasm_i_mfc0(p, tmp, C0_EPC); /* cp0 delay */ 1649 uasm_i_tlbwi(p); 1650 uasm_i_jr(p, tmp); 1651 uasm_i_rfe(p); /* branch delay */ 1652 } 1653 1654 /* 1655 * This places the pte into ENTRYLO0 and writes it with tlbwi 1656 * or tlbwr as appropriate. This is because the index register 1657 * may have the probe fail bit set as a result of a trap on a 1658 * kseg2 access, i.e. without refill. Then it returns. 
1659 */ 1660 static void __cpuinit 1661 build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l, 1662 struct uasm_reloc **r, unsigned int pte, 1663 unsigned int tmp) 1664 { 1665 uasm_i_mfc0(p, tmp, C0_INDEX); 1666 uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ 1667 uasm_il_bltz(p, r, tmp, label_r3000_write_probe_fail); /* cp0 delay */ 1668 uasm_i_mfc0(p, tmp, C0_EPC); /* branch delay */ 1669 uasm_i_tlbwi(p); /* cp0 delay */ 1670 uasm_i_jr(p, tmp); 1671 uasm_i_rfe(p); /* branch delay */ 1672 uasm_l_r3000_write_probe_fail(l, *p); 1673 uasm_i_tlbwr(p); /* cp0 delay */ 1674 uasm_i_jr(p, tmp); 1675 uasm_i_rfe(p); /* branch delay */ 1676 } 1677 1678 static void __cpuinit 1679 build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, 1680 unsigned int ptr) 1681 { 1682 long pgdc = (long)pgd_current; 1683 1684 uasm_i_mfc0(p, pte, C0_BADVADDR); 1685 uasm_i_lui(p, ptr, uasm_rel_hi(pgdc)); /* cp0 delay */ 1686 uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); 1687 uasm_i_srl(p, pte, pte, 22); /* load delay */ 1688 uasm_i_sll(p, pte, pte, 2); 1689 uasm_i_addu(p, ptr, ptr, pte); 1690 uasm_i_mfc0(p, pte, C0_CONTEXT); 1691 uasm_i_lw(p, ptr, 0, ptr); /* cp0 delay */ 1692 uasm_i_andi(p, pte, pte, 0xffc); /* load delay */ 1693 uasm_i_addu(p, ptr, ptr, pte); 1694 uasm_i_lw(p, pte, 0, ptr); 1695 uasm_i_tlbp(p); /* load delay */ 1696 } 1697 1698 static void __cpuinit build_r3000_tlb_load_handler(void) 1699 { 1700 u32 *p = handle_tlbl; 1701 struct uasm_label *l = labels; 1702 struct uasm_reloc *r = relocs; 1703 1704 memset(handle_tlbl, 0, sizeof(handle_tlbl)); 1705 memset(labels, 0, sizeof(labels)); 1706 memset(relocs, 0, sizeof(relocs)); 1707 1708 build_r3000_tlbchange_handler_head(&p, K0, K1); 1709 build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl); 1710 uasm_i_nop(&p); /* load delay */ 1711 build_make_valid(&p, &r, K0, K1); 1712 build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); 1713 1714 uasm_l_nopage_tlbl(&l, p); 1715 uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); 1716 uasm_i_nop(&p); 1717 1718 if ((p - handle_tlbl) > FASTPATH_SIZE) 1719 panic("TLB load handler fastpath space exceeded"); 1720 1721 uasm_resolve_relocs(relocs, labels); 1722 pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", 1723 (unsigned int)(p - handle_tlbl)); 1724 1725 dump_handler(handle_tlbl, ARRAY_SIZE(handle_tlbl)); 1726 } 1727 1728 static void __cpuinit build_r3000_tlb_store_handler(void) 1729 { 1730 u32 *p = handle_tlbs; 1731 struct uasm_label *l = labels; 1732 struct uasm_reloc *r = relocs; 1733 1734 memset(handle_tlbs, 0, sizeof(handle_tlbs)); 1735 memset(labels, 0, sizeof(labels)); 1736 memset(relocs, 0, sizeof(relocs)); 1737 1738 build_r3000_tlbchange_handler_head(&p, K0, K1); 1739 build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs); 1740 uasm_i_nop(&p); /* load delay */ 1741 build_make_write(&p, &r, K0, K1); 1742 build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); 1743 1744 uasm_l_nopage_tlbs(&l, p); 1745 uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); 1746 uasm_i_nop(&p); 1747 1748 if ((p - handle_tlbs) > FASTPATH_SIZE) 1749 panic("TLB store handler fastpath space exceeded"); 1750 1751 uasm_resolve_relocs(relocs, labels); 1752 pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", 1753 (unsigned int)(p - handle_tlbs)); 1754 1755 dump_handler(handle_tlbs, ARRAY_SIZE(handle_tlbs)); 1756 } 1757 1758 static void __cpuinit build_r3000_tlb_modify_handler(void) 1759 { 1760 u32 *p = handle_tlbm; 1761 struct uasm_label *l = labels; 1762 struct uasm_reloc *r = relocs; 
1763 1764 memset(handle_tlbm, 0, sizeof(handle_tlbm)); 1765 memset(labels, 0, sizeof(labels)); 1766 memset(relocs, 0, sizeof(relocs)); 1767 1768 build_r3000_tlbchange_handler_head(&p, K0, K1); 1769 build_pte_modifiable(&p, &r, K0, K1, -1, label_nopage_tlbm); 1770 uasm_i_nop(&p); /* load delay */ 1771 build_make_write(&p, &r, K0, K1); 1772 build_r3000_pte_reload_tlbwi(&p, K0, K1); 1773 1774 uasm_l_nopage_tlbm(&l, p); 1775 uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); 1776 uasm_i_nop(&p); 1777 1778 if ((p - handle_tlbm) > FASTPATH_SIZE) 1779 panic("TLB modify handler fastpath space exceeded"); 1780 1781 uasm_resolve_relocs(relocs, labels); 1782 pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", 1783 (unsigned int)(p - handle_tlbm)); 1784 1785 dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm)); 1786 } 1787 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ 1788 1789 /* 1790 * R4000 style TLB load/store/modify handlers. 1791 */ 1792 static struct work_registers __cpuinit 1793 build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, 1794 struct uasm_reloc **r) 1795 { 1796 struct work_registers wr = build_get_work_registers(p); 1797 1798 #ifdef CONFIG_64BIT 1799 build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */ 1800 #else 1801 build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */ 1802 #endif 1803 1804 #ifdef CONFIG_HUGETLB_PAGE 1805 /* 1806 * For huge tlb entries, pmd doesn't contain an address but 1807 * instead contains the tlb pte. Check the PAGE_HUGE bit and 1808 * see if we need to jump to huge tlb processing. 1809 */ 1810 build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update); 1811 #endif 1812 1813 UASM_i_MFC0(p, wr.r1, C0_BADVADDR); 1814 UASM_i_LW(p, wr.r2, 0, wr.r2); 1815 UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); 1816 uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2); 1817 UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1); 1818 1819 #ifdef CONFIG_SMP 1820 uasm_l_smp_pgtable_change(l, *p); 1821 #endif 1822 iPTE_LW(p, wr.r1, wr.r2); /* get even pte */ 1823 if (!m4kc_tlbp_war()) 1824 build_tlb_probe_entry(p); 1825 return wr; 1826 } 1827 1828 static void __cpuinit 1829 build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l, 1830 struct uasm_reloc **r, unsigned int tmp, 1831 unsigned int ptr) 1832 { 1833 uasm_i_ori(p, ptr, ptr, sizeof(pte_t)); 1834 uasm_i_xori(p, ptr, ptr, sizeof(pte_t)); 1835 build_update_entries(p, tmp, ptr); 1836 build_tlb_write_entry(p, l, r, tlb_indexed); 1837 uasm_l_leave(l, *p); 1838 build_restore_work_registers(p); 1839 uasm_i_eret(p); /* return from trap */ 1840 1841 #ifdef CONFIG_64BIT 1842 build_get_pgd_vmalloc64(p, l, r, tmp, ptr, not_refill); 1843 #endif 1844 } 1845 1846 static void __cpuinit build_r4000_tlb_load_handler(void) 1847 { 1848 u32 *p = handle_tlbl; 1849 struct uasm_label *l = labels; 1850 struct uasm_reloc *r = relocs; 1851 struct work_registers wr; 1852 1853 memset(handle_tlbl, 0, sizeof(handle_tlbl)); 1854 memset(labels, 0, sizeof(labels)); 1855 memset(relocs, 0, sizeof(relocs)); 1856 1857 if (bcm1250_m3_war()) { 1858 unsigned int segbits = 44; 1859 1860 uasm_i_dmfc0(&p, K0, C0_BADVADDR); 1861 uasm_i_dmfc0(&p, K1, C0_ENTRYHI); 1862 uasm_i_xor(&p, K0, K0, K1); 1863 uasm_i_dsrl_safe(&p, K1, K0, 62); 1864 uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); 1865 uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); 1866 uasm_i_or(&p, K0, K0, K1); 1867 uasm_il_bnez(&p, &r, K0, label_leave); 1868 /* No need for uasm_i_nop */ 1869 } 1870 1871 wr = build_r4000_tlbchange_handler_head(&p, 
&l, &r); 1872 build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); 1873 if (m4kc_tlbp_war()) 1874 build_tlb_probe_entry(&p); 1875 1876 if (kernel_uses_smartmips_rixi) { 1877 /* 1878 * If the page is not _PAGE_VALID, RI or XI could not 1879 * have triggered it. Skip the expensive test.. 1880 */ 1881 if (use_bbit_insns()) { 1882 uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), 1883 label_tlbl_goaround1); 1884 } else { 1885 uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); 1886 uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1); 1887 } 1888 uasm_i_nop(&p); 1889 1890 uasm_i_tlbr(&p); 1891 /* Examine entrylo 0 or 1 based on ptr. */ 1892 if (use_bbit_insns()) { 1893 uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); 1894 } else { 1895 uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); 1896 uasm_i_beqz(&p, wr.r3, 8); 1897 } 1898 /* load it in the delay slot*/ 1899 UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); 1900 /* load it if ptr is odd */ 1901 UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1); 1902 /* 1903 * If the entryLo (now in wr.r3) is valid (bit 1), RI or 1904 * XI must have triggered it. 1905 */ 1906 if (use_bbit_insns()) { 1907 uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl); 1908 uasm_i_nop(&p); 1909 uasm_l_tlbl_goaround1(&l, p); 1910 } else { 1911 uasm_i_andi(&p, wr.r3, wr.r3, 2); 1912 uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl); 1913 uasm_i_nop(&p); 1914 } 1915 uasm_l_tlbl_goaround1(&l, p); 1916 } 1917 build_make_valid(&p, &r, wr.r1, wr.r2); 1918 build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); 1919 1920 #ifdef CONFIG_HUGETLB_PAGE 1921 /* 1922 * This is the entry point when build_r4000_tlbchange_handler_head 1923 * spots a huge page. 1924 */ 1925 uasm_l_tlb_huge_update(&l, p); 1926 iPTE_LW(&p, wr.r1, wr.r2); 1927 build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); 1928 build_tlb_probe_entry(&p); 1929 1930 if (kernel_uses_smartmips_rixi) { 1931 /* 1932 * If the page is not _PAGE_VALID, RI or XI could not 1933 * have triggered it. Skip the expensive test.. 1934 */ 1935 if (use_bbit_insns()) { 1936 uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), 1937 label_tlbl_goaround2); 1938 } else { 1939 uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); 1940 uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); 1941 } 1942 uasm_i_nop(&p); 1943 1944 uasm_i_tlbr(&p); 1945 /* Examine entrylo 0 or 1 based on ptr. */ 1946 if (use_bbit_insns()) { 1947 uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); 1948 } else { 1949 uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); 1950 uasm_i_beqz(&p, wr.r3, 8); 1951 } 1952 /* load it in the delay slot*/ 1953 UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); 1954 /* load it if ptr is odd */ 1955 UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1); 1956 /* 1957 * If the entryLo (now in wr.r3) is valid (bit 1), RI or 1958 * XI must have triggered it. 1959 */ 1960 if (use_bbit_insns()) { 1961 uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2); 1962 } else { 1963 uasm_i_andi(&p, wr.r3, wr.r3, 2); 1964 uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); 1965 } 1966 if (PM_DEFAULT_MASK == 0) 1967 uasm_i_nop(&p); 1968 /* 1969 * We clobbered C0_PAGEMASK, restore it. On the other branch 1970 * it is restored in build_huge_tlb_write_entry. 

#ifdef CONFIG_HUGETLB_PAGE
	/*
	 * This is the entry point when build_r4000_tlbchange_handler_head
	 * spots a huge page.
	 */
	uasm_l_tlb_huge_update(&l, p);
	iPTE_LW(&p, wr.r1, wr.r2);
	build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl);
	build_tlb_probe_entry(&p);

	if (kernel_uses_smartmips_rixi) {
		/*
		 * If the page is not _PAGE_VALID, RI or XI could not
		 * have triggered it. Skip the expensive test.
		 */
		if (use_bbit_insns()) {
			uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID),
				      label_tlbl_goaround2);
		} else {
			uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID);
			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
		}
		uasm_i_nop(&p);

		uasm_i_tlbr(&p);
		/* Examine entrylo 0 or 1 based on ptr. */
		if (use_bbit_insns()) {
			uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8);
		} else {
			uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t));
			uasm_i_beqz(&p, wr.r3, 8);
		}
		/* load it in the delay slot */
		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0);
		/* load it if ptr is odd */
		UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1);
		/*
		 * If the entryLo (now in wr.r3) is valid (bit 1), RI or
		 * XI must have triggered it.
		 */
		if (use_bbit_insns()) {
			uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2);
		} else {
			uasm_i_andi(&p, wr.r3, wr.r3, 2);
			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
		}
		if (PM_DEFAULT_MASK == 0)
			uasm_i_nop(&p);
		/*
		 * We clobbered C0_PAGEMASK, restore it. On the other branch
		 * it is restored in build_huge_tlb_write_entry.
		 */
		build_restore_pagemask(&p, &r, wr.r3, label_nopage_tlbl, 0);

		uasm_l_tlbl_goaround2(&l, p);
	}
	uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID));
	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
#endif

	uasm_l_nopage_tlbl(&l, p);
	build_restore_work_registers(&p);
	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
	uasm_i_nop(&p);

	if ((p - handle_tlbl) > FASTPATH_SIZE)
		panic("TLB load handler fastpath space exceeded");

	uasm_resolve_relocs(relocs, labels);
	pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",
		 (unsigned int)(p - handle_tlbl));

	dump_handler(handle_tlbl, ARRAY_SIZE(handle_tlbl));
}

static void __cpuinit build_r4000_tlb_store_handler(void)
{
	u32 *p = handle_tlbs;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	struct work_registers wr;

	memset(handle_tlbs, 0, sizeof(handle_tlbs));
	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
	if (m4kc_tlbp_war())
		build_tlb_probe_entry(&p);
	build_make_write(&p, &r, wr.r1, wr.r2);
	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);

#ifdef CONFIG_HUGETLB_PAGE
	/*
	 * This is the entry point when
	 * build_r4000_tlbchange_handler_head spots a huge page.
	 */
	uasm_l_tlb_huge_update(&l, p);
	iPTE_LW(&p, wr.r1, wr.r2);
	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
	build_tlb_probe_entry(&p);
	uasm_i_ori(&p, wr.r1, wr.r1,
		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
#endif

	uasm_l_nopage_tlbs(&l, p);
	build_restore_work_registers(&p);
	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
	uasm_i_nop(&p);

	if ((p - handle_tlbs) > FASTPATH_SIZE)
		panic("TLB store handler fastpath space exceeded");

	uasm_resolve_relocs(relocs, labels);
	pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",
		 (unsigned int)(p - handle_tlbs));

	dump_handler(handle_tlbs, ARRAY_SIZE(handle_tlbs));
}
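
/*
 * The store handler above and the modify handler below differ only in
 * the permission test that guards the fastpath (build_pte_writable vs.
 * build_pte_modifiable); on success both mark the PTE referenced and
 * dirty, roughly the C equivalent of
 *
 *	pte |= _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY;
 *
 * (illustrative only; build_make_write() emits the actual PTE update,
 * using ll/sc sequences where an SMP configuration requires them)
 * before the updated entry is written back into the TLB.
 */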

static void __cpuinit build_r4000_tlb_modify_handler(void)
{
	u32 *p = handle_tlbm;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	struct work_registers wr;

	memset(handle_tlbm, 0, sizeof(handle_tlbm));
	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
	build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
	if (m4kc_tlbp_war())
		build_tlb_probe_entry(&p);
	/* Present and writable bits set, set accessed and dirty bits. */
	build_make_write(&p, &r, wr.r1, wr.r2);
	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);

#ifdef CONFIG_HUGETLB_PAGE
	/*
	 * This is the entry point when
	 * build_r4000_tlbchange_handler_head spots a huge page.
	 */
	uasm_l_tlb_huge_update(&l, p);
	iPTE_LW(&p, wr.r1, wr.r2);
	build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
	build_tlb_probe_entry(&p);
	uasm_i_ori(&p, wr.r1, wr.r1,
		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
#endif

	uasm_l_nopage_tlbm(&l, p);
	build_restore_work_registers(&p);
	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
	uasm_i_nop(&p);

	if ((p - handle_tlbm) > FASTPATH_SIZE)
		panic("TLB modify handler fastpath space exceeded");

	uasm_resolve_relocs(relocs, labels);
	pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",
		 (unsigned int)(p - handle_tlbm));

	dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm));
}

void __cpuinit build_tlb_refill_handler(void)
{
	/*
	 * The refill handler is generated per CPU; multi-node systems
	 * may have local storage for it. The load/store/modify handlers
	 * are only needed once and are built on the first invocation.
	 */
	static int run_once = 0;

#ifdef CONFIG_64BIT
	check_for_high_segbits = current_cpu_data.vmbits >
		(PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
#endif

	switch (current_cpu_type()) {
	case CPU_R2000:
	case CPU_R3000:
	case CPU_R3000A:
	case CPU_R3081E:
	case CPU_TX3912:
	case CPU_TX3922:
	case CPU_TX3927:
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
		build_r3000_tlb_refill_handler();
		if (!run_once) {
			build_r3000_tlb_load_handler();
			build_r3000_tlb_store_handler();
			build_r3000_tlb_modify_handler();
			run_once++;
		}
#else
		panic("No R3000 TLB refill handler");
#endif
		break;

	case CPU_R6000:
	case CPU_R6000A:
		panic("No R6000 TLB refill handler yet");
		break;

	case CPU_R8000:
		panic("No R8000 TLB refill handler yet");
		break;

	default:
		if (!run_once) {
			scratch_reg = allocate_kscratch();
#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
			build_r4000_setup_pgd();
#endif
			build_r4000_tlb_load_handler();
			build_r4000_tlb_store_handler();
			build_r4000_tlb_modify_handler();
			run_once++;
		}
		build_r4000_tlb_refill_handler();
	}
}

void __cpuinit flush_tlb_handlers(void)
{
	local_flush_icache_range((unsigned long)handle_tlbl,
				 (unsigned long)handle_tlbl + sizeof(handle_tlbl));
	local_flush_icache_range((unsigned long)handle_tlbs,
				 (unsigned long)handle_tlbs + sizeof(handle_tlbs));
	local_flush_icache_range((unsigned long)handle_tlbm,
				 (unsigned long)handle_tlbm + sizeof(handle_tlbm));
#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
	local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd,
				 (unsigned long)tlbmiss_handler_setup_pgd +
				 sizeof(handle_tlbm));
#endif
}
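
/*
 * Summary: build_tlb_refill_handler() runs once per CPU so that the
 * refill handler can live in node-local storage, while the shared
 * load/store/modify fastpaths are only synthesized on the first call.
 * flush_tlb_handlers() then writes the freshly generated code out of
 * the data cache and invalidates the corresponding instruction cache
 * lines so that instruction fetch observes the new handlers.
 */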