/*
 * linux/arch/arm/boot/compressed/head.S
 *
 * Copyright (C) 1996-2002 Russell King
 * Copyright (C) 2004 Hyok S. Choi (MPU support)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

		.arch	armv7-a
/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)

#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_XSCALE)
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c8, c0, 0
		.endm
#else
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c1, c0, 0
		.endm
#endif

#else

#include CONFIG_DEBUG_LL_INCLUDE

		.macro	writeb, ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb, tmp
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_S3C24XX)
		.macro	loadsp, rb, tmp
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp, rb, tmp
		addruart \rb, \tmp
		.endm
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
#ifdef CONFIG_CPU_CP15
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
#endif
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r9, 8		/* decompressed kernel end */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
		.arm				@ Always enter in ARM state
start:
		.type	start,#function
		.rept	7
		mov	r0, r0
		.endr
   ARM(		mov	r0, r0		)
   ARM(		b	1f		)
 THUMB(		adr	r12, BSYM(1f)	)
 THUMB(		bx	r12		)

		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
 THUMB(		.thumb			)
1:
 ARM_BE8(	setend	be )			@ go BE8 if compiled for BE8
		mrs	r9, cpsr
#ifdef CONFIG_ARM_VIRT_EXT
		bl	__hyp_stub_install	@ get into SVC mode, reversibly
#endif
		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer

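		/*
		 * Per the ARM boot protocol we were entered with r1 holding
		 * the machine type number and r2 pointing at the tagged list
		 * or DTB; r9 still holds the CPSR we were entered with, and
		 * is stashed in the SPSR once we are safely in SVC mode below.
		 */
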
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
not_angel:
		safe_svcmode_maskall r0
		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
						@ SPSR
		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text

#ifdef CONFIG_AUTO_ZRELADDR
		@ determine final kernel image address
		mov	r4, pc
		and	r4, r4, #0xf8000000
		add	r4, r4, #TEXT_OFFSET
#else
		ldr	r4, =zreladdr
#endif

		/*
		 * Set up a page table only if it won't overwrite ourself.
		 * That means r4 < pc && r4 - 16k page directory > &_end.
		 * Given that r4 > &_end is most infrequent, we add a rough
		 * additional 1MB of room for a possible appended DTB.
		 */
		mov	r0, pc
		cmp	r0, r4
		ldrcc	r0, LC0+32
		addcc	r0, r0, pc
		cmpcc	r4, r0
		orrcc	r4, r4, #1		@ remember we skipped cache_on
		blcs	cache_on

restart:	adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
		ldr	sp, [r0, #28]

		/*
		 * We might be running at a different address.  We need
		 * to fix up various pointers.
		 */
		sub	r0, r0, r1		@ calculate the delta offset
		add	r6, r6, r0		@ _edata
		add	r10, r10, r0		@ inflated kernel size location

		/*
		 * The kernel build system appends the size of the
		 * decompressed kernel at the end of the compressed data
		 * in little-endian form.
		 */
		ldrb	r9, [r10, #0]
		ldrb	lr, [r10, #1]
		orr	r9, r9, lr, lsl #8
		ldrb	lr, [r10, #2]
		ldrb	r10, [r10, #3]
		orr	r9, r9, lr, lsl #16
		orr	r9, r9, r10, lsl #24

#ifndef CONFIG_ZBOOT_ROM
		/* malloc space is above the relocated stack (64k max) */
		add	sp, sp, r0
		add	r10, sp, #0x10000
#else
		/*
		 * With ZBOOT_ROM the bss/stack is non relocatable,
		 * but someone could still run this code from RAM,
		 * in which case our reference is _edata.
		 */
		mov	r10, r6
#endif

		mov	r5, #0			@ init dtb size to 0
#ifdef CONFIG_ARM_APPENDED_DTB
/*
 * r0 = delta
 * r2 = BSS start
 * r3 = BSS end
 * r4 = final kernel address (possibly with LSB set)
 * r5 = appended dtb size (still unknown)
 * r6 = _edata
 * r7 = architecture ID
 * r8 = atags/device tree pointer
 * r9 = size of decompressed image
 * r10 = end of this image, including bss/stack/malloc space if non XIP
 * r11 = GOT start
 * r12 = GOT end
 * sp = stack pointer
 *
 * if there are device trees (dtb) appended to zImage, advance r10 so that the
 * dtb data will get relocated along with the kernel if necessary.
 */

		ldr	lr, [r6, #0]
#ifndef __ARMEB__
		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
#else
		ldr	r1, =0xd00dfeed
#endif
		cmp	lr, r1
		bne	dtb_check_done		@ not found

#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
		/*
		 * OK... Let's do some funky business here.
		 * If we do have a DTB appended to zImage, and we do have
		 * an ATAG list around, we want the latter to be translated
		 * and folded into the former here. To be on the safe side,
		 * let's temporarily move the stack away into the malloc
		 * area. No GOT fixup has occurred yet, but none of the
		 * code we're about to call uses any global variable.
		 */
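		/*
		 * atags_to_fdt() is called with r0 = the ATAG list, r1 = the
		 * appended DTB at _edata and r2 = the space available to grow
		 * the FDT (everything up to the temporary stack).
		 */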
		add	sp, sp, #0x10000
		stmfd	sp!, {r0-r3, ip, lr}
		mov	r0, r8
		mov	r1, r6
		sub	r2, sp, r6
		bl	atags_to_fdt

		/*
		 * If returned value is 1, there is no ATAG at the location
		 * pointed by r8.  Try the typical 0x100 offset from start
		 * of RAM and hope for the best.
		 */
		cmp	r0, #1
		sub	r0, r4, #TEXT_OFFSET
		bic	r0, r0, #1
		add	r0, r0, #0x100
		mov	r1, r6
		sub	r2, sp, r6
		bleq	atags_to_fdt

		ldmfd	sp!, {r0-r3, ip, lr}
		sub	sp, sp, #0x10000
#endif

		mov	r8, r6			@ use the appended device tree

		/*
		 * Make sure that the DTB doesn't end up in the final
		 * kernel's .bss area. To do so, we adjust the decompressed
		 * kernel size to compensate if that .bss size is larger
		 * than the relocated code.
		 */
		ldr	r5, =_kernel_bss_size
		adr	r1, wont_overwrite
		sub	r1, r6, r1
		subs	r1, r5, r1
		addhi	r9, r9, r1

		/* Get the dtb's size */
		ldr	r5, [r6, #4]
#ifndef __ARMEB__
		/* convert r5 (dtb size) to little endian */
		eor	r1, r5, r5, ror #16
		bic	r1, r1, #0x00ff0000
		mov	r5, r5, ror #8
		eor	r5, r5, r1, lsr #8
#endif

		/* preserve 64-bit alignment */
		add	r5, r5, #7
		bic	r5, r5, #7

		/* relocate some pointers past the appended dtb */
		add	r6, r6, r5
		add	r10, r10, r5
		add	sp, sp, r5
dtb_check_done:
#endif

/*
 * Check to see if we will overwrite ourselves.
 * r4 = final kernel address (possibly with LSB set)
 * r9 = size of decompressed image
 * r10 = end of this image, including bss/stack/malloc space if non XIP
 * We basically want:
 * r4 - 16k page directory >= r10 -> OK
 * r4 + image length <= address of wont_overwrite -> OK
 * Note: the possible LSB in r4 is harmless here.
 */
		add	r10, r10, #16384
		cmp	r4, r10
		bhs	wont_overwrite
		add	r10, r4, r9
		adr	r9, wont_overwrite
		cmp	r10, r9
		bls	wont_overwrite

/*
 * Relocate ourselves past the end of the decompressed kernel.
 * r6 = _edata
 * r10 = end of the decompressed kernel
 * Because we always copy ahead, we need to do it from the end and go
 * backward in case the source and destination overlap.
 */
		/*
		 * Bump to the next 256-byte boundary with the size of
		 * the relocation code added. This avoids overwriting
		 * ourself when the offset is small.
		 */
		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
		bic	r10, r10, #255

		/* Get start of code we want to copy and align it down. */
		adr	r5, restart
		bic	r5, r5, #31

/* Relocate the hyp vector base if necessary */
#ifdef CONFIG_ARM_VIRT_EXT
		mrs	r0, spsr
		and	r0, r0, #MODE_MASK
		cmp	r0, #HYP_MODE
		bne	1f

		bl	__hyp_get_vectors
		sub	r0, r0, r5
		add	r0, r0, r10
		bl	__hyp_set_vectors
1:
#endif

		sub	r9, r6, r5		@ size to copy
		add	r9, r9, #31		@ rounded up to a multiple
		bic	r9, r9, #31		@ ... of 32 bytes
		add	r6, r9, r5
		add	r9, r9, r10

1:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
		cmp	r6, r5
		stmdb	r9!, {r0 - r3, r10 - r12, lr}
		bhi	1b

		/* Preserve offset to relocated code. */
		sub	r6, r9, r6

#ifndef CONFIG_ZBOOT_ROM
		/* cache_clean_flush may use the stack, so relocate it */
		add	sp, sp, r6
#endif

		tst	r4, #1
		bleq	cache_clean_flush

		adr	r0, BSYM(restart)
		add	r0, r0, r6
		mov	pc, r0

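/*
 * We arrive at wont_overwrite either directly, when the compressed data is
 * already clear of the decompressed kernel's target area, or after the code
 * has copied itself above and re-run the overlap checks from the relocated
 * restart.
 */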
wont_overwrite:
/*
 * If delta is zero, we are running at the address we were linked at.
 * r0 = delta
 * r2 = BSS start
 * r3 = BSS end
 * r4 = kernel execution address (possibly with LSB set)
 * r5 = appended dtb size (0 if not present)
 * r7 = architecture ID
 * r8 = atags pointer
 * r11 = GOT start
 * r12 = GOT end
 * sp = stack pointer
 */
		orrs	r1, r0, r5
		beq	not_relocated

		add	r11, r11, r0
		add	r12, r12, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
		 * we need to fix up pointers into the BSS region.
		 * Note that the stack pointer has already been fixed up.
		 */
		add	r2, r2, r0
		add	r3, r3, r0

		/*
		 * Relocate all entries in the GOT table.
		 * Bump bss entries to _edata + dtb size
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ This fixes up C references
		cmp	r1, r2			@ if entry >= bss_start &&
		cmphs	r3, r1			@ bss_end > entry
		addhi	r1, r1, r5		@ entry += dtb size
		str	r1, [r11], #4		@ next entry
		cmp	r11, r12
		blo	1b

		/* bump our bss pointers too */
		add	r2, r2, r5
		add	r3, r3, r5

#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, r12
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * Did we skip the cache setup earlier?
		 * That is indicated by the LSB in r4.
		 * Do it now if so.
		 */
		tst	r4, #1
		bic	r4, r4, #1
		blne	cache_on

/*
 * The C runtime environment should now be set up sufficiently.
 * Set up some pointers, and start decompressing.
 * r4 = kernel execution address
 * r7 = architecture ID
 * r8 = atags pointer
 */
		mov	r0, r4
		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max
		mov	r3, r7
		bl	decompress_kernel
		bl	cache_clean_flush
		bl	cache_off
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer

#ifdef CONFIG_ARM_VIRT_EXT
		mrs	r0, spsr		@ Get saved CPU boot mode
		and	r0, r0, #MODE_MASK
		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
		bne	__enter_kernel		@ boot kernel directly

		adr	r12, .L__hyp_reentry_vectors_offset
		ldr	r0, [r12]
		add	r0, r0, r12

		bl	__hyp_set_vectors
		__HVC(0)			@ otherwise bounce to hyp mode

		b	.			@ should never be reached

		.align	2
.L__hyp_reentry_vectors_offset:	.long __hyp_reentry_vectors - .
#else
		b	__enter_kernel
#endif

		.align	2
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_edata			@ r6
		.word	input_data_end - 4	@ r10 (inflated size location)
		.word	_got_start		@ r11
		.word	_got_end		@ ip
		.word	.L_user_stack_end	@ sp
		.word	_end - restart + 16384 + 1024*1024
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =0x10000100		@ params_phys for RPC
		mov	pc, lr
		.ltorg
		.align
#endif

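/*
 * cache_on below (and cache_off/cache_clean_flush further down) simply loads
 * r3 with the byte offset of the wanted method within a proc_types entry
 * (8 = on, 12 = off, 16 = flush) and branches to call_cache_fn, which picks
 * the entry matching the running CPU.
 */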
/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = atags pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

/*
 * Initialize the highest-priority protection region, PR7, to cover the
 * whole 32-bit address space, cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr

__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		/* ?? this overwrites the value constructed above? */
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		/* ?? invalidate for the second time? */
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
#define CB_BITS 0x08
#else
#define CB_BITS 0x0c
#endif

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12		@ XN|U + section mapping
		orr	r1, r1, #3 << 10	@ AP=11
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt > start of RAM
		cmphs	r10, r1			@   && end of RAM > virt
		bic	r1, r1, #0x1c		@ clear XN|U + C + B
		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
		orrhs	r1, r1, r6		@ set RAM section settings
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
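/*
 * The loop above fills all 4096 level 1 entries with a flat 1:1 section
 * mapping (4GB in 1MB sections); only the 256MB window assumed to be RAM
 * gets the cacheable/bufferable attributes passed in r6, everything else
 * is mapped uncached with XN set.
 */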
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		orr	r1, r6, #0x04		@ ensure B is set for this
		orr	r1, r1, #3 << 10
		mov	r2, pc
		mov	r2, r2, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
ENDPROC(__setup_mmu)

@ Enable unaligned access on v6, to allow better code generation
@ for the decompressor C code:
__armv6_mmu_cache_on:
		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
		bic	r0, r0, #2		@ A (no unaligned access fault)
		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
		b	__armv4_mmu_cache_on

__arm926ejs_mmu_cache_on:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
		mov	r0, #4			@ put dcache in WT mode
		mcr	p15, 7, r0, c15, c0, 0
#endif

__armv4_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		mov	r6, #CB_BITS | 0x12	@ U
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mov	pc, r12

__armv7_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
		movne	r6, #CB_BITS | 0x02	@ !XN
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
		bic	r0, r0, #2		@ A (no unaligned access fault)
		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
						@ (needed for ARM1176)
#ifdef CONFIG_MMU
 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		mrcne	p15, 0, r6, c2, c0, 2	@ read ttb control reg
		orrne	r0, r0, #1		@ MMU enabled
		movne	r1, #0xfffffffd		@ domain 0 = client
		bic	r6, r6, #1 << 31	@ 32-bit translation system
		bic	r6, r6, #3 << 0		@ use only ttbr0
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcrne	p15, 0, r6, c2, c0, 2	@ load ttb control
#endif
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__fa526_cache_on:
		mov	r12, lr
		mov	r6, #CB_BITS | 0x12	@ U
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mov	pc, r12

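/*
 * Note on __common_mmu_cache_on below: the control register write that
 * enables the MMU and the read-back that follows sit on a cache-line-aligned
 * boundary, and "sub pc, lr, r0, lsr #32" is simply a return (LSR #32 yields
 * zero) whose dependency on the read-back value drains the pipeline before
 * execution resumes at the caller.
 */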
__common_mmu_cache_on:
#ifndef CONFIG_THUMB2_KERNEL
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
#endif

#define PROC_ENTRY_SIZE (4*5)

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r9  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r9, c0, c0	@ get processor ID
#else
		ldr	r9, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r9		@ (real ^ match)
		tst	r1, r2			@       & mask
 ARM(		addeq	pc, r12, r3		) @ call cache function
 THUMB(		addeq	r12, r3			)
 THUMB(		moveq	pc, r12			) @ call cache function
		add	r12, r12, #PROC_ENTRY_SIZE
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.align	2
		.type	proc_types,#object
proc_types:
		.word	0x41000000		@ old ARM ID
		.word	0xff00f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		W(b)	__armv3_mpu_cache_on
		W(b)	__armv3_mpu_cache_off
		W(b)	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		W(b)	__armv4_mpu_cache_on
		W(b)	__armv4_mpu_cache_off
		W(b)	__armv4_mpu_cache_flush

		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
		.word	0xff0ffff0
		W(b)	__arm926ejs_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		@ Everything from here on will be the new ID system.
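		@ "New ID system" means ID registers carrying an architecture
		@ field in bits [19:16]; the architecture-only entries further
		@ down match on nothing but that field.
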
		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56056900
		.word	0xffffff00		@ PXA9xx
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

#ifdef CONFIG_CPU_FEROCEON_OLD_ID
		/* this conflicts with the standard ARMv5TE entry */
		.long	0x41009260		@ Old Feroceon
		.long	0xff00fff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
#endif

		.word	0x66015261		@ FA526
		.word	0xff01fff1
		W(b)	__fa526_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__fa526_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		W(b)	__armv6_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.size	proc_types, . - proc_types

		/*
		 * If you get a "non-constant expression in ".if" statement"
		 * error from the assembler on this line, check that you have
		 * not accidentally written a "b" instruction where you should
		 * have written W(b).
		 */
		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
		.error "The size of one or more proc_types entries is wrong."
		.endif

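/*
 * Matching example: a CPU using the CPUID scheme reports 0xf in the
 * architecture field (bits [19:16]) of the main ID register, so
 * (id ^ 0x000f0000) & 0x000f0000 == 0 and the __armv7 entry above is
 * selected.
 */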
/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
#endif
		mov	pc, lr

__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
#ifdef CONFIG_MMU
		bic	r0, r0, #0x000d
#else
		bic	r0, r0, #0x000c
#endif
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r12, lr
		bl	__armv7_mmu_cache_flush
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 * This routine must preserve:
 *  r4, r6, r7, r8
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv4_mpu_cache_flush:
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr

__fa526_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv6_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv7_mmu_cache_flush:
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
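/*
 * The hierarchical path below walks the cache levels described by CLIDR down
 * to the Level of Coherency: for each data/unified level it reads CCSIDR for
 * the line length, way count and set count, then cleans and invalidates every
 * set/way combination with DCCISW (c7, c14, 2).
 */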
hierarchical:
		mcr	p15, 0, r10, c7, c10, 5	@ DMB
		stmfd	sp!, {r0-r7, r9-r11}
		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
		ands	r3, r0, #0x7000000	@ extract loc from clidr
		mov	r3, r3, lsr #23		@ left align loc bit field
		beq	finished		@ if loc is 0, then no need to clean
		mov	r10, #0			@ start clean at cache level 0
loop1:
		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
		and	r1, r1, #7		@ mask out the bits for the current cache only
		cmp	r1, #2			@ see what cache we have at this level
		blt	skip			@ skip if no cache, or just i-cache
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr & csidr
		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
		and	r2, r1, #7		@ extract the length of the cache lines
		add	r2, r2, #4		@ add 4 (line length offset)
		ldr	r4, =0x3ff
		ands	r4, r4, r1, lsr #3	@ find the maximum way number (ways - 1)
		clz	r5, r4			@ find bit position of way size increment
		ldr	r7, =0x7fff
		ands	r7, r7, r1, lsr #13	@ extract the maximum set index
loop2:
		mov	r9, r4			@ create working copy of max way size
loop3:
 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
 THUMB(		lsl	r6, r9, r5		)
 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
 THUMB(		lsl	r6, r7, r2		)
 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
		subs	r9, r9, #1		@ decrement the way
		bge	loop3
		subs	r7, r7, #1		@ decrement the index
		bge	loop2
skip:
		add	r10, r10, #2		@ increment cache number
		cmp	r3, r10
		bgt	loop1
finished:
		ldmfd	sp!, {r0-r7, r9-r11}
		mov	r10, #0			@ switch back to cache level 0
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr

__armv5tej_mmu_cache_flush:
1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr

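/*
 * The ARMv4 flush below sizes a dummy-read loop from the cache type register:
 * D-cache size and line length are decoded from it when present (falling back
 * to 32K with 32-byte lines when the register reads back as the main ID), and
 * that much memory is then read to push dirty lines out before the explicit
 * flush operations.
 */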
__armv4_mmu_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr	r3, [r1]		) @ s/w flush D cache
 THUMB(		add	r1, r1, r11		)
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.align	2
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

@ phex corrupts {r0, r1, r2, r3}
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

@ puts corrupts {r0, r1, r2, r3}
puts:		loadsp	r3, r1
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
@ putc corrupts {r0, r1, r2, r3}
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3, r1
		b	2b

@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

		.ltorg

#ifdef CONFIG_ARM_VIRT_EXT
.align 5
__hyp_reentry_vectors:
		W(b)	.			@ reset
		W(b)	.			@ undef
		W(b)	.			@ svc
		W(b)	.			@ pabort
		W(b)	.			@ dabort
		W(b)	__enter_kernel		@ hyp
		W(b)	.			@ irq
		W(b)	.			@ fiq
#endif /* CONFIG_ARM_VIRT_EXT */

__enter_kernel:
		mov	r0, #0			@ must be 0
 ARM(		mov	pc, r4	)		@ call kernel
 THUMB(		bx	r4	)		@ entry point is always ARM

reloc_code_end:

		.align
		.section ".stack", "aw", %nobits
.L_user_stack:	.space	4096
.L_user_stack_end: