/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/config.h>
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 *
 * Two macros are provided per platform:
 *   loadsp rb      - load the debug port base address into \rb
 *   writeb ch, rb  - emit the character held in \ch via the port in \rb
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c1, 0
		.endm
#else

#include <asm/arch/debug-macro.S>

		.macro	writeb, ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_IOP331)
		.macro	loadsp, rb
		mov	\rb, #0xff000000
		orr	\rb, \rb, #0x00ff0000
		orr	\rb, \rb, #0x0000f700	@ location of the UART
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C2410_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp, rb
		addruart \rb
		.endm
#endif
#endif
#endif

/*
 * kputc val       - print the single character \val (calls putc)
 * kphex val, len  - print \val as \len hex digits (calls phex)
 *
 * Both use "bl", so lr is overwritten, and r0/r1 are loaded with the
 * arguments.  putc and phex only exist when DEBUG is defined.
 */
		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

/*
 * Emitted at the top of the relocated copy loop.  With DEBUG defined it
 * prints r6:r7:<control reg> on one line and r5-r8>r4 on the next;
 * otherwise it expands to nothing.
 */
		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r8, 8		/* decompressed kernel end */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

/*
 * Emitted after the relocated copy loop.  With DEBUG defined it prints
 * r5 and then dumps memory starting at r4; otherwise it expands to
 * nothing.
 */
		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, #0			@ r8 = 0 (the incoming r0 is not kept)

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
		swi	0x123456		@ angel_SWI_ARM
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7 and r8.
		 */

		.text
		/*
		 * LC0 holds link-time addresses.  Its first word is its own
		 * link-time address, so (run-time LC0) - (that word) is the
		 * offset between where we run and where we were linked.
		 */
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address
		 *   r6 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r6, r6, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
		 * we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#endif

		/*
		 * Zero r2..r3, four words per iteration.  The loop only
		 * tests after a full group of four stores.
		 */
not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * The C runtime environment should now be setup
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
		/*
		 * Copy reloc_start..reloc_end to just past the decompressed
		 * kernel, 12 words per iteration.  r8-r13 are used as the
		 * transfer registers, so sp (r13) is overwritten here.
		 */
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		cmp	r2, r3
		blo	1b

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

/*
 * Link-time address table, loaded in one ldmia by the start code.  The
 * trailing comment on each word names the register it is loaded into.
 */
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	zreladdr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =params_phys
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to setup some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = run-time address of "start"
 * On exit,
 *  r1, r2, r3, r8, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7
 *
 * NOTE(review): r0 is also written by __setup_mmu and by the cache-on
 * methods below, and call_cache_fn reloads r6 from the CP15 ID register,
 * so callers should not rely on r0 or on a prior r6 value either.
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

/*
 * Build a 16K table of 1MB 1:1 section entries just below r4.
 * On exit r3 = table base (16K aligned); r0, r1, r2, r8, r9 are used.
 */
__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r8, r0, lsr #18
		mov	r8, r8, lsl #18		@ start of RAM
		add	r9, r8, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r8			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r9			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20		@ r2 = index of the 1MB section we run in
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2	@ r0 = address of that table entry
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr

__armv4_cache_on:
		mov	r12, lr			@ lr is reused by the nested calls
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
		bl	__common_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mov	pc, r12

__arm6_cache_on:
		mov	r12, lr			@ lr is reused by the nested calls
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

/*
 * On entry, r0 = control register value to load, r3 = page table base.
 * With DEBUG defined the 0x000d bits are left as the caller supplied them.
 */
__common_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, lr

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		.align	5
reloc_start:	add	r8, r5, r0		@ r8 = end of decompressed kernel
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r9 - r13}
		.endr

		cmp	r5, r8
		blo	1b
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0
		mov	r1, r7			@ restore architecture number
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 *
 * Each proc_types entry is five words (4*5 bytes): match, mask and three
 * instructions, so offsets 8, 12 and 16 select the on/off/flush method.
 * The search has no explicit end test; it relies on the final table
 * entry (match 0, mask 0) matching every ID.
 */

call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_cache_off	@ works, but slow
		b	__arm6_cache_off
		mov	pc, lr
@		b	__arm6_cache_on		@ untested
@		b	__arm6_cache_off
@		b	__armv3_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_cache_off
		b	__arm7_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_cache_on
		b	__armv4_cache_off
		mov	pc, lr

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00070000		@ ARMv6
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv6_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_cache_off

__arm7_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_cache_off

/*
 * On entry, r0 = control register value to load (written as-is, since
 * the register is not read back here).
 */
__armv3_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv6_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

/*
 * Software flush: read through a region twice the D-cache size, one
 * load per cache line, starting at the current pc rounded down to 64
 * bytes.  Size and line length come from the cache type register when
 * it is present, otherwise the defaults below are used.
 */
__armv4_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r6			@ cache ID register present?
		beq	no_cache_id		@ same value as the main ID: absent
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		bic	r1, pc, #63		@ align to longest cache line
		add	r2, r1, r2
1:		ldr	r3, [r1], r11		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

/*
 * ARMv3 flush method.  Currently only referenced from the commented-out
 * ARM6 entry in proc_types.  r1 is the scratch register (it is in the
 * cache_clean_flush clobber list); r0 must stay untouched because it
 * carries the decompressed kernel length across the flush.
 */
__armv3_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
						@ (write the zero in r1, not the live r0)
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

/*
 * phex: print r0 as r1 upper-case hex digits.
 *
 * On entry, r0 = value, r1 = digit count (phexbuf is 12 bytes, so the
 * count plus the terminating NUL must fit in that).
 * The digits are written into phexbuf from the last position backwards,
 * then control passes to puts with r0 = phexbuf; puts returns to our
 * caller through lr.  r0-r3 are used.
 */
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]		@ NUL terminator after the digits
1:		subs	r1, r1, #1
		movmi	r0, r3			@ all digits stored: print the buffer
		bmi	puts
		and	r2, r0, #15		@ take the lowest nibble
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7		@ skip from past 9 up to A
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

/*
 * puts: print the NUL-terminated string at r0.
 * putc: print the single character in r0.
 *
 * Both share the output loop at label 2.  After each character a busy
 * wait of 0x20000 iterations is executed, and a '\n' is followed by a
 * '\r'.  putc enters with r0 forced to 0, which makes the "teq r0, #0"
 * test below return after one character instead of fetching more.
 * r0-r3 are used.
 */
puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

/*
 * memdump: hex dump 64 words starting at r0, eight words per line.
 *
 * Each line starts with the address and a colon; an extra space is
 * printed after the fourth word of a line.  The return address is kept
 * in r10 because the nested calls overwrite lr.  r10, r11 and r12 are
 * used in addition to the registers used by phex and putc.
 */
memdump:	mov	r12, r0			@ r12 = base address
		mov	r10, lr
		mov	r11, #0			@ r11 = word index
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7			@ last word of this line?
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096