/*
 *  linux/arch/i386/kernel/head.S -- the 32-bit startup code.
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Enhanced CPU detection and feature setting code by Mike Jagdis
 *  and Martin Mares, November 1997.
 */

.text
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>

/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)

/*
 * References to members of the new_cpu_data structure.
 */

#define X86		new_cpu_data+CPUINFO_x86
#define X86_VENDOR	new_cpu_data+CPUINFO_x86_vendor
#define X86_MODEL	new_cpu_data+CPUINFO_x86_model
#define X86_MASK	new_cpu_data+CPUINFO_x86_mask
#define X86_HARD_MATH	new_cpu_data+CPUINFO_hard_math
#define X86_CPUID	new_cpu_data+CPUINFO_cpuid_level
#define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
#define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id

/*
 * This is how much memory *in addition to the memory covered up to
 * and including _end* we need mapped initially.
 * We need:
 *  - one bit for each possible page, but only in low memory, which means
 *    2^32/4096/8 = 128K worst case (4G/4G split.)
 *  - enough space to map all low memory, which means
 *    (2^32/4096) / 1024 pages (worst case, non-PAE)
 *    (2^32/4096) / 512 + 4 pages (worst case for PAE)
 *  - a few pages for allocator use before the kernel pagetable has
 *    been set up
 *
 * Modulo rounding, each megabyte assigned here requires a kilobyte of
 * memory, which is currently unreclaimed.
 *
 * This should be a multiple of a page.
 */
LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)

/*
 * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
 * pagetables from above the 16MB DMA limit, so we'll have to set
 * up pagetables 16MB more (worst-case):
 */
#ifdef CONFIG_DEBUG_PAGEALLOC
LOW_PAGES = LOW_PAGES + 0x1000000
#endif

#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#else
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
#endif
BOOTBITMAP_SIZE = LOW_PAGES / 8
ALLOCATOR_SLOP = 4

INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
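/*
 * Worked example (illustrative only, assuming PAGE_SHIFT_asm = 12,
 * non-PAE with PTRS_PER_PGD = 1024, and no CONFIG_DEBUG_PAGEALLOC):
 *
 *   LOW_PAGES           = 1 << (32-12)    = 0x100000 pages
 *   PAGE_TABLE_SIZE     = 0x100000 / 1024 = 1024 pages
 *   BOOTBITMAP_SIZE     = 0x100000 / 8    = 0x20000 bytes (128K)
 *   INIT_MAP_BEYOND_END = 0x20000 + (1024 + 4) * 4096
 *                       = 4341760 bytes (0x424000, a bit over 4MB)
 */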

/*
 * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
 * %esi points to the real-mode code as a 32-bit pointer.
 * CS and DS must be 4 GB flat segments, but we don't depend on
 * any particular GDT layout, because we load our own as soon as we
 * can.
 */
.section .text.head,"ax",@progbits
ENTRY(startup_32)
	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
	   us to not reload segments */
	testb $(1<<6), BP_loadflags(%esi)
	jnz 2f

/*
 * Set segments to known values.
 */
	lgdt pa(boot_gdt_descr)
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
2:

/*
 * Clear BSS first so that there are no surprises...
 */
	cld
	xorl %eax,%eax
	movl $pa(__bss_start),%edi
	movl $pa(__bss_stop),%ecx
	subl %edi,%ecx
	shrl $2,%ecx
	rep ; stosl
/*
 * Copy bootup parameters out of the way.
 * Note: %esi still has the pointer to the real-mode data.
 * With kexec as the boot loader, the parameter segment might be loaded
 * beyond the kernel image and might not even be addressable by the early
 * boot page tables (the kexec-on-panic case).  Hence copy out the
 * parameters before initializing the page tables.
 */
	movl $pa(boot_params),%edi
	movl $(PARAM_SIZE/4),%ecx
	cld
	rep
	movsl
	movl pa(boot_params) + NEW_CL_POINTER,%esi
	andl %esi,%esi
	jz 1f			# No command line
	movl $pa(boot_command_line),%edi
	movl $(COMMAND_LINE_SIZE/4),%ecx
	rep
	movsl
1:

#ifdef CONFIG_PARAVIRT
	/* This can only trip for a broken bootloader... */
	cmpw $0x207, pa(boot_params + BP_version)
	jb default_entry

	/* Paravirt-compatible boot parameters.  Look to see what architecture
	   we're booting under. */
	movl pa(boot_params + BP_hardware_subarch), %eax
	cmpl $num_subarch_entries, %eax
	jae bad_subarch

	movl pa(subarch_entries)(,%eax,4), %eax
	subl $__PAGE_OFFSET, %eax
	jmp *%eax

bad_subarch:
WEAK(lguest_entry)
WEAK(xen_entry)
	/* Unknown implementation; there's really
	   nothing we can do at this point. */
	ud2a

	__INITDATA

subarch_entries:
	.long default_entry		/* normal x86/PC */
	.long lguest_entry		/* lguest hypervisor */
	.long xen_entry			/* Xen hypervisor */
num_subarch_entries = (. - subarch_entries) / 4
.previous
#endif /* CONFIG_PARAVIRT */

/*
 * Initialize page tables.  This creates a PDE and a set of page
 * tables, which are located immediately beyond _end.  The variable
 * init_pg_tables_end is set up to point to the first "safe" location.
 * Mappings are created both at virtual address 0 (identity mapping)
 * and at PAGE_OFFSET, for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
 *
 * Note that the stack is not yet set up!
 */
#define PTE_ATTR	0x007		/* PRESENT+RW+USER */
#define PDE_ATTR	0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */
#define PGD_ATTR	0x001		/* PRESENT (no other attributes) */
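/*
 * For reference, the ia32 page-table attribute bits used above:
 *   bit 0 (0x001) P   - present
 *   bit 1 (0x002) R/W - writable
 *   bit 2 (0x004) U/S - user-accessible
 *   bit 5 (0x020) A   - accessed
 *   bit 6 (0x040) D   - dirty
 * so 0x007 = P+RW+US and 0x067 = P+RW+US+A+D, matching the names above.
 */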

default_entry:
#ifdef CONFIG_X86_PAE

	/*
	 * In PAE mode swapper_pg_dir is statically defined to contain enough
	 * entries to cover the VMSPLIT option (that is the top 1, 2 or 3
	 * entries). The identity mapping is handled by pointing two PGD
	 * entries to the first kernel PMD.
	 *
	 * Note that the upper half of each PMD or PTE is always zero at
	 * this stage.
	 */

#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30)	/* Number of kernel PMDs */

	xorl %ebx,%ebx				/* %ebx is kept at zero */

	movl $pa(pg0), %edi
	movl $pa(swapper_pg_pmd), %edx
	movl $PTE_ATTR, %eax
10:
	leal PDE_ATTR(%edi),%ecx		/* Create PMD entry */
	movl %ecx,(%edx)			/* Store PMD entry */
						/* Upper half already zero */
	addl $8,%edx
	movl $512,%ecx
11:
	stosl
	xchgl %eax,%ebx
	stosl
	xchgl %eax,%ebx
	addl $0x1000,%eax
	loop 11b

	/*
	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
	 * bytes beyond the end of our own page tables.
	 */
	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
	cmpl %ebp,%eax
	jb 10b
1:
	movl %edi,pa(init_pg_tables_end)

	/* Do early initialization of the fixmap area */
	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
	movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
#else	/* Not PAE */

page_pde_offset = (__PAGE_OFFSET >> 20);

	movl $pa(pg0), %edi
	movl $pa(swapper_pg_dir), %edx
	movl $PTE_ATTR, %eax
10:
	leal PDE_ATTR(%edi),%ecx		/* Create PDE entry */
	movl %ecx,(%edx)			/* Store identity PDE entry */
	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
	addl $4,%edx
	movl $1024, %ecx
11:
	stosl
	addl $0x1000,%eax
	loop 11b
	/*
	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
	 * bytes beyond the end of our own page tables; the +PTE_ATTR (0x007)
	 * is the attribute bits.
	 */
	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
	cmpl %ebp,%eax
	jb 10b
	movl %edi,pa(init_pg_tables_end)

	/* Do early initialization of the fixmap area */
	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
	movl %eax,pa(swapper_pg_dir+0xffc)
#endif
	jmp 3f
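/*
 * A rough C sketch of the non-PAE loop above, for illustration only
 * (the pointer names and casts are hypothetical, not kernel code):
 *
 *	u32 *pte = (u32 *)pa(pg0);
 *	u32 *pde = (u32 *)pa(swapper_pg_dir);
 *	u32 frame = 0 + PTE_ATTR;			// phys 0, plus attrs
 *	do {
 *		u32 entry = (u32)pte + PDE_ATTR;
 *		pde[0] = entry;				// identity mapping
 *		pde[__PAGE_OFFSET >> 22] = entry;	// kernel mapping
 *		pde++;
 *		for (int i = 0; i < 1024; i++) {	// fill one page table
 *			*pte++ = frame;
 *			frame += 0x1000;
 *		}
 *	} while (frame < (u32)pte + INIT_MAP_BEYOND_END + PTE_ATTR);
 *	init_pg_tables_end = pte;
 */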
/*
 * Non-boot CPU entry point; entered from trampoline.S.
 * We can't lgdt here, because lgdt itself uses a data segment, but
 * we know the trampoline has already loaded the boot_gdt for us.
 *
 * If CPU hotplug is not supported then this code can go in the init
 * section, which will be freed later.
 */

#ifndef CONFIG_HOTPLUG_CPU
.section .init.text,"ax",@progbits
#endif

#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
	cld
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
#endif /* CONFIG_SMP */
3:

/*
 * New page tables may be in 4Mbyte page mode and may be using the
 * global pages.
 *
 * NOTE! If we are on a 486 we may have no cr4 at all! So we do not
 * try to touch it unless we really have some bits in it to set.
 * This won't work if the BSP implements cr4 but this AP does not --
 * very unlikely, but be warned! The same applies to the pse feature
 * if not equally supported. --macro
 *
 * NOTE! We have to correct for the fact that we're not yet offset
 * PAGE_OFFSET..
 */
#define cr4_bits pa(mmu_cr4_features)
	movl cr4_bits,%edx
	andl %edx,%edx
	jz 6f
	movl %cr4,%eax		# Turn on paging options (PSE,PAE,..)
	orl %edx,%eax
	movl %eax,%cr4

	btl $5, %eax		# check if PAE is enabled
	jnc 6f

	/* Check if extended functions are implemented */
	movl $0x80000000, %eax
	cpuid
	cmpl $0x80000000, %eax
	jbe 6f
	mov $0x80000001, %eax
	cpuid
	/* Execute Disable bit supported? */
	btl $20, %edx
	jnc 6f

	/* Set up EFER (Extended Feature Enable Register) */
	movl $0xc0000080, %ecx
	rdmsr

	btsl $11, %eax		# set the NX-enable bit
	/* Make changes effective */
	wrmsr

6:

/*
 * Enable paging
 */
	movl $pa(swapper_pg_dir),%eax
	movl %eax,%cr3		/* set the page table pointer.. */
	movl %cr0,%eax
	orl $X86_CR0_PG,%eax
	movl %eax,%cr0		/* ..and set paging (PG) bit */
	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
1:
	/* Set up the stack pointer */
	lss stack_start,%esp

/*
 * Initialize eflags.  Some BIOSes leave bits like NT set.  This would
 * confuse the debugger if this code is traced.
 * XXX - best to initialize before switching to protected mode.
 */
	pushl $0
	popfl

#ifdef CONFIG_SMP
	cmpb $0, ready
	jz 1f			/* Initial CPU cleans BSS */
	jmp checkCPUtype
1:
#endif /* CONFIG_SMP */

/*
 * Start the 32-bit system setup.  We need to re-do some of the things
 * done in 16-bit mode for the "real" operations.
 */
	call setup_idt

checkCPUtype:

	movl $-1,X86_CPUID	# -1 for no CPUID initially

/* check if it is 486 or 386. */
/*
 * XXX - this does a lot of unnecessary setup.  Alignment checks don't
 * apply at our cpl of 0 and the stack ought to be aligned already, and
 * we don't need to preserve eflags.
 */

	movb $3,X86		# at least 386
	pushfl			# push EFLAGS
	popl %eax		# get EFLAGS
	movl %eax,%ecx		# save original EFLAGS
	xorl $0x240000,%eax	# flip AC and ID bits in EFLAGS
	pushl %eax		# copy to EFLAGS
	popfl			# set EFLAGS
	pushfl			# get new EFLAGS
	popl %eax		# put it in eax
	xorl %ecx,%eax		# change in flags
	pushl %ecx		# restore original EFLAGS
	popfl
	testl $0x40000,%eax	# check if AC bit changed
	je is386

	movb $4,X86		# at least 486
	testl $0x200000,%eax	# check if ID bit changed
	je is486

	/* get vendor info */
	xorl %eax,%eax		# call CPUID with 0 -> return vendor ID
	cpuid
	movl %eax,X86_CPUID		# save CPUID level
	movl %ebx,X86_VENDOR_ID		# lo 4 chars
	movl %edx,X86_VENDOR_ID+4	# next 4 chars
	movl %ecx,X86_VENDOR_ID+8	# last 4 chars

	orl %eax,%eax		# do we have processor info as well?
	je is486

	movl $1,%eax		# Use the CPUID instruction to get CPU type
	cpuid
	movb %al,%cl		# save reg for future use
	andb $0x0f,%ah		# mask processor family
	movb %ah,X86
	andb $0xf0,%al		# mask model
	shrb $4,%al
	movb %al,X86_MODEL
	andb $0x0f,%cl		# mask stepping (mask revision)
	movb %cl,X86_MASK
	movl %edx,X86_CAPABILITY

is486:	movl $0x50022,%ecx	# set AM, WP, NE and MP
	jmp 2f

is386:	movl $2,%ecx		# set MP
2:	movl %cr0,%eax
	andl $0x80000011,%eax	# Save PG,PE,ET
	orl %ecx,%eax
	movl %eax,%cr0

	call check_x87
	lgdt early_gdt_descr
	lidt idt_descr
	ljmp $(__KERNEL_CS),$1f
1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
	movl %eax,%ss			# after changing gdt.
	movl %eax,%fs			# gets reset once there's real percpu

	movl $(__USER_DS),%eax		# DS/ES contains default USER segment
	movl %eax,%ds
	movl %eax,%es

	xorl %eax,%eax			# Clear GS and LDT
	movl %eax,%gs
	lldt %ax

	cld			# gcc2 wants the direction flag cleared at all times
	pushl $0		# fake return address for unwinder
#ifdef CONFIG_SMP
	movb ready, %cl
	movb $1, ready
	cmpb $0,%cl		# the first CPU calls start_kernel
	je 1f
	movl $(__KERNEL_PERCPU), %eax
	movl %eax,%fs		# set this cpu's percpu
	jmp initialize_secondary # all other CPUs call initialize_secondary
1:
#endif /* CONFIG_SMP */
	jmp i386_start_kernel

/*
 * We depend on ET to be correct.  This checks for 287/387.
 */
check_x87:
	movb $0,X86_HARD_MATH
	clts
	fninit
	fstsw %ax
	cmpb $0,%al
	je 1f
	movl %cr0,%eax		/* no coprocessor: have to set bits */
	xorl $4,%eax		/* set EM */
	movl %eax,%cr0
	ret
	ALIGN
1:	movb $1,X86_HARD_MATH
	.byte 0xDB,0xE4		/* fsetpm for 287, ignored by 387 */
	ret
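/*
 * A note on the gate encoding used by setup_idt and set_early_handler
 * below: each 8-byte interrupt gate is built in two registers as
 *
 *	%eax = selector << 16 | handler_address[15:0]	(low dword)
 *	%edx = handler_address[31:16] << 16 | 0x8E00	(high dword)
 *
 * where 0x8E00 means present, dpl=0, 32-bit interrupt gate.
 */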

/*
 * setup_idt
 *
 * Sets up an idt with 256 entries pointing to ignore_int,
 * interrupt gates.  It doesn't actually load the idt - that can
 * be done only after paging has been enabled and the kernel moved
 * to PAGE_OFFSET.  Interrupts are enabled elsewhere, when we can
 * be relatively sure everything is ok.
 *
 * Warning: %esi is live across this function.
 */
setup_idt:
	lea ignore_int,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax		/* selector = 0x0010 = cs */
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */

	lea idt_table,%edi
	mov $256,%ecx
rp_sidt:
	movl %eax,(%edi)
	movl %edx,4(%edi)
	addl $8,%edi
	dec %ecx
	jne rp_sidt

.macro	set_early_handler handler,trapno
	lea \handler,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */
	lea idt_table,%edi
	movl %eax,8*\trapno(%edi)
	movl %edx,8*\trapno+4(%edi)
.endm

	set_early_handler handler=early_divide_err,trapno=0
	set_early_handler handler=early_illegal_opcode,trapno=6
	set_early_handler handler=early_protection_fault,trapno=13
	set_early_handler handler=early_page_fault,trapno=14

	ret

early_divide_err:
	xor %edx,%edx
	pushl $0		/* fake errcode */
	jmp early_fault

early_illegal_opcode:
	movl $6,%edx
	pushl $0		/* fake errcode */
	jmp early_fault

early_protection_fault:
	movl $13,%edx
	jmp early_fault

early_page_fault:
	movl $14,%edx
	jmp early_fault

early_fault:
	cld
#ifdef CONFIG_PRINTK
	pusha
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag
	je hlt_loop
	incl early_recursion_flag
	movl %cr2,%eax
	pushl %eax
	pushl %edx		/* trapno */
	pushl $fault_msg
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
	call printk
#endif
#endif
	call dump_stack
hlt_loop:
	hlt
	jmp hlt_loop

/* This is the default interrupt "handler" :-) */
	ALIGN
ignore_int:
	cld
#ifdef CONFIG_PRINTK
	pushl %eax
	pushl %ecx
	pushl %edx
	pushl %es
	pushl %ds
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag
	je hlt_loop
	incl early_recursion_flag
	pushl 16(%esp)
	pushl 24(%esp)
	pushl 32(%esp)
	pushl 40(%esp)
	pushl $int_msg
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
	call printk
#endif
	addl $(5*4),%esp
	popl %ds
	popl %es
	popl %edx
	popl %ecx
	popl %eax
#endif
	iret

.section .text
/*
 * Real beginning of normal "text" segment
 */
ENTRY(stext)
ENTRY(_stext)

/*
 * BSS section
 */
.section ".bss.page_aligned","wa"
	.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
swapper_pg_pmd:
	.fill 1024*KPMDS,4,0
#else
ENTRY(swapper_pg_dir)
	.fill 1024,4,0
#endif
swapper_pg_fixmap:
	.fill 1024,4,0
ENTRY(empty_zero_page)
	.fill 4096,1,0
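/*
 * For a sense of scale (an illustrative calculation for the usual
 * VMSPLIT choices): with __PAGE_OFFSET = 0xC0000000 (3G/1G),
 * KPMDS = (0x100000000 - 0xC0000000) >> 30 = 1; with 0x80000000
 * (2G/2G), KPMDS = 2; with 0x40000000 (1G/3G), KPMDS = 3.  These
 * are exactly the cases handled by the swapper_pg_dir layout below.
 */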
/*
 * This starts the data section.
 */
#ifdef CONFIG_X86_PAE
.section ".data.page_aligned","wa"
	/* Page-aligned for the benefit of paravirt? */
	.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
	.long	pa(swapper_pg_pmd+PGD_ATTR),0		/* low identity map */
# if KPMDS == 3
	.long	pa(swapper_pg_pmd+PGD_ATTR),0
	.long	pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
	.long	pa(swapper_pg_pmd+PGD_ATTR+0x2000),0
# elif KPMDS == 2
	.long	0,0
	.long	pa(swapper_pg_pmd+PGD_ATTR),0
	.long	pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
# elif KPMDS == 1
	.long	0,0
	.long	0,0
	.long	pa(swapper_pg_pmd+PGD_ATTR),0
# else
#  error "Kernel PMDs should be 1, 2 or 3"
# endif
	.align PAGE_SIZE_asm		/* needs to be page-sized too */
#endif

.data
ENTRY(stack_start)
	.long init_thread_union+THREAD_SIZE
	.long __BOOT_DS

ready:	.byte 0

early_recursion_flag:
	.long 0

int_msg:
	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"

fault_msg:
	.asciz								\
/* fault info: */	"BUG: Int %d: CR2 %p\n"				\
/* pusha regs: */	"     EDI %p  ESI %p  EBP %p  ESP %p\n"		\
			"     EBX %p  EDX %p  ECX %p  EAX %p\n"		\
/* fault frame: */	"     err %p  EIP %p   CS %p  flg %p\n"		\
									\
			"Stack: %p %p %p %p %p %p %p %p\n"		\
			"       %p %p %p %p %p %p %p %p\n"		\
			"       %p %p %p %p %p %p %p %p\n"

#include "../../x86/xen/xen-head.S"

/*
 * The IDT and GDT 'descriptors' are a strange 48-bit object
 * only used by the lidt and lgdt instructions.  They are not
 * like usual segment descriptors - they consist of a 16-bit
 * segment size and a 32-bit linear address value:
 */

.globl boot_gdt_descr
.globl idt_descr

	ALIGN
# early boot GDT descriptor (must use 1:1 address mapping)
	.word 0				# 32-bit align gdt_desc.address
boot_gdt_descr:
	.word __BOOT_DS+7
	.long boot_gdt - __PAGE_OFFSET

	.word 0				# 32-bit align idt_desc.address
idt_descr:
	.word IDT_ENTRIES*8-1		# idt contains 256 entries
	.long idt_table

# boot GDT descriptor (later on used by CPU#0):
	.word 0				# 32-bit align gdt_desc.address
ENTRY(early_gdt_descr)
	.word GDT_ENTRIES*8-1
	.long per_cpu__gdt_page		/* Overwritten for secondary CPUs */

/*
 * The boot_gdt must mirror the equivalent in setup.S and is
 * used only for booting.
 */
	.align L1_CACHE_BYTES
ENTRY(boot_gdt)
	.fill GDT_ENTRY_BOOT_CS,8,0
	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */
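/*
 * Decoding the two boot_gdt descriptors above (standard ia32 segment
 * descriptor format): base = 0x00000000, limit = 0xfffff with 4K
 * granularity (hence 4GB); access byte 0x9a = present, dpl=0,
 * code/execute/read; 0x92 = present, dpl=0, data/read/write; flags
 * nibble 0xc = 4K granularity + 32-bit default operand size.
 */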