/*
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Enhanced CPU detection and feature setting code by Mike Jagdis
 *  and Martin Mares, November 1997.
 */

/*
 * i386 early boot code: boot-CPU entry (startup_32), non-boot CPU entry
 * (startup_32_smp), construction of the initial page tables, CPU type
 * detection, early IDT setup and the boot GDT/IDT descriptors.
 *
 * Syntax: GNU as, AT&T operand order (source, destination).  The file is
 * run through the C preprocessor first (#include / #ifdef / #define).
 */

.text
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>

/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)

/*
 * References to members of the new_cpu_data structure.
 */

#define X86		new_cpu_data+CPUINFO_x86
#define X86_VENDOR	new_cpu_data+CPUINFO_x86_vendor
#define X86_MODEL	new_cpu_data+CPUINFO_x86_model
#define X86_MASK	new_cpu_data+CPUINFO_x86_mask
#define X86_HARD_MATH	new_cpu_data+CPUINFO_hard_math
#define X86_CPUID	new_cpu_data+CPUINFO_cpuid_level
#define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
#define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id

/*
 * This is how much memory *in addition to the memory covered up to
 * and including _end* we need mapped initially.
 * We need:
 *  - one bit for each possible page, but only in low memory, which means
 *     2^32/4096/8 = 128K worst case (4G/4G split.)
 *  - enough space to map all low memory, which means
 *     (2^32/4096) / 1024 pages (worst case, non PAE)
 *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
 *  - a few pages for allocator use before the kernel pagetable has
 *     been set up
 *
 * Modulo rounding, each megabyte assigned here requires a kilobyte of
 * memory, which is currently unreclaimed.
 *
 * This should be a multiple of a page.
 */
LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)

/*
 * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
 * pagetables from above the 16MB DMA limit, so we'll have to set
 * up pagetables 16MB more (worst-case):
 *
 * NOTE(review): LOW_PAGES is a page count, while 0x1000000 reads like a
 * byte count (16MB).  Confirm the intended unit before changing it.
 */
#ifdef CONFIG_DEBUG_PAGEALLOC
LOW_PAGES = LOW_PAGES + 0x1000000
#endif

#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#else
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
#endif
BOOTBITMAP_SIZE = LOW_PAGES / 8
ALLOCATOR_SLOP = 4

INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm

/*
 * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
 * %esi points to the real-mode code as a 32-bit pointer.
 * CS and DS must be 4 GB flat segments, but we don't depend on
 * any particular GDT layout, because we load our own as soon as we
 * can.
 *
 * Paging is not enabled yet, so every symbol is referenced through pa().
 */
.section .text.head,"ax",@progbits
ENTRY(startup_32)
	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
		us to not reload segments */
	testb $(1<<6), BP_loadflags(%esi)
	jnz 2f

/*
 * Set segments to known values.
 */
	lgdt pa(boot_gdt_descr)
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
2:

/*
 * Clear BSS first so that there are no surprises...
 */
	cld
	xorl %eax,%eax
	movl $pa(__bss_start),%edi
	movl $pa(__bss_stop),%ecx
	subl %edi,%ecx			/* %ecx = BSS size in bytes */
	shrl $2,%ecx			/* ... converted to dwords for stosl */
	rep ; stosl
/*
 * Copy bootup parameters out of the way.
 * Note: %esi still has the pointer to the real-mode data.
 * With the kexec as boot loader, parameter segment might be loaded beyond
 * kernel image and might not even be addressable by early boot page tables.
 * (kexec on panic case). Hence copy out the parameters before initializing
 * page tables.
 */
	movl $pa(boot_params),%edi
	movl $(PARAM_SIZE/4),%ecx
	cld
	rep
	movsl
	movl pa(boot_params) + NEW_CL_POINTER,%esi
	andl %esi,%esi
	jz 1f			# No command line
	movl $pa(boot_command_line),%edi
	movl $(COMMAND_LINE_SIZE/4),%ecx
	rep
	movsl
1:

#ifdef CONFIG_PARAVIRT
	/* This can only trip for a broken bootloader... */
	cmpw $0x207, pa(boot_params + BP_version)
	jb default_entry

	/* Paravirt-compatible boot parameters.  Look to see what architecture
		we're booting under. */
	movl pa(boot_params + BP_hardware_subarch), %eax
	cmpl $num_subarch_entries, %eax
	jae bad_subarch

	/* Table holds virtual addresses; convert before the indirect jump. */
	movl pa(subarch_entries)(,%eax,4), %eax
	subl $__PAGE_OFFSET, %eax
	jmp *%eax

bad_subarch:
WEAK(lguest_entry)
WEAK(xen_entry)
	/* Unknown implementation; there's really
	   nothing we can do at this point. */
	ud2a

	__INITDATA

subarch_entries:
	.long default_entry		/* normal x86/PC */
	.long lguest_entry		/* lguest hypervisor */
	.long xen_entry			/* Xen hypervisor */
num_subarch_entries = (. - subarch_entries) / 4
.previous
#endif /* CONFIG_PARAVIRT */

/*
 * Initialize page tables.  This creates a PDE and a set of page
 * tables, which are located immediately beyond _end.  The variable
 * init_pg_tables_end is set up to point to the first "safe" location.
 * Mappings are created both at virtual address 0 (identity mapping)
 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
 *
 * Note that the stack is not yet set up!
 */
#define PTE_ATTR	0x007		/* PRESENT+RW+USER */
#define PDE_ATTR	0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */
#define PGD_ATTR	0x001		/* PRESENT (no other attributes) */

default_entry:
#ifdef CONFIG_X86_PAE

	/*
	 * In PAE mode swapper_pg_dir is statically defined to contain enough
	 * entries to cover the VMSPLIT option (that is the top 1, 2 or 3
	 * entries). The identity mapping is handled by pointing two PGD
	 * entries to the first kernel PMD.
	 *
	 * Note the upper half of each PMD or PTE are always zero at
	 * this stage.
	 */

#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */

	/*
	 * Register roles in the loops below:
	 *   %edi = next page-table byte to write (advanced by stosl)
	 *   %edx = next PMD entry to fill
	 *   %eax = next PTE value (physical address + PTE_ATTR)
	 *   %ebx = zero, swapped in to store the upper half of each PTE
	 */
	xorl %ebx,%ebx				/* %ebx is kept at zero */

	movl $pa(pg0), %edi
	movl $pa(swapper_pg_pmd), %edx
	movl $PTE_ATTR, %eax
10:
	leal PDE_ATTR(%edi),%ecx		/* Create PMD entry */
	movl %ecx,(%edx)			/* Store PMD entry */
						/* Upper half already zero */
	addl $8,%edx
	movl $512,%ecx
11:
	stosl
	xchgl %eax,%ebx
	stosl
	xchgl %eax,%ebx
	addl $0x1000,%eax
	loop 11b

	/*
	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
	 * bytes beyond the end of our own page tables.
	 */
	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
	cmpl %ebp,%eax
	jb 10b
1:
	movl %edi,pa(init_pg_tables_end)

	/* Do early initialization of the fixmap area */
	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
	movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
#else	/* Not PAE */

page_pde_offset = (__PAGE_OFFSET >> 20);

	/*
	 * Register roles in the loops below:
	 *   %edi = next page-table byte to write (advanced by stosl)
	 *   %edx = next page-directory entry to fill
	 *   %eax = next PTE value (physical address + PTE_ATTR)
	 */
	movl $pa(pg0), %edi
	movl $pa(swapper_pg_dir), %edx
	movl $PTE_ATTR, %eax
10:
	leal PDE_ATTR(%edi),%ecx		/* Create PDE entry */
	movl %ecx,(%edx)			/* Store identity PDE entry */
	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
	addl $4,%edx
	movl $1024, %ecx
11:
	stosl
	addl $0x1000,%eax
	loop 11b
	/*
	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
	 * bytes beyond the end of our own page tables; the +0x007 is
	 * the attribute bits
	 */
	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
	cmpl %ebp,%eax
	jb 10b
	movl %edi,pa(init_pg_tables_end)

	/* Do early initialization of the fixmap area */
	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
	movl %eax,pa(swapper_pg_dir+0xffc)
#endif
	jmp 3f
/*
 * Non-boot CPU entry point; entered from trampoline.S
 * We can't lgdt here, because lgdt itself uses a data segment, but
 * we know the trampoline has already loaded the boot_gdt for us.
 *
 * If cpu hotplug is not supported then this code can go in init section
 * which will be freed later
 */

#ifndef CONFIG_HOTPLUG_CPU
.section .init.text,"ax",@progbits
#endif

#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
	cld
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
#endif /* CONFIG_SMP */
3:

/*
 *	New page tables may be in 4Mbyte page mode and may
 *	be using the global pages.
 *
 *	NOTE! If we are on a 486 we may have no cr4 at all!
 *	So we do not try to touch it unless we really have
 *	some bits in it to set.  This won't work if the BSP
 *	implements cr4 but this AP does not -- very unlikely
 *	but be warned!  The same applies to the pse feature
 *	if not equally supported. --macro
 *
 *	NOTE! We have to correct for the fact that we're
 *	not yet offset PAGE_OFFSET..
 */
#define cr4_bits pa(mmu_cr4_features)
	movl cr4_bits,%edx
	andl %edx,%edx
	jz 6f
	movl %cr4,%eax		# Turn on paging options (PSE,PAE,..)
	orl %edx,%eax
	movl %eax,%cr4

	btl $5, %eax		# check if PAE is enabled
	jnc 6f

	/* Check if extended functions are implemented */
	movl $0x80000000, %eax
	cpuid
	cmpl $0x80000000, %eax
	jbe 6f
	mov $0x80000001, %eax
	cpuid
	/* Execute Disable bit supported? */
	btl $20, %edx
	jnc 6f

	/* Setup EFER (Extended Feature Enable Register) */
	movl $0xc0000080, %ecx
	rdmsr

	btsl $11, %eax
	/* Make changes effective */
	wrmsr

6:

/*
 * Enable paging
 */
	movl $pa(swapper_pg_dir),%eax
	movl %eax,%cr3		/* set the page table pointer.. */
	movl %cr0,%eax
	orl $X86_CR0_PG,%eax
	movl %eax,%cr0		/* ..and set paging (PG) bit */
	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
1:
	/* Set up the stack pointer */
	lss stack_start,%esp

/*
 * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
 * confuse the debugger if this code is traced.
 * XXX - best to initialize before switching to protected mode.
 */
	pushl $0
	popfl

#ifdef CONFIG_SMP
	cmpb $0, ready
	jz  1f				/* Initial CPU cleans BSS */
	jmp checkCPUtype
1:
#endif /* CONFIG_SMP */

/*
 * start system 32-bit setup. We need to re-do some of the things done
 * in 16-bit mode for the "real" operations.
 */
	call setup_idt

checkCPUtype:

	movl $-1,X86_CPUID	#  -1 for no CPUID initially

/* check if it is 486 or 386. */
/*
 * XXX - this does a lot of unnecessary setup.  Alignment checks don't
 * apply at our cpl of 0 and the stack ought to be aligned already, and
 * we don't need to preserve eflags.
 */

	movb $3,X86		# at least 386
	pushfl			# push EFLAGS
	popl %eax		# get EFLAGS
	movl %eax,%ecx		# save original EFLAGS
	xorl $0x240000,%eax	# flip AC and ID bits in EFLAGS
	pushl %eax		# copy to EFLAGS
	popfl			# set EFLAGS
	pushfl			# get new EFLAGS
	popl %eax		# put it in eax
	xorl %ecx,%eax		# change in flags
	pushl %ecx		# restore original EFLAGS
	popfl
	testl $0x40000,%eax	# check if AC bit changed
	je is386

	movb $4,X86		# at least 486
	testl $0x200000,%eax	# check if ID bit changed
	je is486

	/* get vendor info */
	xorl %eax,%eax			# call CPUID with 0 -> return vendor ID
	cpuid
	movl %eax,X86_CPUID		# save CPUID level
	movl %ebx,X86_VENDOR_ID		# lo 4 chars
	movl %edx,X86_VENDOR_ID+4	# next 4 chars
	movl %ecx,X86_VENDOR_ID+8	# last 4 chars

	orl %eax,%eax			# do we have processor info as well?
	je is486

	movl $1,%eax		# Use the CPUID instruction to get CPU type
	cpuid
	movb %al,%cl		# save reg for future use
	andb $0x0f,%ah		# mask processor family
	movb %ah,X86
	andb $0xf0,%al		# mask model
	shrb $4,%al
	movb %al,X86_MODEL
	andb $0x0f,%cl		# mask revision
	movb %cl,X86_MASK
	movl %edx,X86_CAPABILITY

is486:	movl $0x50022,%ecx	# set AM, WP, NE and MP
	jmp 2f

is386:	movl $2,%ecx		# set MP
2:	movl %cr0,%eax
	andl $0x80000011,%eax	# Save PG,PE,ET
	orl %ecx,%eax
	movl %eax,%cr0

	call check_x87
	lgdt early_gdt_descr
	lidt idt_descr
	ljmp $(__KERNEL_CS),$1f
1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
	movl %eax,%ss			# after changing gdt.
	movl %eax,%fs			# gets reset once there's real percpu

	movl $(__USER_DS),%eax		# DS/ES contains default USER segment
	movl %eax,%ds
	movl %eax,%es

	xorl %eax,%eax			# Clear GS and LDT
	movl %eax,%gs
	lldt %ax

	cld			# gcc2 wants the direction flag cleared at all times
	pushl $0		# fake return address for unwinder
#ifdef CONFIG_SMP
	movb ready, %cl
	movb $1, ready
	cmpb $0,%cl		# the first CPU calls start_kernel
	je   1f
	movl $(__KERNEL_PERCPU), %eax
	movl %eax,%fs		# set this cpu's percpu
	jmp initialize_secondary # all other CPUs call initialize_secondary
1:
#endif /* CONFIG_SMP */
	jmp	i386_start_kernel

/*
 * We depend on ET to be correct. This checks for 287/387.
 *
 * Out:   X86_HARD_MATH = 1 when the low byte of the status word read
 *        back after fninit is zero; otherwise X86_HARD_MATH = 0 and
 *        CR0.EM is set.
 * Clobb: %eax, flags.  clts is executed unconditionally.
 */
check_x87:
	movb $0,X86_HARD_MATH
	clts
	fninit
	fstsw %ax
	cmpb $0,%al
	je 1f
	movl %cr0,%eax		/* no coprocessor: have to set bits */
	orl $4,%eax		/* set EM (orl, so it does not depend on EM being clear) */
	movl %eax,%cr0
	ret
	ALIGN
1:	movb $1,X86_HARD_MATH
	.byte 0xDB,0xE4		/* fsetpm for 287, ignored by 387 */
	ret

/*
 *  setup_idt
 *
 *  sets up a idt with 256 entries pointing to
 *  ignore_int, interrupt gates. It doesn't actually load
 *  idt - that can be done only after paging has been enabled
 *  and the kernel moved to PAGE_OFFSET. Interrupts
 *  are enabled elsewhere, when we can be relatively
 *  sure everything is ok.
 *
 *  Entries 0, 6, 13 and 14 are then pointed at the early_* handlers.
 *
 *  Clobb: %eax, %ecx, %edx, %edi, flags.
 *
 *  Warning: %esi is live across this function.
 */
setup_idt:
	lea ignore_int,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax		/* selector = 0x0010 = cs */
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */

	lea idt_table,%edi
	mov $256,%ecx
rp_sidt:
	movl %eax,(%edi)
	movl %edx,4(%edi)
	addl $8,%edi
	dec %ecx
	jne rp_sidt

/* Point IDT entry \trapno at \handler.  Clobbers %eax, %edx, %edi. */
.macro	set_early_handler handler,trapno
	lea \handler,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */
	lea idt_table,%edi
	movl %eax,8*\trapno(%edi)
	movl %edx,8*\trapno+4(%edi)
.endm

	set_early_handler handler=early_divide_err,trapno=0
	set_early_handler handler=early_illegal_opcode,trapno=6
	set_early_handler handler=early_protection_fault,trapno=13
	set_early_handler handler=early_page_fault,trapno=14

	ret

/*
 * Early fault stubs: each loads its trap number into %edx and joins
 * early_fault.  The stubs for traps 0 and 6 push a zero as a fake error
 * code first; the stubs for traps 13 and 14 push nothing.
 */
early_divide_err:
	xor %edx,%edx
	pushl $0	/* fake errcode */
	jmp early_fault

early_illegal_opcode:
	movl $6,%edx
	pushl $0	/* fake errcode */
	jmp early_fault

early_protection_fault:
	movl $13,%edx
	jmp early_fault

early_page_fault:
	movl $14,%edx
	jmp early_fault

/*
 * Common early fault path.  In: %edx = trap number.  Never returns: it
 * ends in hlt_loop.
 */
early_fault:
	cld
#ifdef CONFIG_PRINTK
	pusha
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag
	je hlt_loop
	incl early_recursion_flag
	movl %cr2,%eax
	pushl %eax
	pushl %edx		/* trapno */
	pushl $fault_msg
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
	call printk
#endif
#endif
	call dump_stack
hlt_loop:
	hlt
	jmp hlt_loop

/* This is the default interrupt "handler" :-) */
	ALIGN
ignore_int:
	cld
#ifdef CONFIG_PRINTK
	pushl %eax
	pushl %ecx
	pushl %edx
	pushl %es
	pushl %ds
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag
	je hlt_loop
	incl early_recursion_flag
	/*
	 * NOTE(review): four words are pushed here but int_msg contains only
	 * three %p conversions; confirm which saved values are meant to be
	 * printed.
	 */
	pushl 16(%esp)
	pushl 24(%esp)
	pushl 32(%esp)
	pushl 40(%esp)
	pushl $int_msg
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
	call printk
#endif
	addl $(5*4),%esp	/* drop the five words pushed for the call */
	popl %ds
	popl %es
	popl %edx
	popl %ecx
	popl %eax
#endif
	iret

.section .text
/*
 * Real beginning of normal "text" segment
 */
ENTRY(stext)
ENTRY(_stext)

/*
 * BSS section
 */
.section ".bss.page_aligned","wa"
	.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
swapper_pg_pmd:
	.fill 1024*KPMDS,4,0
#else
ENTRY(swapper_pg_dir)
	.fill 1024,4,0
#endif
swapper_pg_fixmap:
	.fill 1024,4,0
ENTRY(empty_zero_page)
	.fill 4096,1,0
/*
 * This starts the data section.
 */
#ifdef CONFIG_X86_PAE
.section ".data.page_aligned","wa"
	/* Page-aligned for the benefit of paravirt? */
	.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
	.long	pa(swapper_pg_pmd+PGD_ATTR),0		/* low identity map */
# if KPMDS == 3
	.long	pa(swapper_pg_pmd+PGD_ATTR),0
	.long	pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
	.long	pa(swapper_pg_pmd+PGD_ATTR+0x2000),0
# elif KPMDS == 2
	.long	0,0
	.long	pa(swapper_pg_pmd+PGD_ATTR),0
	.long	pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
# elif KPMDS == 1
	.long	0,0
	.long	0,0
	.long	pa(swapper_pg_pmd+PGD_ATTR),0
# else
#  error "Kernel PMDs should be 1, 2 or 3"
# endif
	.align PAGE_SIZE_asm		/* needs to be page-sized too */
#endif

.data
/* Operand of "lss stack_start,%esp": 32-bit offset followed by selector. */
ENTRY(stack_start)
	.long init_thread_union+THREAD_SIZE
	.long __BOOT_DS

/* Zero until the first CPU passes the SMP check; set to 1 afterwards. */
ready:	.byte 0

/* Incremented on each early fault/unknown interrupt report; halt at 2. */
early_recursion_flag:
	.long 0

int_msg:
	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"

fault_msg:
/* fault info: */
	.ascii "BUG: Int %d: CR2 %p\n"
/* pusha regs: */
	.ascii "     EDI %p  ESI %p  EBP %p  ESP %p\n"
	.ascii "     EBX %p  EDX %p  ECX %p  EAX %p\n"
/* fault frame: */
	.ascii "     err %p  EIP %p   CS %p  flg %p\n"
	.ascii "Stack: %p %p %p %p %p %p %p %p\n"
	.ascii "       %p %p %p %p %p %p %p %p\n"
	.asciz "       %p %p %p %p %p %p %p %p\n"

#include "../../x86/xen/xen-head.S"

/*
 * The IDT and GDT 'descriptors' are a strange 48-bit object
 * only used by the lidt and lgdt instructions. They are not
 * like usual segment descriptors - they consist of a 16-bit
 * segment size, and 32-bit linear address value:
 */

.globl boot_gdt_descr
.globl idt_descr

	ALIGN
/* early boot GDT descriptor (must use 1:1 address mapping) */
	.word 0				# 32 bit align gdt_desc.address
boot_gdt_descr:
	.word __BOOT_DS+7
	.long boot_gdt - __PAGE_OFFSET

	.word 0				# 32-bit align idt_desc.address
idt_descr:
	.word IDT_ENTRIES*8-1		# idt contains 256 entries
	.long idt_table

/* boot GDT descriptor (later on used by CPU#0): */
	.word 0				# 32 bit align gdt_desc.address
ENTRY(early_gdt_descr)
	.word GDT_ENTRIES*8-1
	.long per_cpu__gdt_page		/* Overwritten for secondary CPUs */

/*
 * The boot_gdt must mirror the equivalent in setup.S and is
 * used only for booting.
 */
	.align L1_CACHE_BYTES
ENTRY(boot_gdt)
	.fill GDT_ENTRY_BOOT_CS,8,0
	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */