/*
 *  linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
 *  Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
 *  Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
 */


#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/init.h>
#include <asm/desc.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>

/* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
 * because we need identity-mapped pages.
 */

	.text
	.section .text.head
	.code64
	.globl startup_64
startup_64:

	/*
	 * At this point the CPU runs in 64bit mode with CS.L = 1 and CS.D = 0,
	 * and someone has loaded an identity mapped page table
	 * for us.  These identity mapped page tables map all of the
	 * kernel pages and possibly all of memory.
	 *
	 * %esi holds a physical pointer to real_mode_data.
	 *
	 * We come here either directly from a 64bit bootloader, or from
	 * arch/x86_64/boot/compressed/head.S.
	 *
	 * We only come here initially at boot; nothing else comes here.
	 *
	 * Since we may be loaded at an address different from what we were
	 * compiled to run at, we first fix up the physical addresses in our
	 * page tables and then reload them.
	 */

	/* Compute the delta between the address I am compiled to run at and the
	 * address I am actually running at.
	 */
	leaq	_text(%rip), %rbp
	subq	$_text - __START_KERNEL_map, %rbp
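	/* %rbp now holds the load offset: the physical address we are actually
	 * running at minus the physical address we were linked to run at.  As
	 * an illustration (made-up numbers): linked to run at physical 0x200000
	 * but loaded at 0x1200000 gives %rbp = 0x1000000, and that delta is
	 * added to every physical address stored in the static page tables
	 * below.
	 */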

	/* Is the address not 2M aligned? */
	movq	%rbp, %rax
	andl	$~LARGE_PAGE_MASK, %eax
	testl	%eax, %eax
	jnz	bad_address
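	/* ~LARGE_PAGE_MASK keeps only the offset within a 2MB page, so a
	 * nonzero result means we were loaded at an address that is not 2MB
	 * aligned and the 2MB page table entries below could not describe it.
	 */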

	/* Is the address too large? */
	leaq	_text(%rip), %rdx
	movq	$PGDIR_SIZE, %rax
	cmpq	%rax, %rdx
	jae	bad_address

	/* Fixup the physical addresses in the page table */
	addq	%rbp, init_level4_pgt + 0(%rip)
	addq	%rbp, init_level4_pgt + (258*8)(%rip)
	addq	%rbp, init_level4_pgt + (511*8)(%rip)

	addq	%rbp, level3_ident_pgt + 0(%rip)

	addq	%rbp, level3_kernel_pgt + (510*8)(%rip)
	addq	%rbp, level3_kernel_pgt + (511*8)(%rip)

	addq	%rbp, level2_fixmap_pgt + (506*8)(%rip)

	/* Add an Identity mapping if I am above 1G */
	leaq	_text(%rip), %rdi
	andq	$LARGE_PAGE_MASK, %rdi

	movq	%rdi, %rax
	shrq	$PUD_SHIFT, %rax
	andq	$(PTRS_PER_PUD - 1), %rax
	jz	ident_complete

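	/* The PUD index of _text is nonzero, i.e. the kernel sits above the
	 * first 1G that level2_ident_pgt already identity maps.  Hook the
	 * spare PMD table into level3_ident_pgt at that index and give it a
	 * single 2MB executable entry covering _text.
	 */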
	leaq	(level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
	leaq	level3_ident_pgt(%rip), %rbx
	movq	%rdx, 0(%rbx, %rax, 8)

	movq	%rdi, %rax
	shrq	$PMD_SHIFT, %rax
	andq	$(PTRS_PER_PMD - 1), %rax
	leaq	__PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx
	leaq	level2_spare_pgt(%rip), %rbx
	movq	%rdx, 0(%rbx, %rax, 8)
ident_complete:

	/* Fixup the kernel text+data virtual addresses */
	leaq	level2_kernel_pgt(%rip), %rdi
	leaq	4096(%rdi), %r8
	/* See if it is a valid page table entry */
1:	testq	$1, 0(%rdi)
	jz	2f
	addq	%rbp, 0(%rdi)
	/* Go to the next page */
2:	addq	$8, %rdi
	cmp	%r8, %rdi
	jne	1b

	/* Fixup phys_base */
	addq	%rbp, phys_base(%rip)
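	/* phys_base starts out as 0 (see its definition further down) and now
	 * records how far the kernel was loaded from its link-time physical
	 * address, so later virtual-to-physical conversions of kernel image
	 * addresses can add the offset back in.
	 */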
118
119#ifdef CONFIG_SMP
120	addq	%rbp, trampoline_level4_pgt + 0(%rip)
121	addq	%rbp, trampoline_level4_pgt + (511*8)(%rip)
122#endif
123#ifdef CONFIG_ACPI_SLEEP
124	addq	%rbp, wakeup_level4_pgt + 0(%rip)
125	addq	%rbp, wakeup_level4_pgt + (511*8)(%rip)
126#endif
127
	/* Due to ENTRY(), sometimes the empty space gets filled with
	 * zeros.  Better to take a jmp than to rely on the empty space
	 * being filled with 0x90 (nop).
	 */
	jmp secondary_startup_64
ENTRY(secondary_startup_64)
	/*
	 * At this point the CPU runs in 64bit mode with CS.L = 1 and CS.D = 0,
	 * and someone has loaded a mapped page table.
	 *
	 * %esi holds a physical pointer to real_mode_data.
	 *
	 * We come here either from startup_64 (using physical addresses)
	 * or from trampoline.S (using virtual addresses).
	 *
	 * Using virtual addresses from trampoline.S removes the need
	 * to have any identity mapped pages in the kernel page table
	 * after the boot processor executes this code.
	 */

	/* Enable PAE mode and PGE */
	xorq	%rax, %rax
	btsq	$5, %rax
	btsq	$7, %rax
	movq	%rax, %cr4
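	/* Bit 5 of %cr4 is CR4.PAE and bit 7 is CR4.PGE: physical address
	 * extension is required for long mode paging, and global pages keep
	 * kernel TLB entries across CR3 reloads.
	 */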

	/* Set up the early boot 4-level pagetables. */
	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
	addq	phys_base(%rip), %rax
	movq	%rax, %cr3

	/* Ensure I am executing from virtual addresses */
	movq	$1f, %rax
	jmp	*%rax
1:

	/* Check if nx is implemented */
	movl	$0x80000001, %eax
	cpuid
	movl	%edx,%edi

	/* Setup EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_SCE, %eax	/* Enable System Call */
	btl	$20,%edi		/* No Execute supported? */
	jnc     1f
	btsl	$_EFER_NX, %eax
1:	wrmsr				/* Make changes effective */
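	/* CPUID leaf 0x80000001 EDX bit 20 (tested above) reports NX support.
	 * EFER.SCE enables SYSCALL/SYSRET and EFER.NX enables the no-execute
	 * page protection bit; wrmsr writes %edx:%eax back to the MSR selected
	 * by %ecx (still MSR_EFER).
	 */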

	/* Setup cr0 */
#define CR0_PM				1		/* protected mode */
#define CR0_MP				(1<<1)
#define CR0_ET				(1<<4)
#define CR0_NE				(1<<5)
#define CR0_WP				(1<<16)
#define CR0_AM				(1<<18)
#define CR0_PAGING			(1<<31)
	movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax
	/* Make changes effective */
	movq	%rax, %cr0

	/* Setup a boot time stack */
	movq init_rsp(%rip),%rsp

	/* zero EFLAGS after setting rsp */
	pushq $0
	popfq

	/*
	 * We must switch to a new descriptor in kernel space for the GDT
	 * because soon the kernel won't have access anymore to the userspace
	 * addresses we are currently running from.  We have to do that here
	 * because in 32bit we couldn't load a 64bit linear address.
	 */
	lgdt	cpu_gdt_descr(%rip)

	/* set up data segments. actually 0 would do too */
	movl $__KERNEL_DS,%eax
	movl %eax,%ds
	movl %eax,%ss
	movl %eax,%es

	/*
	 * We don't really need to load %fs or %gs, but load them anyway
	 * to kill any stale realmode selectors.  This allows execution
	 * under VT hardware.
	 */
	movl %eax,%fs
	movl %eax,%gs

	/*
	 * Set up a dummy PDA.  This is just for some early bootup code
	 * that does in_interrupt().
	 */
	movl	$MSR_GS_BASE,%ecx
	movq	$empty_zero_page,%rax
	movq    %rax,%rdx
	shrq	$32,%rdx
	wrmsr
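	/* wrmsr takes the MSR index in %ecx and the 64-bit value split across
	 * %edx:%eax, so the shift above moves the high half of the
	 * empty_zero_page address into %edx.  Pointing MSR_GS_BASE at a zeroed
	 * page means early PDA accesses just read zeroes.
	 */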

	/* esi is a pointer to the real mode structure with interesting info.
	   Pass it to C. */
	movl	%esi, %edi

	/* Finally jump to run C code and to be on a real kernel address.
	 * Since we are running on identity-mapped space we have to jump
	 * to the full 64bit address; this is only possible with an indirect
	 * jump.  In addition we need to ensure %cs is set, so we make this
	 * a far return.
	 */
	movq	initial_code(%rip),%rax
	pushq	$0		# fake return address to stop unwinder
	pushq	$__KERNEL_CS	# set correct cs
	pushq	%rax		# target address in negative space
	lretq
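	/* lretq pops the new %rip (the pushed initial_code value) and then
	 * %cs (__KERNEL_CS).  The 0 pushed first stays on the stack as the
	 * fake return address the unwinder stops at.
	 */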

	/* SMP bootup changes these two */
#ifndef CONFIG_HOTPLUG_CPU
	.pushsection .init.data
#endif
	.align	8
	.globl	initial_code
initial_code:
	.quad	x86_64_start_kernel
#ifndef CONFIG_HOTPLUG_CPU
	.popsection
#endif
	.globl init_rsp
init_rsp:
	.quad  init_thread_union+THREAD_SIZE-8

bad_address:
	jmp bad_address

ENTRY(early_idt_handler)
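	/* Minimal handler for exceptions taken before the real IDT is set up:
	 * print %rip, the error code and %cr2, then halt.  The registers below
	 * follow the C calling convention for early_printk (%rdi = format,
	 * %rsi = rip from the stack frame, %rdx = error code, %rcx = %cr2);
	 * early_recursion_flag keeps a fault inside this path from looping.
	 */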
	cmpl $2,early_recursion_flag(%rip)
	jz  1f
	incl early_recursion_flag(%rip)
	xorl %eax,%eax
	movq 8(%rsp),%rsi	# get rip
	movq (%rsp),%rdx
	movq %cr2,%rcx
	leaq early_idt_msg(%rip),%rdi
	call early_printk
	cmpl $2,early_recursion_flag(%rip)
	jz  1f
	call dump_stack
#ifdef CONFIG_KALLSYMS
	leaq early_idt_ripmsg(%rip),%rdi
	movq 8(%rsp),%rsi	# get rip again
	call __print_symbol
#endif
1:	hlt
	jmp 1b
early_recursion_flag:
	.long 0

early_idt_msg:
	.asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n"
early_idt_ripmsg:
	.asciz "RIP %s\n"

.balign PAGE_SIZE

#define NEXT_PAGE(name) \
	.balign	PAGE_SIZE; \
ENTRY(name)

/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT)		\
	i = 0 ;					\
	.rept (COUNT) ;				\
	.quad	(START) + (i << 21) + (PERM) ;	\
	i = i + 1 ;				\
	.endr
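/* For example, PMDS(0, __PAGE_KERNEL_LARGE_EXEC, 4) would expand to four
 * .quad entries mapping physical 0x000000, 0x200000, 0x400000 and 0x600000,
 * since each step of i advances the address by 1 << 21 = 2MB.
 */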

	/*
	 * This default setting generates an ident mapping at address 0x100000
	 * and a mapping for the kernel that precisely maps virtual address
	 * 0xffffffff80000000 to physical address 0x000000 (always using the
	 * 2Mbyte large pages provided by PAE mode).
	 */
NEXT_PAGE(init_level4_pgt)
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.fill	257,8,0
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.fill	252,8,0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
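	/* Only PGD entries 0 (boot identity mapping), 258 (the direct mapping
	 * at PAGE_OFFSET) and 511 (the kernel mapping at __START_KERNEL_map)
	 * are populated here; these are exactly the three slots that
	 * startup_64 relocates by %rbp above.
	 */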

NEXT_PAGE(level3_ident_pgt)
	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.fill	511,8,0

NEXT_PAGE(level3_kernel_pgt)
	.fill	510,8,0
	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE

NEXT_PAGE(level2_fixmap_pgt)
	.fill	506,8,0
	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
	.fill	5,8,0

NEXT_PAGE(level1_fixmap_pgt)
	.fill	512,8,0

NEXT_PAGE(level2_ident_pgt)
	/* Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 */
	PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)

NEXT_PAGE(level2_kernel_pgt)
	/* 40MB kernel mapping. The kernel code cannot be bigger than that.
	   When you change this, change KERNEL_TEXT_SIZE in page.h too. */
	/* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
	PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE)
	/* Module mapping starts here */
	.fill	(PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
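	/* With the 40MB KERNEL_TEXT_SIZE noted above, KERNEL_TEXT_SIZE/PMD_SIZE
	 * is 20 2MB entries, and the .fill zeroes the remaining 492 of the 512
	 * PMD slots for the module area that follows the kernel mapping.
	 */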

NEXT_PAGE(level2_spare_pgt)
	.fill   512,8,0

#undef PMDS
#undef NEXT_PAGE

	.data
	.align 16
	.globl cpu_gdt_descr
cpu_gdt_descr:
	.word	gdt_end-cpu_gdt_table-1
gdt:
	.quad	cpu_gdt_table
#ifdef CONFIG_SMP
	.rept	NR_CPUS-1
	.word	0
	.quad	0
	.endr
#endif

ENTRY(phys_base)
	/* This must match the first entry in level2_kernel_pgt */
	.quad   0x0000000000000000

/* We need valid kernel segments for data and code in long mode too,
 * since IRET will check the segment types.  kkeil 2000/10/28
 * Also, sysret mandates a special GDT layout.
 */

	.section .data.page_aligned, "aw"
	.align PAGE_SIZE

/* The TLS descriptors are currently at a different place compared to i386.
   Hopefully nobody expects them at a fixed place (Wine?) */

ENTRY(cpu_gdt_table)
	.quad	0x0000000000000000	/* NULL descriptor */
	.quad	0x00cf9b000000ffff	/* __KERNEL32_CS */
	.quad	0x00af9b000000ffff	/* __KERNEL_CS */
	.quad	0x00cf93000000ffff	/* __KERNEL_DS */
	.quad	0x00cffb000000ffff	/* __USER32_CS */
	.quad	0x00cff3000000ffff	/* __USER_DS, __USER32_DS  */
	.quad	0x00affb000000ffff	/* __USER_CS */
	.quad	0x0			/* unused */
	.quad	0,0			/* TSS */
	.quad	0,0			/* LDT */
	.quad   0,0,0			/* three TLS descriptors */
	.quad	0x0000f40000000000	/* node/CPU stored in limit */
gdt_end:
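	/* For reference, __KERNEL_CS (0x00af9b000000ffff) decodes as base 0,
	 * limit 0xfffff with 4K granularity, access byte 0x9b (present, DPL 0,
	 * readable code segment, accessed) and the L bit set for 64-bit mode;
	 * the __USER* descriptors differ mainly in having DPL 3.
	 */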
	/* asm/segment.h:GDT_ENTRIES must match this */
	/* This should be a multiple of the cache line size */
	/* GDTs of other CPUs are now dynamically allocated */

	/* zero the remaining page */
	.fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0

	.section .bss, "aw", @nobits
	.align L1_CACHE_BYTES
ENTRY(idt_table)
	.skip 256 * 16

	.section .bss.page_aligned, "aw", @nobits
	.align PAGE_SIZE
ENTRY(empty_zero_page)
	.skip PAGE_SIZE
417