/*
 * trampoline.S: Jump start slave processors on sparc64.
 *
 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 */

#include <linux/init.h>

#include <asm/head.h>
#include <asm/asi.h>
#include <asm/lsu.h>
#include <asm/dcr.h>
#include <asm/dcu.h>
#include <asm/pstate.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/spitfire.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/mmu.h>
#include <asm/hypervisor.h>
#include <asm/cpudata.h>

/*
 * Data used by the trampoline: the NUL-terminated name strings stored
 * into the firmware argument arrays built further down, and a small
 * temporary stack.
 */
	.data
	.align	8
call_method:
	.asciz	"call-method"
	.align	8
itlb_load:
	.asciz	"SUNW,itlb-load"
	.align	8
dtlb_load:
	.asciz	"SUNW,dtlb-load"

/* Size in bytes of tramp_stack, the stack used right after TLB locking. */
#define TRAMP_STACK_SIZE	1024
	.align	16
tramp_stack:
	.skip	TRAMP_STACK_SIZE

/*
 * sparc64_cpu_startup: entry point used to jump start a slave processor.
 *
 * Dispatches to a chip-specific setup path through the BRANCH_IF_* macros;
 * when none of them branches, the spitfire path is taken.  %g1 and %g5
 * are handed to the macros, presumably as scratch registers.  The code up
 * to sparc64_cpu_startup_end is delimited by the two global symbols.
 */
	.align		8
	.globl		sparc64_cpu_startup, sparc64_cpu_startup_end
sparc64_cpu_startup:
	BRANCH_IF_SUN4V(g1, niagara_startup)
	BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup)
	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup)

	/* No macro above branched away: use the spitfire setup.  */
	ba,pt	%xcc, spitfire_startup
	 nop

/*
 * cheetah_plus_startup: no control register is written on this path; it
 * only joins cheetah_generic_startup.
 */
cheetah_plus_startup:
	/* Preserve OBP chosen DCU and DCR register settings.  */
	ba,pt	%xcc, cheetah_generic_startup
	 nop

/*
 * cheetah_startup: load fixed values into %asr18 (built from DCR_* flags)
 * and into the register behind ASI_DCU_CONTROL_REG, then fall through to
 * cheetah_generic_startup.
 */
cheetah_startup:
	mov	DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
	wr	%g1, %asr18

	/* Build the 64-bit DCU value: the first flag group is loaded with
	 * %uhi/%ulo and shifted into the upper 32 bits, then the remaining
	 * flags are OR-ed into the low half.
	 */
	sethi	%uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
	or	%g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
	sllx	%g5, 32, %g5
	or	%g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5
	stxa	%g5, [%g0] ASI_DCU_CONTROL_REG
	membar	#Sync
	/* fallthru */

/*
 * cheetah_generic_startup: store zero to the TSB_EXTENSION_P, _S and _N
 * registers, then fall through to niagara_startup.  P and N are written
 * through both ASI_DMMU and ASI_IMMU; S is written through ASI_DMMU only.
 */
cheetah_generic_startup:
	mov	TSB_EXTENSION_P, %g3
	stxa	%g0, [%g3] ASI_DMMU
	stxa	%g0, [%g3] ASI_IMMU
	membar	#Sync

	mov	TSB_EXTENSION_S, %g3
	stxa	%g0, [%g3] ASI_DMMU
	membar	#Sync

	mov	TSB_EXTENSION_N, %g3
	stxa	%g0, [%g3] ASI_DMMU
	stxa	%g0, [%g3] ASI_IMMU
	membar	#Sync
	/* fallthru */

/*
 * niagara_startup: write a value with only bit 63 set to %asr25, then
 * branch to the common path at startup_continue.  Reached directly on
 * sun4v and by fall-through from cheetah_generic_startup.
 */
niagara_startup:
	/* Disable STICK_INT interrupts. */
	sethi		%hi(0x80000000), %g5
	sllx		%g5, 32, %g5
	wr		%g5, %asr25

	ba,pt		%xcc, startup_continue
	 nop

/*
 * spitfire_startup: store the IC, DC, IM and DM LSU_CONTROL_* bits to the
 * register behind ASI_LSU_CONTROL, then fall through to startup_continue.
 */
spitfire_startup:
	mov		(LSU_CONTROL_IC | LSU_CONTROL_DC | LSU_CONTROL_IM | LSU_CONTROL_DM), %g1
	stxa		%g1, [%g0] ASI_LSU_CONTROL
	membar		#Sync

/*
 * startup_continue: common path for every chip type.
 *
 * The incoming %o0 is saved in %l0; later code loads through it
 * (ldx [%l0]).  sun4v cpus leave for niagara_lock_tlb.  All others write
 * bit 63 to %tick_cmpr, take prom_entry_lock, move onto the stack pointer
 * read from p1275buf + 0x10 and set up the loop registers used by the
 * firmware calls that follow.
 */
startup_continue:
	mov		%o0, %l0
	BRANCH_IF_SUN4V(g1, niagara_lock_tlb)

	/* %tick_cmpr = 1 << 63.  */
	sethi		%hi(0x80000000), %g2
	sllx		%g2, 32, %g2
	wr		%g2, 0, %tick_cmpr

	/* Call OBP by hand to lock KERNBASE into i/d tlbs.
	 * We lock 'num_kernel_image_mappings' consecutive entries.
	 */
	/* Spin until ldstub reads back zero from prom_entry_lock.  */
	sethi		%hi(prom_entry_lock), %g2
1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
	brnz,pn		%g1, 1b
	 nop

	/* %sp = (value stored at p1275buf + 0x10) - (192 + 128).  */
	sethi		%hi(p1275buf), %g2
	or		%g2, %lo(p1275buf), %g2
	ldx		[%g2 + 0x10], %l2
	add		%l2, -(192 + 128), %sp
	flushw

	/* Setup the loop variables:
	 * %l3: VADDR base
	 * %l4: TTE base
	 * %l5: Loop iterator, iterates from 0 to 'num_kernel_image_mappings'
	 * %l6: Number of TTE entries to map
	 * %l7: Highest TTE entry number, we count down
	 */
	sethi		%hi(KERNBASE), %l3
	sethi		%hi(kern_locked_tte_data), %l4
	ldx		[%l4 + %lo(kern_locked_tte_data)], %l4
	clr		%l5
	sethi		%hi(num_kernel_image_mappings), %l6
	lduw		[%l6 + %lo(num_kernel_image_mappings)], %l6

	/* %l7 stays 15 when BRANCH_IF_ANY_CHEETAH branches to 2:,
	 * otherwise it is overwritten with 63.
	 */
	mov		15, %l7
	BRANCH_IF_ANY_CHEETAH(g1,g5,2f)

	mov		63, %l7
2:

/*
 * Per-mapping firmware loop (local label 3), using the registers set up
 * just above: %l3 VADDR base, %l4 TTE base, %l5 index, %l6 count,
 * %l7 highest TTE entry number.
 *
 * For each index an argument array is built at %sp + 2047 + 128 and the
 * routine whose address is stored at p1275buf + 0x08 is called with the
 * array address in %o0, first for "SUNW,itlb-load" and then for
 * "SUNW,dtlb-load".  Array layout, one 64-bit cell each:
 *
 *   0x00 "call-method"   0x08 5   0x10 1   0x18 method name string
 *   0x20 prom_mmu_ihandle_cache   0x28 VADDR   0x30 TTE   0x38 TTE index
 *
 * The cells at 0x08 and 0x10 are presumably the argument and return
 * counts of the firmware calling convention -- confirm against the PROM
 * client interface.  The loop ends when %l5 equals %l6; prom_entry_lock
 * is then cleared and control moves to after_lock_tlb.
 */
3:
	/* Lock into I-MMU */
	sethi		%hi(call_method), %g2
	or		%g2, %lo(call_method), %g2
	stx		%g2, [%sp + 2047 + 128 + 0x00]
	mov		5, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x08]
	mov		1, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x10]
	sethi		%hi(itlb_load), %g2
	or		%g2, %lo(itlb_load), %g2
	stx		%g2, [%sp + 2047 + 128 + 0x18]
	sethi		%hi(prom_mmu_ihandle_cache), %g2
	lduw		[%g2 + %lo(prom_mmu_ihandle_cache)], %g2
	stx		%g2, [%sp + 2047 + 128 + 0x20]

	/* Each TTE maps 4MB, convert index to offset.  */
	sllx		%l5, 22, %g1

	add		%l3, %g1, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x28]	! VADDR
	add		%l4, %g1, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x30]	! TTE

	/* TTE index is highest minus loop index.  */
	sub		%l7, %l5, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x38]

	/* Call through the pointer at p1275buf + 0x08; the delay slot
	 * puts the argument array address in %o0.
	 */
	sethi		%hi(p1275buf), %g2
	or		%g2, %lo(p1275buf), %g2
	ldx		[%g2 + 0x08], %o1
	call		%o1
	 add		%sp, (2047 + 128), %o0

	/* Lock into D-MMU */
	sethi		%hi(call_method), %g2
	or		%g2, %lo(call_method), %g2
	stx		%g2, [%sp + 2047 + 128 + 0x00]
	mov		5, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x08]
	mov		1, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x10]
	sethi		%hi(dtlb_load), %g2
	or		%g2, %lo(dtlb_load), %g2
	stx		%g2, [%sp + 2047 + 128 + 0x18]
	sethi		%hi(prom_mmu_ihandle_cache), %g2
	lduw		[%g2 + %lo(prom_mmu_ihandle_cache)], %g2
	stx		%g2, [%sp + 2047 + 128 + 0x20]

	/* Each TTE maps 4MB, convert index to offset.  */
	sllx		%l5, 22, %g1

	add		%l3, %g1, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x28]	! VADDR
	add		%l4, %g1, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x30]	! TTE

	/* TTE index is highest minus loop index.  */
	sub		%l7, %l5, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x38]

	sethi		%hi(p1275buf), %g2
	or		%g2, %lo(p1275buf), %g2
	ldx		[%g2 + 0x08], %o1
	call		%o1
	 add		%sp, (2047 + 128), %o0

	/* Next mapping; repeat until the index reaches the count.  */
	add		%l5, 1, %l5
	cmp		%l5, %l6
	bne,pt		%xcc, 3b
	 nop

	/* Release prom_entry_lock by storing a zero byte.  */
	sethi		%hi(prom_entry_lock), %g2
	stb		%g0, [%g2 + %lo(prom_entry_lock)]

	ba,pt		%xcc, after_lock_tlb
	 nop

/*
 * niagara_lock_tlb: sun4v variant of the locking loop.
 *
 * For each index below 'num_kernel_image_mappings' the
 * HV_FAST_MMU_MAP_PERM_ADDR function is requested through HV_FAST_TRAP
 * twice, once with HV_MMU_IMMU and once with HV_MMU_DMMU.  Registers at
 * each trap: %o0 = KERNBASE + (index << 22), %o1 = 0,
 * %o2 = kern_locked_tte_data + (index << 22), %o3 = MMU selector,
 * %o5 = function number.  Nothing is read back after the traps.
 * Falls through to after_lock_tlb when the loop is done.
 */
niagara_lock_tlb:
	sethi		%hi(KERNBASE), %l3
	sethi		%hi(kern_locked_tte_data), %l4
	ldx		[%l4 + %lo(kern_locked_tte_data)], %l4
	clr		%l5
	sethi		%hi(num_kernel_image_mappings), %l6
	lduw		[%l6 + %lo(num_kernel_image_mappings)], %l6

1:
	/* I-MMU request.  */
	mov		HV_FAST_MMU_MAP_PERM_ADDR, %o5
	sllx		%l5, 22, %g2
	add		%l3, %g2, %o0
	clr		%o1
	add		%l4, %g2, %o2
	mov		HV_MMU_IMMU, %o3
	ta		HV_FAST_TRAP

	/* D-MMU request; all argument registers are set up again.  */
	mov		HV_FAST_MMU_MAP_PERM_ADDR, %o5
	sllx		%l5, 22, %g2
	add		%l3, %g2, %o0
	clr		%o1
	add		%l4, %g2, %o2
	mov		HV_MMU_DMMU, %o3
	ta		HV_FAST_TRAP

	add		%l5, 1, %l5
	cmp		%l5, %l6
	bne,pt		%xcc, 1b
	 nop

/*
 * after_lock_tlb: both TLB locking paths meet here.
 *
 * Sets %pstate to PSTATE_PRIV | PSTATE_PEF, clears %fprs, selects ASI_P
 * in %asi and stores zero to the PRIMARY_CONTEXT and SECONDARY_CONTEXT
 * registers.
 *
 * Each "661:" store is paired with an entry in the .sun4v_1insn_patch
 * section holding the address of the store and an alternative instruction
 * that uses ASI_MMU instead of ASI_DMMU; presumably the alternative is
 * patched in when running on sun4v.
 */
after_lock_tlb:
	wrpr		%g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
	wr		%g0, 0, %fprs

	wr		%g0, ASI_P, %asi

	mov		PRIMARY_CONTEXT, %g7

661:	stxa		%g0, [%g7] ASI_DMMU
	.section	.sun4v_1insn_patch, "ax"
	.word		661b
	stxa		%g0, [%g7] ASI_MMU
	.previous

	membar		#Sync
	mov		SECONDARY_CONTEXT, %g7

661:	stxa		%g0, [%g7] ASI_DMMU
	.section	.sun4v_1insn_patch, "ax"
	.word		661b
	stxa		%g0, [%g7] ASI_MMU
	.previous

	membar		#Sync

/*
 * Move onto tramp_stack, poison %g4-%g6 and run the early per-cpu setup
 * calls: init_irqwork_curcpu, then (only when tlb_type == 3)
 * hard_smp_processor_id followed by sun4v_register_mondo_queues, and
 * finally init_cur_cpu_trap with the value loaded from [%l0] in %o0.
 */
	/* Everything we do here, until we properly take over the
	 * trap table, must be done with extreme care.  We cannot
	 * make any references to %g6 (current thread pointer),
	 * %g4 (current task pointer), or %g5 (base of current cpu's
	 * per-cpu area) until we properly take over the trap table
	 * from the firmware and hypervisor.
	 *
	 * Get onto temporary stack which is in the locked kernel image.
	 */
	/* %sp = tramp_stack + TRAMP_STACK_SIZE
	 *       - (STACKFRAME_SZ + STACK_BIAS + 256); %fp = 0.
	 */
	sethi		%hi(tramp_stack), %g1
	or		%g1, %lo(tramp_stack), %g1
	add		%g1, TRAMP_STACK_SIZE, %g1
	sub		%g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
	mov		0, %fp

	/* Put garbage in these registers to trap any access to them.  */
	set		0xdeadbeef, %g4
	set		0xdeadbeef, %g5
	set		0xdeadbeef, %g6

	call		init_irqwork_curcpu
	 nop

	/* Skip the next two calls unless tlb_type is 3.  */
	sethi		%hi(tlb_type), %g3
	lduw		[%g3 + %lo(tlb_type)], %g2
	cmp		%g2, 3
	bne,pt		%icc, 1f
	 nop

	/* %o0 is not reloaded between these calls, so presumably the
	 * result of the first is the argument of the second.
	 */
	call		hard_smp_processor_id
	 nop

	call		sun4v_register_mondo_queues
	 nop

	/* The delay slot loads the argument from [%l0].  */
1:	call		init_cur_cpu_trap
	 ldx		[%l0], %o0

/*
 * Store sparc64_kern_pri_context to the PRIMARY_CONTEXT register, zero
 * %wstate and take prom_entry_lock with an ldstub spin loop.
 */
	/* Start using proper page size encodings in ctx register.  */
	sethi		%hi(sparc64_kern_pri_context), %g3
	ldx		[%g3 + %lo(sparc64_kern_pri_context)], %g2
	mov		PRIMARY_CONTEXT, %g1

	/* The entry in .sun4v_1insn_patch records this store's address
	 * and an alternative instruction using ASI_MMU.
	 */
661:	stxa		%g2, [%g1] ASI_DMMU
	.section	.sun4v_1insn_patch, "ax"
	.word		661b
	stxa		%g2, [%g1] ASI_MMU
	.previous

	membar		#Sync

	wrpr		%g0, 0, %wstate

	/* Spin until ldstub reads back zero from prom_entry_lock.  */
	sethi		%hi(prom_entry_lock), %g2
1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
	brnz,pn		%g1, 1b
	 nop

/*
 * Call the firmware service named by prom_set_trap_table_name with
 * sparc64_ttable_tl0, then clear prom_entry_lock.
 *
 * Argument array at %sp + 2047 + 128, one 64-bit cell each:
 *
 *   0x00 prom_set_trap_table_name   0x08 2 when is_sun4v != 0, else 1
 *   0x10 0                          0x18 %hi(sparc64_ttable_tl0)
 *   0x20 (is_sun4v != 0 only) physical address computed below
 *
 * The routine called is the pointer stored at p1275buf + 0x08, with the
 * array address passed in %o0.
 */
	/* As a hack, put &init_thread_union into %g6.
	 * prom_world() loads from here to restore the %asi
	 * register.
	 */
	sethi		%hi(init_thread_union), %g6
	or		%g6, %lo(init_thread_union), %g6

	/* is_sun4v == 0: take the one-argument variant at 2:.  */
	sethi		%hi(is_sun4v), %o0
	lduw		[%o0 + %lo(is_sun4v)], %o0
	brz,pt		%o0, 2f
	 nop

	/* %g2 = (value produced by TRAP_LOAD_TRAP_BLOCK)
	 *       + TRAP_PER_CPU_FAULT_INFO, also stored to ASI_SCRATCHPAD
	 * at address 0.  %g3 is presumably the macro's scratch register.
	 */
	TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
	add		%g2, TRAP_PER_CPU_FAULT_INFO, %g2
	stxa		%g2, [%g0] ASI_SCRATCHPAD

	/* Compute physical address:
	 *
	 * paddr = kern_base + (mmfsa_vaddr - KERNBASE)
	 */
	sethi		%hi(KERNBASE), %g3
	sub		%g2, %g3, %g2
	sethi		%hi(kern_base), %g3
	ldx		[%g3 + %lo(kern_base)], %g3
	add		%g2, %g3, %o1
	sethi		%hi(sparc64_ttable_tl0), %o0

	set		prom_set_trap_table_name, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x00]
	mov		2, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x08]
	mov		0, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x10]
	stx		%o0, [%sp + 2047 + 128 + 0x18]
	stx		%o1, [%sp + 2047 + 128 + 0x20]
	sethi		%hi(p1275buf), %g2
	or		%g2, %lo(p1275buf), %g2
	ldx		[%g2 + 0x08], %o1
	call		%o1
	 add		%sp, (2047 + 128), %o0

	ba,pt		%xcc, 3f
	 nop

	/* Variant without the physical address cell.  */
2:	sethi		%hi(sparc64_ttable_tl0), %o0
	set		prom_set_trap_table_name, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x00]
	mov		1, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x08]
	mov		0, %g2
	stx		%g2, [%sp + 2047 + 128 + 0x10]
	stx		%o0, [%sp + 2047 + 128 + 0x18]
	sethi		%hi(p1275buf), %g2
	or		%g2, %lo(p1275buf), %g2
	ldx		[%g2 + 0x08], %o1
	call		%o1
	 add		%sp, (2047 + 128), %o0

	/* Release prom_entry_lock by storing a zero byte.  */
3:	sethi		%hi(prom_entry_lock), %g2
	stb		%g0, [%g2 + %lo(prom_entry_lock)]

/*
 * Final hand-off: load %g6 from [%l0] and %g4 from [%g6 + TI_TASK], move
 * to the stack at %g6 + (1 << THREAD_SHIFT) - (STACKFRAME_SZ + STACK_BIAS),
 * set PSTATE_IE and call smp_callin.  If smp_callin returns, cpu_panic is
 * called, followed by an endless branch-to-self.
 */
	ldx		[%l0], %g6
	ldx		[%g6 + TI_TASK], %g4

	mov		1, %g5
	sllx		%g5, THREAD_SHIFT, %g5
	sub		%g5, (STACKFRAME_SZ + STACK_BIAS), %g5
	add		%g6, %g5, %sp
	mov		0, %fp

	/* Read-modify-write of %pstate to set PSTATE_IE.  */
	rdpr		%pstate, %o1
	or		%o1, PSTATE_IE, %o1
	wrpr		%o1, 0, %pstate

	call		smp_callin
	 nop

	call		cpu_panic
	 nop
1:	b,a,pt		%xcc, 1b

	.align		8
sparc64_cpu_startup_end:
413