/*
 * This file contains miscellaneous low-level functions.
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
 * and Paul Mackerras.
 *
 * kexec bits:
 * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
 * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/cputable.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/kexec.h>

	.text

/*
 * This returns the high 64 bits of the product of two 64-bit numbers.
 */
_GLOBAL(mulhdu)
	cmpwi	r6,0
	cmpwi	cr1,r3,0
	mr	r10,r4
	mulhwu	r4,r4,r5
	beq	1f
	mulhwu	r0,r10,r6
	mullw	r7,r10,r5
	addc	r7,r0,r7
	addze	r4,r4
1:	beqlr	cr1		/* all done if high part of A is 0 */
	mr	r10,r3
	mullw	r9,r3,r5
	mulhwu	r3,r3,r5
	beq	2f
	mullw	r0,r10,r6
	mulhwu	r8,r10,r6
	addc	r7,r0,r7
	adde	r4,r4,r8
	addze	r3,r3
2:	addc	r4,r4,r9
	addze	r3,r3
	blr
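
/*
 * Illustrative C equivalent of mulhdu above (a sketch only; it assumes
 * a compiler with __int128 support, which the hand-coded 32x32-bit
 * partial products avoid needing):
 *
 *	unsigned long long mulhdu(unsigned long long a, unsigned long long b)
 *	{
 *		return (unsigned long long)(((unsigned __int128)a * b) >> 64);
 *	}
 *
 * Per the 32-bit ABI, A arrives in r3 (high word) / r4 (low word) and
 * B in r5/r6; the result is returned in r3/r4.
 */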

/*
 * Returns (address we're running at) - (address we were linked at)
 * for use before the text and data are mapped to KERNELBASE.
 */
_GLOBAL(reloc_offset)
	mflr	r0
	bl	1f
1:	mflr	r3
	LOADADDR(r4,1b)
	subf	r3,r4,r3
	mtlr	r0
	blr
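
/*
 * The bl 1f / mflr pair above is the usual trick for finding out where
 * we are actually running: the bl leaves the run-time address of label
 * 1: in LR, while LOADADDR yields its link-time address, so the
 * difference is the current relocation offset.  add_reloc_offset and
 * sub_reloc_offset below apply the same offset to a value in r3.
 */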

/*
 * add_reloc_offset(x) returns x + reloc_offset().
 */
_GLOBAL(add_reloc_offset)
	mflr	r0
	bl	1f
1:	mflr	r5
	LOADADDR(r4,1b)
	subf	r5,r4,r5
	add	r3,r3,r5
	mtlr	r0
	blr

/*
 * sub_reloc_offset(x) returns x - reloc_offset().
 */
_GLOBAL(sub_reloc_offset)
	mflr	r0
	bl	1f
1:	mflr	r5
	lis	r4,1b@ha
	addi	r4,r4,1b@l
	subf	r5,r4,r5
	subf	r3,r5,r3
	mtlr	r0
	blr

/*
 * reloc_got2 runs through the .got2 section adding an offset
 * to each entry.
 */
_GLOBAL(reloc_got2)
	mflr	r11
	lis	r7,__got2_start@ha
	addi	r7,r7,__got2_start@l
	lis	r8,__got2_end@ha
	addi	r8,r8,__got2_end@l
	subf	r8,r7,r8
	srwi.	r8,r8,2
	beqlr
	mtctr	r8
	bl	1f
1:	mflr	r0
	lis	r4,1b@ha
	addi	r4,r4,1b@l
	subf	r0,r4,r0
	add	r7,r0,r7
2:	lwz	r0,0(r7)
	add	r0,r0,r3
	stw	r0,0(r7)
	addi	r7,r7,4
	bdnz	2b
	mtlr	r11
	blr

/*
 * identify_cpu,
 * called with r3 = data offset and r4 = CPU number
 * doesn't change r3
 */
_GLOBAL(identify_cpu)
	addis	r8,r3,cpu_specs@ha
	addi	r8,r8,cpu_specs@l
	mfpvr	r7
1:
	lwz	r5,CPU_SPEC_PVR_MASK(r8)
	and	r5,r5,r7
	lwz	r6,CPU_SPEC_PVR_VALUE(r8)
	cmplw	0,r6,r5
	beq	1f
	addi	r8,r8,CPU_SPEC_ENTRY_SIZE
	b	1b
1:
	addis	r6,r3,cur_cpu_spec@ha
	addi	r6,r6,cur_cpu_spec@l
	sub	r8,r8,r3
	stw	r8,0(r6)
	blr
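
/*
 * Roughly equivalent C for the PVR match loop above (an illustrative
 * sketch; the CPU_SPEC_* constants come from asm-offsets.c and refer to
 * the pvr_mask/pvr_value fields of struct cpu_spec):
 *
 *	struct cpu_spec *s = cpu_specs;
 *	while ((mfspr(SPRN_PVR) & s->pvr_mask) != s->pvr_value)
 *		s++;
 *	cur_cpu_spec = s;	// stored minus the data offset in r3
 *
 * The loop itself has no bound, so it relies on cpu_specs ending with a
 * catch-all entry that matches any PVR.
 */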

/*
 * do_cpu_ftr_fixups - goes through the list of CPU feature fixups
 * and writes nops over sections of code that don't apply for this CPU.
 * r3 = data offset (not changed)
 */
_GLOBAL(do_cpu_ftr_fixups)
	/* Get CPU 0 features */
	addis	r6,r3,cur_cpu_spec@ha
	addi	r6,r6,cur_cpu_spec@l
	lwz	r4,0(r6)
	add	r4,r4,r3
	lwz	r4,CPU_SPEC_FEATURES(r4)

	/* Get the fixup table */
	addis	r6,r3,__start___ftr_fixup@ha
	addi	r6,r6,__start___ftr_fixup@l
	addis	r7,r3,__stop___ftr_fixup@ha
	addi	r7,r7,__stop___ftr_fixup@l

	/* Do the fixup */
1:	cmplw	0,r6,r7
	bgelr
	addi	r6,r6,16
	lwz	r8,-16(r6)	/* mask */
	and	r8,r8,r4
	lwz	r9,-12(r6)	/* value */
	cmplw	0,r8,r9
	beq	1b
	lwz	r8,-8(r6)	/* section begin */
	lwz	r9,-4(r6)	/* section end */
	subf.	r9,r8,r9
	beq	1b
	/* write nops over the section of code */
	/* todo: if large section, add a branch at the start of it */
	srwi	r9,r9,2
	mtctr	r9
	add	r8,r8,r3
	lis	r0,0x60000000@h	/* nop */
3:	stw	r0,0(r8)
	andi.	r10,r4,CPU_FTR_SPLIT_ID_CACHE@l
	beq	2f
	dcbst	0,r8		/* suboptimal, but simpler */
	sync
	icbi	0,r8
2:	addi	r8,r8,4
	bdnz	3b
	sync			/* additional sync needed on g4 */
	isync
	b	1b
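
/*
 * Each fixup record consumed above is 16 bytes: a feature mask, the
 * value the masked features must equal, and the begin/end addresses of
 * the alternative code section.  When the CPU's features don't match,
 * the section is overwritten with nops (0x60000000 is "ori r0,r0,0",
 * the canonical PowerPC nop), and dcbst/sync/icbi keep the I-cache
 * coherent on CPUs with split I/D caches.
 */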

/*
 * call_setup_cpu - call the setup_cpu function for this cpu
 * r3 = data offset, r24 = cpu number
 *
 * Setup function is called with:
 *   r3 = data offset
 *   r4 = ptr to CPU spec (relocated)
 */
_GLOBAL(call_setup_cpu)
	addis	r4,r3,cur_cpu_spec@ha
	addi	r4,r4,cur_cpu_spec@l
	lwz	r4,0(r4)
	add	r4,r4,r3
	lwz	r5,CPU_SPEC_SETUP(r4)
	cmpi	0,r5,0
	add	r5,r5,r3
	beqlr
	mtctr	r5
	bctr

#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_6xx)

/* This gets called by via-pmu.c to switch the PLL selection
 * on the 750fx CPU. This function should really be moved to some
 * other place (as should most of the cpufreq code in via-pmu).
 */
_GLOBAL(low_choose_750fx_pll)
	/* Clear MSR:EE */
	mfmsr	r7
	rlwinm	r0,r7,0,17,15
	mtmsr	r0

	/* If switching to PLL1, disable HID0:BTIC */
	cmplwi	cr0,r3,0
	beq	1f
	mfspr	r5,SPRN_HID0
	rlwinm	r5,r5,0,27,25
	sync
	mtspr	SPRN_HID0,r5
	isync
	sync

1:
	/* Calc new HID1 value */
	mfspr	r4,SPRN_HID1	/* Read the current HID1 value */
	rlwinm	r5,r3,16,15,15	/* Build a HID1:PS bit from the parameter */
	rlwinm	r4,r4,0,16,14	/* Clear HID1:PS out of the value read */
	or	r4,r4,r5	/* (rlwimi could do these two steps in one) */
	mtspr	SPRN_HID1,r4

	/* Store new HID1 image */
	rlwinm	r6,r1,0,0,18
	lwz	r6,TI_CPU(r6)
	slwi	r6,r6,2
	addis	r6,r6,nap_save_hid1@ha
	stw	r4,nap_save_hid1@l(r6)

	/* If switching to PLL0, enable HID0:BTIC */
	cmplwi	cr0,r3,0
	bne	1f
	mfspr	r5,SPRN_HID0
	ori	r5,r5,HID0_BTIC
	sync
	mtspr	SPRN_HID0,r5
	isync
	sync

1:
	/* Return */
	mtmsr	r7
	blr

_GLOBAL(low_choose_7447a_dfs)
	/* Clear MSR:EE */
	mfmsr	r7
	rlwinm	r0,r7,0,17,15
	mtmsr	r0

	/* Calc new HID1 value */
	mfspr	r4,SPRN_HID1
	insrwi	r4,r3,1,9	/* insert parameter into bit 9 */
	sync
	mtspr	SPRN_HID1,r4
	sync
	isync

	/* Return */
	mtmsr	r7
	blr

#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_6xx */

/*
 * Clear the MSR bits given by the mask, then "or" the given value on.
 *     _nmask_and_or_msr(nmask, value_to_or)
 */
_GLOBAL(_nmask_and_or_msr)
	mfmsr	r0		/* Get current msr */
	andc	r0,r0,r3	/* And off the bits set in r3 (first parm) */
	or	r0,r0,r4	/* Or on the bits in r4 (second parm) */
	SYNC			/* Some chip revs have problems here... */
	mtmsr	r0		/* Update machine state */
	isync
	blr			/* Done */
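
/*
 * In C terms the routine above amounts to (illustrative only):
 *
 *	mtmsr((mfmsr() & ~nmask) | value_to_or);
 *
 * i.e. r3 selects bits to clear and r4 bits to set.
 */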


/*
 * Flush MMU TLB
 */
_GLOBAL(_tlbia)
#if defined(CONFIG_40x)
	sync			/* Flush to memory before changing mapping */
	tlbia
	isync			/* Flush shadow TLB */
#elif defined(CONFIG_44x)
	li	r3,0
	sync

	/* Load high watermark */
	lis	r4,tlb_44x_hwater@ha
	lwz	r5,tlb_44x_hwater@l(r4)

1:	tlbwe	r3,r3,PPC44x_TLB_PAGEID
	addi	r3,r3,1
	cmpw	0,r3,r5
	ble	1b

	isync
#elif defined(CONFIG_FSL_BOOKE)
	/* Invalidate all entries in TLB0 */
	li	r3, 0x04
	tlbivax	0,3
	/* Invalidate all entries in TLB1 */
	li	r3, 0x0c
	tlbivax	0,3
	/* Invalidate all entries in TLB2 */
	li	r3, 0x14
	tlbivax	0,3
	/* Invalidate all entries in TLB3 */
	li	r3, 0x1c
	tlbivax	0,3
	msync
#ifdef CONFIG_SMP
	tlbsync
#endif /* CONFIG_SMP */
#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
#if defined(CONFIG_SMP)
	rlwinm	r8,r1,0,0,18
	lwz	r8,TI_CPU(r8)
	oris	r8,r8,10
	mfmsr	r10
	SYNC
	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
	rlwinm	r0,r0,0,28,26		/* clear DR */
	mtmsr	r0
	SYNC_601
	isync
	lis	r9,mmu_hash_lock@h
	ori	r9,r9,mmu_hash_lock@l
	tophys(r9,r9)
10:	lwarx	r7,0,r9
	cmpwi	0,r7,0
	bne-	10b
	stwcx.	r8,0,r9
	bne-	10b
	sync
	tlbia
	sync
	TLBSYNC
	li	r0,0
	stw	r0,0(r9)		/* clear mmu_hash_lock */
	mtmsr	r10
	SYNC_601
	isync
#else /* CONFIG_SMP */
	sync
	tlbia
	sync
#endif /* CONFIG_SMP */
#endif /* ! defined(CONFIG_40x) */
	blr

/*
 * Flush MMU TLB for a particular address
 */
_GLOBAL(_tlbie)
#if defined(CONFIG_40x)
	tlbsx.	r3, 0, r3
	bne	10f
	sync
	/* There are only 64 TLB entries, so r3 < 64, which means bit 25 is clear.
	 * Since 25 is the V bit in the TLB_TAG, loading this value will invalidate
	 * the TLB entry. */
	tlbwe	r3, r3, TLB_TAG
	isync
10:
#elif defined(CONFIG_44x)
	mfspr	r4,SPRN_MMUCR
	mfspr	r5,SPRN_PID			/* Get PID */
	rlwimi	r4,r5,0,24,31			/* Set TID */
	mtspr	SPRN_MMUCR,r4

	tlbsx.	r3, 0, r3
	bne	10f
	sync
	/* There are only 64 TLB entries, so r3 < 64,
	 * which means bit 22 is clear.  Since 22 is
	 * the V bit in the TLB_PAGEID, loading this
	 * value will invalidate the TLB entry.
	 */
	tlbwe	r3, r3, PPC44x_TLB_PAGEID
	isync
10:
#elif defined(CONFIG_FSL_BOOKE)
	rlwinm	r4, r3, 0, 0, 19
	ori	r5, r4, 0x08	/* TLBSEL = 1 */
	ori	r6, r4, 0x10	/* TLBSEL = 2 */
	ori	r7, r4, 0x18	/* TLBSEL = 3 */
	tlbivax	0, r4
	tlbivax	0, r5
	tlbivax	0, r6
	tlbivax	0, r7
	msync
#if defined(CONFIG_SMP)
	tlbsync
#endif /* CONFIG_SMP */
#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
#if defined(CONFIG_SMP)
	rlwinm	r8,r1,0,0,18
	lwz	r8,TI_CPU(r8)
	oris	r8,r8,11
	mfmsr	r10
	SYNC
	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
	rlwinm	r0,r0,0,28,26		/* clear DR */
	mtmsr	r0
	SYNC_601
	isync
	lis	r9,mmu_hash_lock@h
	ori	r9,r9,mmu_hash_lock@l
	tophys(r9,r9)
10:	lwarx	r7,0,r9
	cmpwi	0,r7,0
	bne-	10b
	stwcx.	r8,0,r9
	bne-	10b
	eieio
	tlbie	r3
	sync
	TLBSYNC
	li	r0,0
	stw	r0,0(r9)		/* clear mmu_hash_lock */
	mtmsr	r10
	SYNC_601
	isync
#else /* CONFIG_SMP */
	tlbie	r3
	sync
#endif /* CONFIG_SMP */
#endif /* ! CONFIG_40x */
	blr

/*
 * Flush instruction cache.
 * This is a no-op on the 601.
 */
_GLOBAL(flush_instruction_cache)
#if defined(CONFIG_8xx)
	isync
	lis	r5, IDC_INVALL@h
	mtspr	SPRN_IC_CST, r5
#elif defined(CONFIG_4xx)
#ifdef CONFIG_403GCX
	li      r3, 512
	mtctr   r3
	lis     r4, KERNELBASE@h
1:	iccci   0, r4
	addi    r4, r4, 16
	bdnz    1b
#else
	lis	r3, KERNELBASE@h
	iccci	0,r3
#endif
#elif defined(CONFIG_FSL_BOOKE)
BEGIN_FTR_SECTION
	mfspr   r3,SPRN_L1CSR0
	ori     r3,r3,L1CSR0_CFI|L1CSR0_CLFC
	/* msync; isync recommended here */
	mtspr   SPRN_L1CSR0,r3
	isync
	blr
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	mfspr	r3,SPRN_L1CSR1
	ori	r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
	mtspr	SPRN_L1CSR1,r3
#else
	mfspr	r3,SPRN_PVR
	rlwinm	r3,r3,16,16,31
	cmpwi	0,r3,1
	beqlr			/* for 601, do nothing */
	/* 603/604 processor - use invalidate-all bit in HID0 */
	mfspr	r3,SPRN_HID0
	ori	r3,r3,HID0_ICFI
	mtspr	SPRN_HID0,r3
#endif /* CONFIG_8xx/4xx */
	isync
	blr

/*
 * Write any modified data cache blocks out to memory
 * and invalidate the corresponding instruction cache blocks.
 * This is a no-op on the 601.
 *
 * flush_icache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(__flush_icache_range)
BEGIN_FTR_SECTION
	blr				/* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4
	mr	r6,r3
1:	dcbst	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbst's to get to ram */
	mtctr	r4
2:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	2b
	sync				/* additional sync needed on g4 */
	isync
	blr
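
/*
 * The two loops above are the standard PowerPC coherency sequence,
 * shown here as an illustrative C-like sketch (dcbst/icbi stand for the
 * corresponding instructions):
 *
 *	start &= ~(L1_CACHE_BYTES - 1);
 *	for (p = start; p < stop; p += L1_CACHE_BYTES)
 *		dcbst(p);		// push dirty data to memory
 *	sync();				// wait for the stores to complete
 *	for (p = start; p < stop; p += L1_CACHE_BYTES)
 *		icbi(p);		// toss the stale icache lines
 *	sync();				// (extra sync needed on G4)
 *	isync();			// make the new code visible to fetch
 */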
/*
 * Write any modified data cache blocks out to memory.
 * Does not invalidate the corresponding cache lines (especially for
 * any corresponding instruction cache).
 *
 * clean_dcache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(clean_dcache_range)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4

1:	dcbst	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbst's to get to ram */
	blr

/*
 * Write any modified data cache blocks out to memory and invalidate them.
 * Does not invalidate the corresponding instruction cache blocks.
 *
 * flush_dcache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(flush_dcache_range)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4

1:	dcbf	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbf's to get to ram */
	blr

/*
 * Like above, but invalidate the D-cache.  This is used by the 8xx
 * to invalidate the cache so the PPC core doesn't get stale data
 * from the CPM (no cache snooping here :-).
 *
 * invalidate_dcache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(invalidate_dcache_range)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4

1:	dcbi	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for the dcbi's to take effect */
	blr

/*
 * Flush a particular page from the data cache to RAM.
 * Note: this is necessary because the instruction cache does *not*
 * snoop from the data cache.
 * This is a no-op on the 601 which has a unified cache.
 *
 *	void __flush_dcache_icache(void *page)
 */
_GLOBAL(__flush_dcache_icache)
BEGIN_FTR_SECTION
	blr					/* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	rlwinm	r3,r3,0,0,19			/* Get page base address */
	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
	mtctr	r4
	mr	r6,r3
0:	dcbst	0,r3				/* Write line to ram */
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	0b
	sync
	mtctr	r4
1:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	1b
	sync
	isync
	blr

/*
 * Flush a particular page from the data cache to RAM, identified
 * by its physical address.  We turn off the MMU so we can just use
 * the physical address (this may be a highmem page without a kernel
 * mapping).
 *
 *	void __flush_dcache_icache_phys(unsigned long physaddr)
 */
_GLOBAL(__flush_dcache_icache_phys)
BEGIN_FTR_SECTION
	blr					/* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	mfmsr	r10
	rlwinm	r0,r10,0,28,26			/* clear DR */
	mtmsr	r0
	isync
	rlwinm	r3,r3,0,0,19			/* Get page base address */
	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
	mtctr	r4
	mr	r6,r3
0:	dcbst	0,r3				/* Write line to ram */
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	0b
	sync
	mtctr	r4
1:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	1b
	sync
	mtmsr	r10				/* restore DR */
	isync
	blr

/*
 * Clear pages using the dcbz instruction, which doesn't cause any
 * memory traffic (except to write out any cache lines which get
 * displaced).  This only works on cacheable memory.
 *
 * void clear_pages(void *page, int order);
 */
_GLOBAL(clear_pages)
	li	r0,4096/L1_CACHE_BYTES
	slw	r0,r0,r4
	mtctr	r0
#ifdef CONFIG_8xx
	li	r4, 0
1:	stw	r4, 0(r3)
	stw	r4, 4(r3)
	stw	r4, 8(r3)
	stw	r4, 12(r3)
#else
1:	dcbz	0,r3
#endif
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	blr
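
/*
 * Note that dcbz establishes a zeroed cache line without reading it
 * from memory first, so the CTR value above is simply the number of
 * cache lines in (4096 << order) bytes.  The 8xx build clears each
 * line with ordinary word stores instead of dcbz.
 */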

/*
 * Copy a whole page.  We use the dcbz instruction on the destination
 * to reduce memory traffic (it eliminates the unnecessary reads of
 * the destination into cache).  This requires that the destination
 * is cacheable.
 */
#define COPY_16_BYTES		\
	lwz	r6,4(r4);	\
	lwz	r7,8(r4);	\
	lwz	r8,12(r4);	\
	lwzu	r9,16(r4);	\
	stw	r6,4(r3);	\
	stw	r7,8(r3);	\
	stw	r8,12(r3);	\
	stwu	r9,16(r3)

_GLOBAL(copy_page)
	addi	r3,r3,-4
	addi	r4,r4,-4

#ifdef CONFIG_8xx
	/* don't use prefetch on 8xx */
	li	r0,4096/L1_CACHE_BYTES
	mtctr	r0
1:	COPY_16_BYTES
	bdnz	1b
	blr

#else	/* not 8xx, we can prefetch */
	li	r5,4

#if MAX_COPY_PREFETCH > 1
	li	r0,MAX_COPY_PREFETCH
	li	r11,4
	mtctr	r0
11:	dcbt	r11,r4
	addi	r11,r11,L1_CACHE_BYTES
	bdnz	11b
#else /* MAX_COPY_PREFETCH == 1 */
	dcbt	r5,r4
	li	r11,L1_CACHE_BYTES+4
#endif /* MAX_COPY_PREFETCH */
	li	r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH
	crclr	4*cr0+eq
2:
	mtctr	r0
1:
	dcbt	r11,r4
	dcbz	r5,r3
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	1b
	beqlr
	crnot	4*cr0+eq,4*cr0+eq
	li	r0,MAX_COPY_PREFETCH
	li	r11,4
	b	2b
#endif	/* CONFIG_8xx */
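
/*
 * The non-8xx copy loop above is software-pipelined: dcbt touches the
 * source MAX_COPY_PREFETCH lines ahead of the copy and dcbz allocates
 * each destination line so it is never read from memory.  The cr0.eq
 * flag distinguishes the two passes: it is cleared before the main pass
 * (so beqlr falls through) and set by crnot before the final
 * MAX_COPY_PREFETCH lines are copied, so the second beqlr returns.
 */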

/*
 * void atomic_clear_mask(atomic_t mask, atomic_t *addr);
 * void atomic_set_mask(atomic_t mask, atomic_t *addr);
 */
_GLOBAL(atomic_clear_mask)
10:	lwarx	r5,0,r4
	andc	r5,r5,r3
	PPC405_ERR77(0,r4)
	stwcx.	r5,0,r4
	bne-	10b
	blr
_GLOBAL(atomic_set_mask)
10:	lwarx	r5,0,r4
	or	r5,r5,r3
	PPC405_ERR77(0,r4)
	stwcx.	r5,0,r4
	bne-	10b
	blr
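
/*
 * Both routines above are the usual lwarx/stwcx. retry loop: they
 * atomically do *addr &= ~mask (clear) or *addr |= mask (set),
 * retrying until the store-conditional succeeds.  PPC405_ERR77 expands
 * to an erratum workaround on affected 405 cores and to nothing
 * elsewhere.
 */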

/*
 * I/O string operations
 *
 * insb(port, buf, len)
 * outsb(port, buf, len)
 * insw(port, buf, len)
 * outsw(port, buf, len)
 * insl(port, buf, len)
 * outsl(port, buf, len)
 * insw_ns(port, buf, len)
 * outsw_ns(port, buf, len)
 * insl_ns(port, buf, len)
 * outsl_ns(port, buf, len)
 *
 * The *_ns versions don't do byte-swapping.
 */
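/*
 * For the halfword/word variants, lhbrx/sthbrx and lwbrx/stwbrx perform
 * the byte reversal needed for little-endian PCI/ISA devices, while the
 * _ns ("no swap") variants use plain lhz/sth and lwz/stw.  The eieio
 * after each access keeps the device accesses in order.
 */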
_GLOBAL(_insb)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,1
	blelr-
00:	lbz	r5,0(r3)
	eieio
	stbu	r5,1(r4)
	bdnz	00b
	blr

_GLOBAL(_outsb)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,1
	blelr-
00:	lbzu	r5,1(r4)
	stb	r5,0(r3)
	eieio
	bdnz	00b
	blr

_GLOBAL(_insw)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,2
	blelr-
00:	lhbrx	r5,0,r3
	eieio
	sthu	r5,2(r4)
	bdnz	00b
	blr

_GLOBAL(_outsw)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,2
	blelr-
00:	lhzu	r5,2(r4)
	eieio
	sthbrx	r5,0,r3
	bdnz	00b
	blr

_GLOBAL(_insl)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,4
	blelr-
00:	lwbrx	r5,0,r3
	eieio
	stwu	r5,4(r4)
	bdnz	00b
	blr

_GLOBAL(_outsl)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,4
	blelr-
00:	lwzu	r5,4(r4)
	stwbrx	r5,0,r3
	eieio
	bdnz	00b
	blr

_GLOBAL(__ide_mm_insw)
_GLOBAL(_insw_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,2
	blelr-
00:	lhz	r5,0(r3)
	eieio
	sthu	r5,2(r4)
	bdnz	00b
	blr

_GLOBAL(__ide_mm_outsw)
_GLOBAL(_outsw_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,2
	blelr-
00:	lhzu	r5,2(r4)
	sth	r5,0(r3)
	eieio
	bdnz	00b
	blr

_GLOBAL(__ide_mm_insl)
_GLOBAL(_insl_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,4
	blelr-
00:	lwz	r5,0(r3)
	eieio
	stwu	r5,4(r4)
	bdnz	00b
	blr

_GLOBAL(__ide_mm_outsl)
_GLOBAL(_outsl_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,4
	blelr-
00:	lwzu	r5,4(r4)
	stw	r5,0(r3)
	eieio
	bdnz	00b
	blr

/*
 * Extended precision shifts.
 *
 * Updated to be valid for shift counts from 0 to 63 inclusive.
 * -- Gabriel
 *
 * R3/R4 holds the 64-bit value
 * R5    holds the shift count
 * result in R3/R4
 *
 *  ashrdi3: arithmetic right shift (sign propagation)
 *  lshrdi3: logical right shift
 *  ashldi3: left shift
 */
_GLOBAL(__ashrdi3)
	subfic	r6,r5,32
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	rlwinm	r8,r7,0,32	# t3 = (count < 32) ? 32 : 0
	sraw	r7,r3,r7	# t2 = MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	slw	r7,r7,r8	# t2 = (count < 32) ? 0 : t2
	sraw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr

_GLOBAL(__ashldi3)
	subfic	r6,r5,32
	slw	r3,r3,r5	# MSW = count > 31 ? 0 : MSW << count
	addi	r7,r5,32	# could be xori, or addi with -32
	srw	r6,r4,r6	# t1 = count > 31 ? 0 : LSW >> (32-count)
	slw	r7,r4,r7	# t2 = count < 32 ? 0 : LSW << (count-32)
	or	r3,r3,r6	# MSW |= t1
	slw	r4,r4,r5	# LSW = LSW << count
	or	r3,r3,r7	# MSW |= t2
	blr

_GLOBAL(__lshrdi3)
	subfic	r6,r5,32
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	srw	r7,r3,r7	# t2 = count < 32 ? 0 : MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	srw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr
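
/*
 * Worked example of the decomposition, using __lshrdi3 with count c in
 * 0..63: the low word of the result is  LSW>>c | MSW<<(32-c)  when
 * c < 32, and  MSW>>(c-32)  when c >= 32; the high word is just MSW>>c
 * (which is 0 once c >= 32).  srw/slw return 0 for shift amounts of
 * 32..63, which is what lets the three-term OR above cover both cases
 * without a branch.
 */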

_GLOBAL(abs)
	srawi	r4,r3,31
	xor	r3,r3,r4
	sub	r3,r3,r4
	blr
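
/*
 * Branchless abs(): r4 = x >> 31 (arithmetic) is 0 for x >= 0 and -1
 * for x < 0, and (x ^ r4) - r4 then yields x or -x respectively.
 */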

_GLOBAL(_get_SP)
	mr	r3,r1		/* Close enough */
	blr

/*
 * Create a kernel thread
 *   kernel_thread(fn, arg, flags)
 */
_GLOBAL(kernel_thread)
	stwu	r1,-16(r1)
	stw	r30,8(r1)
	stw	r31,12(r1)
	mr	r30,r3		/* function */
	mr	r31,r4		/* argument */
	ori	r3,r5,CLONE_VM	/* flags */
	oris	r3,r3,CLONE_UNTRACED>>16
	li	r4,0		/* new sp (unused) */
	li	r0,__NR_clone
	sc
	cmpwi	0,r3,0		/* parent or child? */
	bne	1f		/* return if parent */
	li	r0,0		/* make top-level stack frame */
	stwu	r0,-16(r1)
	mtlr	r30		/* fn addr in lr */
	mr	r3,r31		/* load arg and call fn */
	PPC440EP_ERR42
	blrl
	li	r0,__NR_exit	/* exit if function returns */
	li	r3,0
	sc
1:	lwz	r30,8(r1)
	lwz	r31,12(r1)
	addi	r1,r1,16
	blr

_GLOBAL(execve)
	li	r0,__NR_execve
	sc
	bnslr
	neg	r3,r3
	blr

/*
 * This routine is just here to keep GCC happy - sigh...
 */
_GLOBAL(__main)
	blr

#ifdef CONFIG_KEXEC
	/*
	 * Must be relocatable PIC code callable as a C function.
	 */
	.globl relocate_new_kernel
relocate_new_kernel:
	/* r3 = page_list   */
	/* r4 = reboot_code_buffer */
	/* r5 = start_address      */

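	/*
	 * page_list (r3) is the head of the kexec indirection chain:
	 * each 32-bit entry is a page-aligned physical address tagged in
	 * its low bits with IND_DESTINATION (1<<0), IND_INDIRECTION
	 * (1<<1), IND_DONE (1<<2) or IND_SOURCE (1<<3), which the copy
	 * loop below decodes with the rlwinm. tests.
	 */
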
	li	r0, 0

	/*
	 * Set Machine Status Register to a known status,
	 * switch the MMU off and jump to 1: in a single step.
	 */

	mr	r8, r0
	ori     r8, r8, MSR_RI|MSR_ME
	mtspr	SPRN_SRR1, r8
	addi	r8, r4, 1f - relocate_new_kernel
	mtspr	SPRN_SRR0, r8
	sync
	rfi

1:
	/* from this point address translation is turned off */
	/* and interrupts are disabled */

	/* set a new stack at the bottom of our page... */
	/* (not really needed now) */
	addi	r1, r4, KEXEC_CONTROL_CODE_SIZE - 8 /* for LR Save+Back Chain */
	stw	r0, 0(r1)

	/* Do the copies */
	li	r6, 0 /* checksum */
	mr	r0, r3
	b	1f

0:	/* top, read another word for the indirection page */
	lwzu	r0, 4(r3)

1:
	/* is it a destination page? (r8) */
	rlwinm.	r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
	beq	2f

	rlwinm	r8, r0, 0, 0, 19 /* clear kexec flags, page align */
	b	0b

2:	/* is it an indirection page? (r3) */
	rlwinm.	r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
	beq	2f

	rlwinm	r3, r0, 0, 0, 19 /* clear kexec flags, page align */
	subi	r3, r3, 4
	b	0b

2:	/* are we done? */
	rlwinm.	r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
	beq	2f
	b	3f

2:	/* is it a source page? (r9) */
	rlwinm.	r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
	beq	0b

	rlwinm	r9, r0, 0, 0, 19 /* clear kexec flags, page align */

	li	r7, PAGE_SIZE / 4
	mtctr   r7
	subi    r9, r9, 4
	subi    r8, r8, 4
9:
	lwzu    r0, 4(r9)  /* do the copy */
	xor	r6, r6, r0
	stwu    r0, 4(r8)
	dcbst	0, r8
	sync
	icbi	0, r8
	bdnz    9b

	addi    r9, r9, 4
	addi    r8, r8, 4
	b	0b

3:

	/* To be certain of avoiding problems with self-modifying code
	 * execute a serializing instruction here.
	 */
	isync
	sync

	/* jump to the entry point, usually the setup routine */
	mtlr	r5
	blrl

1:	b	1b

relocate_new_kernel_end:

	.globl relocate_new_kernel_size
relocate_new_kernel_size:
	.long relocate_new_kernel_end - relocate_new_kernel
#endif
