xref: /openbmc/linux/arch/parisc/kernel/pacache.S (revision 151f4e2b)
1/*
2 *  PARISC TLB and cache flushing support
3 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
4 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
5 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
6 *
7 *    This program is free software; you can redistribute it and/or modify
8 *    it under the terms of the GNU General Public License as published by
9 *    the Free Software Foundation; either version 2, or (at your option)
10 *    any later version.
11 *
12 *    This program is distributed in the hope that it will be useful,
13 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 *    GNU General Public License for more details.
16 *
17 *    You should have received a copy of the GNU General Public License
18 *    along with this program; if not, write to the Free Software
19 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20 */
21
22/*
23 * NOTE: fdc,fic, and pdc instructions that use base register modification
24 *       should only use index and base registers that are not shadowed,
25 *       so that the fast path emulation in the non access miss handler
26 *       can be used.
27 */
28
29#ifdef CONFIG_64BIT
30	.level	2.0w
31#else
32	.level	2.0
33#endif
34
35#include <asm/psw.h>
36#include <asm/assembly.h>
37#include <asm/pgtable.h>
38#include <asm/cache.h>
39#include <asm/ldcw.h>
40#include <asm/alternative.h>
41#include <linux/linkage.h>
42#include <linux/init.h>
43
44	.section .text.hot
45	.align	16
46
/*
 * flush_tlb_all_local
 *
 * Purge the entire instruction TLB and data TLB with pitlbe/pdtlbe,
 * walking the space-id / offset / loop geometry read from cache_info.
 *
 * The routine clears the I-bit (remembering the old system mask in %r19),
 * switches to real mode through an rfi with REAL_MODE_PSW, runs both
 * purge loops, and then returns to virtual mode through a second rfi with
 * KERNEL_PSW or'ed with the value saved in %r19.
 *
 * Takes no arguments.  Returns through %r2.
 * Writes %r1, %r19-%r22, %r28, %r29, %r31, %arg0-%arg3, %sr1,
 * %cr17, %cr18 and %ipsw.
 */
ENTRY_CFI(flush_tlb_all_local)
	/*
	 * The pitlbe and pdtlbe instructions should only be used to
	 * flush the entire tlb. Also, there needs to be no intervening
	 * tlb operations, e.g. tlb misses, so the operation needs
	 * to happen in real mode with all interruptions disabled.
	 */

	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
	rsm		PSW_SM_I, %r19		/* save I-bit state */
	load32		PA(1f), %r1		/* physical address of label 1 below */
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl           %r1, %ipsw
	rfi					/* continue at 1: with REAL_MODE_PSW */
	nop

1:      load32		PA(cache_info), %r1	/* cache_info is addressed physically here */

	/* Flush Instruction Tlb */

	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fitmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fitmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
	pitlbe		%r0(%sr1, %r28)
	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */

fitoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fitonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */

	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20		/* increment space */

fitdone:

	/* Flush Data Tlb (same loop structure as the instruction TLB above) */

	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fdtmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fdtmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
	pdtlbe		%r0(%sr1, %r28)
	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22,fdtdone	/* Outer loop count decr */

fdtoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fdtonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */

	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */


fdtdone:
	/*
	 * Switch back to virtual mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1			/* virtual address of label 2 below */
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1	/* I-bit to state on entry */
	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
	rfi				/* continue at 2: with the rebuilt PSW */
	nop

2:      bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_tlb_all_local)
190
191	.import cache_info,data
192
/*
 * flush_instruction_cache_local
 *
 * Flush the whole instruction cache with fice, driven by the
 * ICACHE_BASE / ICACHE_STRIDE / ICACHE_COUNT / ICACHE_LOOP values read
 * from cache_info.  The I-bit is cleared while the loops run; the system
 * mask that was in effect is kept in %r22 and written back with mtsm.
 *
 * The instructions between local labels 88 and 89 are registered with
 * ALTERNATIVE() under ALT_COND_NO_ICACHE with INSN_NOP as replacement
 * (see asm/alternative.h for how that entry is consumed).
 *
 * Takes no arguments.  Returns through %r2.
 * Writes %r1, %r22, %r31, %arg0-%arg3 and %sr1.
 */
ENTRY_CFI(flush_instruction_cache_local)
88:	load32		cache_info, %r1

	/* Flush Instruction Cache */

	LDREG		ICACHE_BASE(%r1), %arg0
	LDREG		ICACHE_STRIDE(%r1), %arg1
	LDREG		ICACHE_COUNT(%r1), %arg2
	LDREG		ICACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */

fimanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
	fice            %r0(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */

fioneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fice instruction */
	cmpib,COND(>>=),n	15, %arg2, fioneloop2

	/* Unrolled: 15 fice here plus one in the addib delay slot = 16 per pass */
fioneloop1:
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fioneloop1
	fice,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */

	/* Remainder: one fice per pass */
fioneloop2:
	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */

fisync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_instruction_cache_local)
251
252
253	.import cache_info, data
/*
 * flush_data_cache_local
 *
 * Flush the whole data cache with fdce, driven by the
 * DCACHE_BASE / DCACHE_STRIDE / DCACHE_COUNT / DCACHE_LOOP values read
 * from cache_info.  The I-bit is cleared while the loops run; the system
 * mask that was in effect is kept in %r22 and written back with mtsm
 * after a syncdma + sync.
 *
 * The instructions between local labels 88 and 89 are registered with
 * ALTERNATIVE() under ALT_COND_NO_DCACHE with INSN_NOP as replacement
 * (see asm/alternative.h for how that entry is consumed).
 *
 * Takes no arguments.  Returns through %r2.
 * Writes %r1, %r22, %r31, %arg0-%arg3 and %sr1.
 */
ENTRY_CFI(flush_data_cache_local)
88:	load32		cache_info, %r1

	/* Flush Data Cache */

	LDREG		DCACHE_BASE(%r1), %arg0
	LDREG		DCACHE_STRIDE(%r1), %arg1
	LDREG		DCACHE_COUNT(%r1), %arg2
	LDREG		DCACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */

fdmanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
	fdce		%r0(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */

fdoneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fdce instruction */
	cmpib,COND(>>=),n	15, %arg2, fdoneloop2

	/* Unrolled: 15 fdce here plus one in the addib delay slot = 16 per pass */
fdoneloop1:
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fdoneloop1
	fdce,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */

	/* Remainder: one fdce per pass */
fdoneloop2:
	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */

fdsync:
	syncdma
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_data_cache_local)
313
/*
 * clear_page_asm
 *
 * Clear page using kernel mapping.
 *
 * In:   %r26 = address of the page to zero
 * Out:  nothing; returns through %r2
 * Writes %r1 (loop counter) and %r26 (left pointing one page past the
 * address passed in).
 *
 * The loop is unrolled: 128 bytes per pass with std on a 64-bit kernel,
 * 64 bytes per pass with stw on a 32-bit kernel.
 */

ENTRY_CFI(clear_page_asm)
#ifdef CONFIG_64BIT

	/* Unroll the loop.  */
	ldi		(PAGE_SIZE / 128), %r1

1:
	std		%r0, 0(%r26)
	std		%r0, 8(%r26)
	std		%r0, 16(%r26)
	std		%r0, 24(%r26)
	std		%r0, 32(%r26)
	std		%r0, 40(%r26)
	std		%r0, 48(%r26)
	std		%r0, 56(%r26)
	std		%r0, 64(%r26)
	std		%r0, 72(%r26)
	std		%r0, 80(%r26)
	std		%r0, 88(%r26)
	std		%r0, 96(%r26)
	std		%r0, 104(%r26)
	std		%r0, 112(%r26)
	std		%r0, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26		/* delay slot: advance to next 128 bytes */

#else

	/*
	 * Note that until (if) we start saving the full 64-bit register
	 * values on interrupt, we can't use std on a 32 bit kernel.
	 */
	ldi		(PAGE_SIZE / 64), %r1

1:
	stw		%r0, 0(%r26)
	stw		%r0, 4(%r26)
	stw		%r0, 8(%r26)
	stw		%r0, 12(%r26)
	stw		%r0, 16(%r26)
	stw		%r0, 20(%r26)
	stw		%r0, 24(%r26)
	stw		%r0, 28(%r26)
	stw		%r0, 32(%r26)
	stw		%r0, 36(%r26)
	stw		%r0, 40(%r26)
	stw		%r0, 44(%r26)
	stw		%r0, 48(%r26)
	stw		%r0, 52(%r26)
	stw		%r0, 56(%r26)
	stw		%r0, 60(%r26)

	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26		/* delay slot: advance to next 64 bytes */
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_page_asm)
376
/*
 * copy_page_asm
 *
 * Copy page using kernel mapping.
 *
 * In:   %r26 = destination page address
 *       %r25 = source page address
 * Out:  nothing; returns through %r2
 * Writes %r1 (loop counter), %r19-%r22 (data), %r25 and %r26 (both are
 * advanced as the copy proceeds).
 */

ENTRY_CFI(copy_page_asm)
#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */

	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25		/* source pointer to next 128 bytes */
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26		/* delay slot: destination pointer to next 128 bytes */

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */
	ldw		0(%r25), %r19		/* first word is loaded ahead of the loop */
	ldi		(PAGE_SIZE / 64), %r1

1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)
	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)
	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)
	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25		/* source pointer to next 64 bytes */
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26		/* destination pointer to next 64 bytes */
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19		/* delay slot: first word of the next pass */
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_page_asm)
487
488/*
489 * NOTE: Code in clear_user_page has a hard coded dependency on the
490 *       maximum alias boundary being 4 Mb. We've been assured by the
491 *       parisc chip designers that there will not ever be a parisc
492 *       chip with a larger alias boundary (Never say never :-) ).
493 *
494 *       Subtle: the dtlb miss handlers support the temp alias region by
495 *       "knowing" that if a dtlb miss happens within the temp alias
496 *       region it must have occurred while in clear_user_page. Since
497 *       this routine makes use of processor local translations, we
498 *       don't want to insert them into the kernel page table. Instead,
499 *       we load up some general registers (they need to be registers
500 *       which aren't shadowed) with the physical page numbers (preshifted
501 *       for tlb insertion) needed to insert the translations. When we
502 *       miss on the translation, the dtlb miss handler inserts the
503 *       translation into the tlb using these values:
504 *
505 *          %r26 physical page (shifted for tlb insert) of "to" translation
506 *          %r23 physical page (shifted for tlb insert) of "from" translation
507 */
508
/*
 * convert_phys_for_tlb_insert20 phys
 *
 * Drop prot bits and convert to page addr for iitlbt and idtlbt.
 * The register named by \phys is rewritten in place: a bit field is
 * extracted (unsigned) from it, and when _PAGE_SIZE_ENCODING_DEFAULT is
 * non-zero that encoding is deposited into the low-order bits.
 *
 * Uses extrd,u/depdi, so it is only expanded in the CONFIG_64BIT paths
 * of this file.
 */
#define PAGE_ADD_SHIFT  (PAGE_SHIFT-12)
	.macro          convert_phys_for_tlb_insert20  phys
	extrd,u         \phys, 56-PAGE_ADD_SHIFT, 32-PAGE_ADD_SHIFT, \phys
#if _PAGE_SIZE_ENCODING_DEFAULT
	depdi           _PAGE_SIZE_ENCODING_DEFAULT, 63, (63-58), \phys
#endif
	.endm
517
518	/*
519	 * copy_user_page_asm() performs a page copy using mappings
520	 * equivalent to the user page mappings.  It can be used to
521	 * implement copy_user_page() but unfortunately both the `from'
522	 * and `to' pages need to be flushed through mappings equivalent
523	 * to the user mappings after the copy because the kernel accesses
524	 * the `from' page through the kmap kernel mapping and the `to'
525	 * page needs to be flushed since code can be copied.  As a
526	 * result, this implementation is less efficient than the simpler
527	 * copy using the kernel mapping.  It only needs the `from' page
528	 * to be flushed via the user mapping.  The kunmap routines handle
529	 * the flushes needed for the kernel mapping.
530	 *
531	 * I'm still keeping this around because it may be possible to
532	 * use it if more information is passed into copy_user_page().
533	 * Have to do some measurements to see if it is worthwhile to
534	 * lobby for such a change.
535	 *
536	 */
537
/*
 * copy_user_page_asm
 *
 * Copy one page through two temporary-alias mappings built in the
 * TMPALIAS_MAP_START region, instead of through the kernel mapping.
 *
 * In:   %r26 = kernel virtual address of the `to' page
 *       %r25 = kernel virtual address of the `from' page
 *       %r24 = address whose low 22 bits are deposited into the alias
 *              address (presumably the user virtual address - confirm
 *              against the C caller)
 * Out:  nothing; returns through %r2
 *
 * While the copy runs, %r26 and %r23 hold the `to' and `from' physical
 * pages in tlb-insert format, %r28 is the `to' alias address and %r29
 * the `from' alias address.
 * Writes %r1, %r19-%r23, %r26, %r28 and %r29.
 */
ENTRY_CFI(copy_user_page_asm)
	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to non shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26
	sub		%r25, %r1, %r23

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	convert_phys_for_tlb_insert20 %r23	/* convert phys addr to tlb insert format */
	depd		%r24,63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depdi		1, 41,1, %r29		/* Form aliased virtual address 'from' */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	extrw,u		%r23, 24,25, %r23	/* convert phys addr to tlb insert format */
	depw		%r24, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depwi		1, 9,1, %r29		/* Form aliased virtual address 'from' */
#endif

	/* Purge any old translations */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
	pdtlb,l		%r0(%r29)
#else
0:	pdtlb		%r0(%r28)
1:	pdtlb		%r0(%r29)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19		/* first doubleword is loaded ahead of the loop */
	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */

1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)
	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)
	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)
	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28

	addib,COND(>)		-1, %r1,1b
	ldo		64(%r29), %r29		/* delay slot: advance `from' alias */
#endif

	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_user_page_asm)
688
/*
 * clear_user_page_asm
 *
 * Zero one page through a temporary-alias mapping built in the
 * TMPALIAS_MAP_START region.
 *
 * In:   %r26 = kernel virtual address of the page (converted with
 *              tophys_r1, then to tlb-insert format)
 *       %r25 = address whose low 22 bits are deposited into the alias
 *              address (presumably the user virtual address - confirm
 *              against the C caller)
 * Out:  nothing; returns through %r2
 * Writes %r1, %r26 and %r28 (the alias address, advanced over the page).
 */
ENTRY_CFI(clear_user_page_asm)
	tophys_r1	%r26

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	ldi		(PAGE_SIZE / 128), %r1

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

1:	std		%r0, 0(%r28)
	std		%r0, 8(%r28)
	std		%r0, 16(%r28)
	std		%r0, 24(%r28)
	std		%r0, 32(%r28)
	std		%r0, 40(%r28)
	std		%r0, 48(%r28)
	std		%r0, 56(%r28)
	std		%r0, 64(%r28)
	std		%r0, 72(%r28)
	std		%r0, 80(%r28)
	std		%r0, 88(%r28)
	std		%r0, 96(%r28)
	std		%r0, 104(%r28)
	std		%r0, 112(%r28)
	std		%r0, 120(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28		/* delay slot: advance to next 128 bytes */

#else	/* ! CONFIG_64BIT */
	ldi		(PAGE_SIZE / 64), %r1

1:	stw		%r0, 0(%r28)
	stw		%r0, 4(%r28)
	stw		%r0, 8(%r28)
	stw		%r0, 12(%r28)
	stw		%r0, 16(%r28)
	stw		%r0, 20(%r28)
	stw		%r0, 24(%r28)
	stw		%r0, 28(%r28)
	stw		%r0, 32(%r28)
	stw		%r0, 36(%r28)
	stw		%r0, 40(%r28)
	stw		%r0, 44(%r28)
	stw		%r0, 48(%r28)
	stw		%r0, 52(%r28)
	stw		%r0, 56(%r28)
	stw		%r0, 60(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28		/* delay slot: advance to next 64 bytes */
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_user_page_asm)
766
/*
 * flush_dcache_page_asm
 *
 * Flush one page from the data cache with fdc, addressing it through a
 * temporary-alias mapping built in the TMPALIAS_MAP_START region.
 *
 * In:   %r26 = page address, converted here to tlb-insert format
 *              (no virtual-to-physical step is done in this routine, so
 *              it is presumably already physical - confirm with caller)
 *       %r25 = address whose low 22 bits are deposited into the alias
 *              address
 * Out:  nothing; returns through %r2
 * Writes %r1, %r25, %r26, %r28 and %r31.
 *
 * The flush loop (labels 88..89) is registered with ALTERNATIVE() under
 * ALT_COND_NO_DCACHE with INSN_NOP as replacement; the trailing sync is
 * outside that range.
 */
ENTRY_CFI(flush_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31	/* %r31 = dcache_stride */

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25	/* %r25 = alias + page size - stride */

	/* 16 fdc per pass (15 + delay slot); repeat while %r25 > %r28 */
1:	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_dcache_page_asm)
825
/*
 * purge_dcache_page_asm
 *
 * Purge one page from the data cache with pdc, addressing it through a
 * temporary-alias mapping built in the TMPALIAS_MAP_START region.
 * Same structure as flush_dcache_page_asm, with pdc in place of fdc.
 *
 * In:   %r26 = page address, converted here to tlb-insert format
 *              (no virtual-to-physical step is done in this routine, so
 *              it is presumably already physical - confirm with caller)
 *       %r25 = address whose low 22 bits are deposited into the alias
 *              address
 * Out:  nothing; returns through %r2
 * Writes %r1, %r25, %r26, %r28 and %r31.
 *
 * The purge loop (labels 88..89) is registered with ALTERNATIVE() under
 * ALT_COND_NO_DCACHE with INSN_NOP as replacement; the trailing sync is
 * outside that range.
 */
ENTRY_CFI(purge_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31	/* %r31 = dcache_stride */

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25	/* %r25 = alias + page size - stride */

	/* 16 pdc per pass (15 + delay slot); repeat while %r25 > %r28 */
1:      pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	pdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_dcache_page_asm)
884
/*
 * flush_icache_page_asm
 *
 * Flush one page from the instruction cache with fic, addressing it
 * through a temporary-alias mapping built in the TMPALIAS_MAP_START
 * region.  Both the data and the instruction TLB entry for the alias
 * address are purged first.
 *
 * In:   %r26 = page address, converted here to tlb-insert format
 *              (no virtual-to-physical step is done in this routine, so
 *              it is presumably already physical - confirm with caller)
 *       %r25 = address whose low 22 bits are deposited into the alias
 *              address
 * Out:  nothing; returns through %r2
 * Writes %r1, %r25, %r26, %r28 and %r31.
 *
 * The flush loop (labels 88..89) is registered with ALTERNATIVE() under
 * ALT_COND_NO_ICACHE with INSN_NOP as replacement; the trailing sync is
 * outside that range.
 */
ENTRY_CFI(flush_icache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation.  Note that the FIC instruction
	 * may use either the instruction or data TLB.  Given that we
	 * have a flat address space, it's not clear which TLB will be
	 * used.  So, we purge both entries.  */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
1:	pitlb,l         %r0(%sr4,%r28)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
0:	pdtlb		%r0(%r28)
1:	pitlb           %r0(%sr4,%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif

88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31	/* %r31 = icache_stride */

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25	/* %r25 = alias + page size - stride */

	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4 */
	/* 16 fic per pass (15 + delay slot); repeat while %r25 > %r28 */
1:      fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fic,m		%r31(%sr4,%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_icache_page_asm)
953
/*
 * flush_kernel_dcache_page_asm
 *
 * Flush one page from the data cache with fdc, addressing it directly
 * through the address passed in (no alias mapping is set up here).
 *
 * In:   %r26 = address of the page
 * Out:  nothing; returns through %r2
 * Writes %r1, %r23 (dcache_stride), %r25 (loop limit) and %r26
 * (advanced by fdc,m as lines are flushed).
 *
 * The flush loop (labels 88..89) is registered with ALTERNATIVE() under
 * ALT_COND_NO_DCACHE with INSN_NOP as replacement; the trailing sync is
 * outside that range.
 */
ENTRY_CFI(flush_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25	/* %r25 = page + page size - stride */

	/* 16 fdc per pass (15 + delay slot); repeat while %r25 > %r26 */
1:      fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)
989
/*
 * purge_kernel_dcache_page_asm
 *
 * Purge one page from the data cache with pdc, addressing it directly
 * through the address passed in (no alias mapping is set up here).
 * Same structure as flush_kernel_dcache_page_asm, with pdc in place of
 * fdc.
 *
 * In:   %r26 = address of the page
 * Out:  nothing; returns through %r2
 * Writes %r1, %r23 (dcache_stride), %r25 (loop limit) and %r26
 * (advanced by pdc,m as lines are purged).
 *
 * The purge loop (labels 88..89) is registered with ALTERNATIVE() under
 * ALT_COND_NO_DCACHE with INSN_NOP as replacement; the trailing sync is
 * outside that range.
 */
ENTRY_CFI(purge_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25	/* %r25 = page + page size - stride */

	/* 16 pdc per pass (15 + delay slot); repeat while %r25 > %r26 */
1:      pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	pdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)
1025
/*
 * flush_user_dcache_range_asm
 *
 * Flush an address range from the data cache with fdc, addressing it
 * through space register %sr3.
 *
 * In:   %r26 = start address (rounded down here to a multiple of
 *              dcache_stride)
 *       %r25 = end address
 * Out:  nothing; returns through %r2
 * Writes %r1, %r21 (stride mask, then 16 * stride), %r22 (look-ahead
 * address), %r23 (dcache_stride) and %r26 (advanced by fdc,m).
 *
 * Loop 1 flushes 16 lines per pass while a further 16-line chunk still
 * fits below %r25; loop 2 then flushes one line per pass while
 * %r25 > %r26.  Labels 88..89 are registered with ALTERNATIVE() under
 * ALT_COND_NO_DCACHE with INSN_NOP as replacement; the trailing sync is
 * outside that range.
 */
ENTRY_CFI(flush_user_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21		/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26	/* align start down to the stride */

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21	/* %r21 = stride << 4 */
#else
	depw,z		%r23, 27, 28, %r21	/* %r21 = stride << 4 */
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%sr3, %r26)

	/* Tail: the delay-slot fdc only executes when the branch is taken */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fdc,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_dcache_range_asm)
1066
/*
 * flush_kernel_dcache_range_asm
 *
 * Flush an address range from the data cache with fdc, using the
 * addresses as passed (no explicit space register).
 *
 * In:   %r26 = start address (rounded down here to a multiple of
 *              dcache_stride)
 *       %r25 = end address
 * Out:  nothing; returns through %r2
 * Writes %r1, %r21 (stride mask, then 16 * stride), %r22 (look-ahead
 * address), %r23 (dcache_stride) and %r26 (advanced by fdc,m).
 *
 * Loop 1 flushes 16 lines per pass while a further 16-line chunk still
 * fits below %r25; loop 2 then flushes one line per pass while
 * %r25 > %r26.  Labels 88..89 are registered with ALTERNATIVE() under
 * ALT_COND_NO_DCACHE with INSN_NOP as replacement; here the sync lies
 * inside that range and the syncdma after it.
 */
ENTRY_CFI(flush_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21		/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26	/* align start down to the stride */

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21	/* %r21 = stride << 4 */
#else
	depw,z		%r23, 27, 28, %r21	/* %r21 = stride << 4 */
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%r26)

	/* Tail: the delay-slot fdc only executes when the branch is taken */
2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	syncdma
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)
1108
/*
 * purge_kernel_dcache_range_asm - issue pdc (purge data cache) on every
 * cache line of an address range; the pdc operands name no space register.
 *
 * In:       %r26 = start address
 *           %r25 = end address (exclusive, compared unsigned)
 *           %r2  = return pointer
 * Clobbers: %r1, %r21, %r22, %r23, %r26
 *
 * Register roles inside the routine:
 *   %r23 = dcache_stride, the post-increment applied by every pdc,m
 *   %r21 = stride - 1 (alignment mask), later stride * 16 (bytes covered
 *          by one pass of the unrolled loop)
 *   %r22 = address at which the pass in progress ends
 *   %r26 = running purge address
 *
 * NOTE(review): rounding the start down with ANDCM assumes dcache_stride
 * is a power of two - confirm.
 */
ENTRY_CFI(purge_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23	/* %r23 = dcache_stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* round start down to a stride boundary */

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21		/* %r21 = stride << 4 */
#else
	depw,z		%r23, 27, 28, %r21		/* %r21 = stride << 4 */
#endif
	add		%r26, %r21, %r22		/* %r22 = end of the first 16-line pass */
	/* Range shorter than one full pass: go straight to the tail loop. */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22		/* %r22 = end of the following pass */
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	/* Repeat while another full pass still fits below the end address. */
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	pdc,m		%r23(%r26)			/* 16th purge, in the branch delay slot */

	/*
	 * Tail loop: one line per trip while %r26 is below the end address.
	 * The ,n completer nullifies the delay-slot pdc once the backward
	 * branch is no longer taken.
	 */
2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	pdc,m		%r23(%r26)

	/*
	 * This sync sits before label 89, so it is part of the 88..89 span
	 * handed to ALTERNATIVE for replacement by INSN_NOP under
	 * ALT_COND_NO_DCACHE; the syncdma and the return below are not.
	 */
	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	syncdma
	bv		%r0(%r2)			/* return to caller */
	nop						/* branch delay slot */
ENDPROC_CFI(purge_kernel_dcache_range_asm)
1150
/*
 * flush_user_icache_range_asm - issue fic (flush instruction cache) on
 * every cache line of an address range, addressing through space
 * register %sr3.
 *
 * In:       %r26 = start address
 *           %r25 = end address (exclusive, compared unsigned)
 *           %r2  = return pointer
 * Clobbers: %r1, %r21, %r22, %r23, %r26
 *
 * Register roles inside the routine:
 *   %r23 = icache_stride, the post-increment applied by every fic,m
 *   %r21 = stride - 1 (alignment mask), later stride * 16 (bytes covered
 *          by one pass of the unrolled loop)
 *   %r22 = address at which the pass in progress ends
 *   %r26 = running flush address
 *
 * NOTE(review): rounding the start down with ANDCM assumes icache_stride
 * is a power of two, and %sr3 is presumably the user space id - confirm
 * both against the callers.
 */
ENTRY_CFI(flush_user_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = icache_stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* round start down to a stride boundary */

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21		/* %r21 = stride << 4 */
#else
	depw,z		%r23, 27, 28, %r21		/* %r21 = stride << 4 */
#endif
	add		%r26, %r21, %r22		/* %r22 = end of the first 16-line pass */
	/* Range shorter than one full pass: go straight to the tail loop. */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22		/* %r22 = end of the following pass */
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	/* Repeat while another full pass still fits below the end address. */
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr3, %r26)		/* 16th flush, in the branch delay slot */

	/*
	 * Tail loop: one line per trip while %r26 is below the end address.
	 * The ,n completer nullifies the delay-slot fic once the backward
	 * branch is no longer taken.
	 */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr3, %r26)

	/*
	 * The instructions from label 88 up to label 89 are handed to
	 * ALTERNATIVE for replacement by INSN_NOP under ALT_COND_NO_ICACHE
	 * (macro defined in <asm/alternative.h>, not visible here).
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)			/* return to caller */
	nop						/* branch delay slot */
ENDPROC_CFI(flush_user_icache_range_asm)
1191
/*
 * flush_kernel_icache_page - issue fic (flush instruction cache) on every
 * cache line of one page, addressing through space register %sr4.
 *
 * In:       %r26 = address of the page
 *           %r2  = return pointer
 * Clobbers: %r1, %r23, %r25, %r26
 *
 * Register roles inside the routine:
 *   %r23 = icache_stride, the post-increment applied by every fic,m
 *   %r25 = page address + (1 << PAGE_SHIFT) - stride, i.e. the address
 *          of the last stride-sized line in the page
 *   %r26 = running flush address
 *
 * NOTE(review): %r26 is not aligned here and lines are only flushed 16 at
 * a time, so this presumably relies on a page-aligned argument and on the
 * page size being a multiple of 16 * icache_stride - confirm.
 */
ENTRY_CFI(flush_kernel_icache_page)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = icache_stride */

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25	/* %r25 = 1 << PAGE_SHIFT */
#endif
	add		%r26, %r25, %r25		/* %r25 = first address past the page */
	sub		%r25, %r23, %r25		/* %r25 = address of the page's last line */


1:	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	/*
	 * At this compare %r26 addresses the 16th line of the pass, which the
	 * delay slot flushes unconditionally; loop while that line lies
	 * below the page's last line.
	 */
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)		/* 16th flush, in the branch delay slot */

	/*
	 * The instructions from label 88 up to label 89 are handed to
	 * ALTERNATIVE for replacement by INSN_NOP under ALT_COND_NO_ICACHE
	 * (macro defined in <asm/alternative.h>, not visible here).
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)			/* return to caller */
	nop						/* branch delay slot */
ENDPROC_CFI(flush_kernel_icache_page)
1228
/*
 * flush_kernel_icache_range_asm - issue fic (flush instruction cache) on
 * every cache line of an address range, addressing through space
 * register %sr4.
 *
 * In:       %r26 = start address
 *           %r25 = end address (exclusive, compared unsigned)
 *           %r2  = return pointer
 * Clobbers: %r1, %r21, %r22, %r23, %r26
 *
 * Register roles inside the routine:
 *   %r23 = icache_stride, the post-increment applied by every fic,m
 *   %r21 = stride - 1 (alignment mask), later stride * 16 (bytes covered
 *          by one pass of the unrolled loop)
 *   %r22 = address at which the pass in progress ends
 *   %r26 = running flush address
 *
 * NOTE(review): rounding the start down with ANDCM assumes icache_stride
 * is a power of two - confirm.
 */
ENTRY_CFI(flush_kernel_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = icache_stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* round start down to a stride boundary */

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21		/* %r21 = stride << 4 */
#else
	depw,z		%r23, 27, 28, %r21		/* %r21 = stride << 4 */
#endif
	add		%r26, %r21, %r22		/* %r22 = end of the first 16-line pass */
	/* Range shorter than one full pass: go straight to the tail loop. */
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22		/* %r22 = end of the following pass */
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	/* Repeat while another full pass still fits below the end address. */
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)		/* 16th flush, in the branch delay slot */

	/*
	 * Tail loop: one line per trip while %r26 is below the end address.
	 * The ,n completer nullifies the delay-slot fic once the backward
	 * branch is no longer taken.
	 */
2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fic,m		%r23(%sr4, %r26)

	/*
	 * The instructions from label 88 up to label 89 are handed to
	 * ALTERNATIVE for replacement by INSN_NOP under ALT_COND_NO_ICACHE
	 * (macro defined in <asm/alternative.h>, not visible here).
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)			/* return to caller */
	nop						/* branch delay slot */
ENDPROC_CFI(flush_kernel_icache_range_asm)
1269
1270	__INIT
1271
1272	/* align should cover use of rfi in disable_sr_hashing_asm and
1273	 * srdis_done.
1274	 */
1275	.align	256
1276ENTRY_CFI(disable_sr_hashing_asm)
1277	/*
1278	 * Switch to real mode
1279	 */
1280	/* pcxt_ssm_bug */
1281	rsm		PSW_SM_I, %r0
1282	load32		PA(1f), %r1
1283	nop
1284	nop
1285	nop
1286	nop
1287	nop
1288
1289	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
1290	mtctl		%r0, %cr17		/* Clear IIASQ tail */
1291	mtctl		%r0, %cr17		/* Clear IIASQ head */
1292	mtctl		%r1, %cr18		/* IIAOQ head */
1293	ldo		4(%r1), %r1
1294	mtctl		%r1, %cr18		/* IIAOQ tail */
1295	load32		REAL_MODE_PSW, %r1
1296	mtctl		%r1, %ipsw
1297	rfi
1298	nop
1299
13001:      cmpib,=,n	SRHASH_PCXST, %r26,srdis_pcxs
1301	cmpib,=,n	SRHASH_PCXL, %r26,srdis_pcxl
1302	cmpib,=,n	SRHASH_PA20, %r26,srdis_pa20
1303	b,n		srdis_done
1304
1305srdis_pcxs:
1306
1307	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */
1308
1309	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
1310	.word		0x141c1a00		/* must issue twice */
1311	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
1312	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
1313	.word		0x141c1600		/* mtdiag %r28, %dr0 */
1314	.word		0x141c1600		/* must issue twice */
1315	b,n		srdis_done
1316
1317srdis_pcxl:
1318
1319	/* Disable Space Register Hashing for PCXL */
1320
1321	.word		0x141c0600		/* mfdiag %dr0, %r28 */
1322	depwi           0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
1323	.word		0x141c0240		/* mtdiag %r28, %dr0 */
1324	b,n		srdis_done
1325
1326srdis_pa20:
1327
1328	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */
1329
1330	.word		0x144008bc		/* mfdiag %dr2, %r28 */
1331	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
1332	.word		0x145c1840		/* mtdiag %r28, %dr2 */
1333
1334
1335srdis_done:
1336	/* Switch back to virtual mode */
1337	rsm		PSW_SM_I, %r0		/* prep to load iia queue */
1338	load32 	   	2f, %r1
1339	nop
1340	nop
1341	nop
1342	nop
1343	nop
1344
1345	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
1346	mtctl		%r0, %cr17		/* Clear IIASQ tail */
1347	mtctl		%r0, %cr17		/* Clear IIASQ head */
1348	mtctl		%r1, %cr18		/* IIAOQ head */
1349	ldo		4(%r1), %r1
1350	mtctl		%r1, %cr18		/* IIAOQ tail */
1351	load32		KERNEL_PSW, %r1
1352	mtctl		%r1, %ipsw
1353	rfi
1354	nop
1355
13562:      bv		%r0(%r2)
1357	nop
1358ENDPROC_CFI(disable_sr_hashing_asm)
1359
1360	.end
1361